This commit is contained in:
atagen 2026-05-19 23:32:51 +10:00
parent da4bc139eb
commit 0aa6ae9fbf
49 changed files with 10554 additions and 5482 deletions

285
Cargo.lock generated Normal file
View file

@ -0,0 +1,285 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "adler2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "bitflags"
version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "crc32fast"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [
"cfg-if",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "fast-strip-ansi"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3086ffd0a7160f58f988c74173a002e255da505a114e2f5425acb1eaab2b8ac"
dependencies = [
"vt-push-parser",
]
[[package]]
name = "flate2"
version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "hashbrown"
version = "0.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "indexmap"
version = "2.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]]
name = "inshellah"
version = "0.1.1"
dependencies = [
"fast-strip-ansi",
"flate2",
"libc",
"nom",
"parking_lot",
"serde_json",
]
[[package]]
name = "itoa"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
[[package]]
name = "libc"
version = "0.2.186"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
[[package]]
name = "lock_api"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
dependencies = [
"scopeguard",
]
[[package]]
name = "memchr"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
[[package]]
name = "miniz_oxide"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler2",
"simd-adler32",
]
[[package]]
name = "nom"
version = "8.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405"
dependencies = [
"memchr",
]
[[package]]
name = "parking_lot"
version = "0.12.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-link",
]
[[package]]
name = "proc-macro2"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
dependencies = [
"bitflags",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "serde"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
dependencies = [
"indexmap",
"itoa",
"memchr",
"serde",
"serde_core",
"zmij",
]
[[package]]
name = "simd-adler32"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "syn"
version = "2.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "vt-push-parser"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdbf39d53c5a50cad8119d9cde929ecd208764e8d8d1626486b8929cbcd5f0e7"
dependencies = [
"hex",
"smallvec",
]
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "zmij"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"

12
Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
[package]
name = "inshellah"
version = "0.1.1"
edition = "2024"
[dependencies]
fast-strip-ansi = "0.13.1"
flate2 = "1.1.9"
libc = "0.2.186"
nom = "8.0.0"
parking_lot = "0.12.5"
serde_json = { version = "1.0.149", features = ["preserve_order"] }

View file

@ -6,7 +6,7 @@ completer.
see `doc/` for details:
- [building and installing](doc/building.md) — compilation, arch/debian/fedora, opam, nix
- [nushell integration](doc/nushell-integration.md) — setup, usage, examples
- [nixos module](doc/nixos.md) — automatic build-time indexing
- [building and installing](doc/building.md) — cargo, nix, post-install setup
- [nushell integration](doc/nushell-integration.md) — setup, the pipeline, the completer
- [nixos module](doc/nixos.md) — automatic build-time indexing + module options
- [runtime completions](doc/runtime-completions.md) — on-the-fly caching via the completer

View file

View file

@ -1,4 +0,0 @@
(executable
(public_name inshellah)
(name main)
(libraries inshellah))

File diff suppressed because it is too large Load diff

View file

@ -1,141 +1,77 @@
# building and installing
## dependencies
inshellah is a rust crate. it builds with stock cargo on any platform
rust supports.
inshellah is written in OCaml and uses dune as its build system.
build dependencies:
- **OCaml** >= 5.0
- **dune** >= 3.20
- **angstrom** — parser combinator library
- **angstrom-unix** — unix extensions for angstrom
- **camlzip** — gzip decompression for reading compressed manpages
- **str** — regular expressions (ships with OCaml)
- **unix** — process/file operations (ships with OCaml)
runtime dependencies:
- **man** (optional) — used as a fallback to locate manpages during
on-the-fly completion resolution. not needed if system directories
are provided via `--dir` (manpages are found via sibling `share/man`).
## building with nix (recommended)
if you have nix installed:
## with nix
```sh
nix build
```
the binary is at `./result/bin/inshellah`.
binary is at `./result/bin/inshellah`.
for development with a shell containing all dependencies:
development shell:
```sh
nix develop
dune build
dune test
cargo build --release
cargo test
```
## building from source with opam
## with cargo
install dependencies via opam:
requires rust >= 1.85 (edition 2024).
```sh
opam install dune angstrom angstrom-unix camlzip
```
build and test:
```sh
dune build
dune test
```
install into the opam switch:
```sh
dune install
```
## building from source without opam
if your distribution packages the OCaml libraries directly, install
them through your package manager, then build with dune:
```sh
dune build
```
the binary is at `_build/default/bin/main.exe`. copy it to your
`$PATH`:
```sh
install -Dm755 _build/default/bin/main.exe /usr/local/bin/inshellah
cargo build --release
cargo test
sudo install -Dm755 target/release/inshellah /usr/local/bin/inshellah
```
## arch linux
install OCaml and dune from the official repos, and the remaining
libraries from the AUR or via opam:
```sh
# system packages
sudo pacman -S ocaml dune
# ocaml libraries (via opam)
opam init # if not already initialized
eval $(opam env)
opam install angstrom angstrom-unix camlzip
# build
dune build
dune test
# install
sudo install -Dm755 _build/default/bin/main.exe /usr/local/bin/inshellah
sudo pacman -S rust
cargo build --release
sudo install -Dm755 target/release/inshellah /usr/local/bin/inshellah
```
## debian / ubuntu
```sh
sudo apt install ocaml opam
opam init
eval $(opam env)
opam install dune angstrom angstrom-unix camlzip
dune build
sudo install -Dm755 _build/default/bin/main.exe /usr/local/bin/inshellah
sudo apt install cargo rustc
# or: rustup install stable
cargo build --release
sudo install -Dm755 target/release/inshellah /usr/local/bin/inshellah
```
## fedora
```sh
sudo dnf install ocaml opam
opam init
eval $(opam env)
opam install dune angstrom angstrom-unix camlzip
dune build
sudo install -Dm755 _build/default/bin/main.exe /usr/local/bin/inshellah
sudo dnf install cargo rust
cargo build --release
sudo install -Dm755 target/release/inshellah /usr/local/bin/inshellah
```
## post-install setup
after installing the binary, index completions from your system
prefix(es):
index completions from your system prefix(es):
```sh
# typical linux system
inshellah index /usr /usr/local
# more workers / different timeout
inshellah index /usr /usr/local --workers 16 --timeout-ms 500
# check what was indexed
inshellah dump
```
then wire up the nushell completer:
wire up the nushell completer in `~/.config/nushell/config.nu`:
```nu
# ~/.config/nushell/config.nu
$env.config.completions.external = {
enable: true
completer: {|spans|
@ -145,19 +81,28 @@ $env.config.completions.external = {
}
```
see [nushell-integration.md](nushell-integration.md) for full details
on the completer, and [runtime-completions.md](runtime-completions.md)
for on-the-fly resolution of commands not covered by the index.
see [nushell-integration.md](nushell-integration.md) for full
completer details and [runtime-completions.md](runtime-completions.md)
for on-the-fly resolution of commands not covered by the upfront
index.
## re-indexing after package changes
the index is a static cache — it doesn't update automatically when you
install or remove packages. re-run `inshellah index` after significant
package changes:
```sh
inshellah index /usr /usr/local
```
on nixos, the system index regenerates on every `nixos-rebuild`
automatically. see [nixos.md](nixos.md) for details.
on nixos, the system index regenerates on every `nixos-rebuild`. see
[nixos.md](nixos.md).
## development
```sh
cargo build # debug build, faster compile
cargo test # full test suite
cargo clippy --release
```
a `man` binary is useful at runtime as a fallback for locating
manpages outside the indexed prefixes — not required for indexing
itself.

View file

@ -1,105 +1,51 @@
# nixos integration
inshellah provides a nixos module that automatically indexes nushell
completions for all installed packages at system build time.
inshellah provides a nixos module that indexes nushell completions for
every installed package at system build time, and a wrapped binary
that knows where to find the result.
## enabling
```nix
# in your flake.nix outputs:
# flake.nix outputs:
{
nixosConfigurations.myhost = nixpkgs.lib.nixosSystem {
modules = [
inshellah.nixosModules.default
{
programs.inshellah.enable = true;
}
{ programs.inshellah.enable = true; }
];
};
}
```
or if importing the module directly:
or importing directly:
```nix
# configuration.nix
{ pkgs, ... }: {
imports = [ ./path/to/inshellah/nix/module.nix ];
programs.inshellah = {
enable = true;
package = pkgs.inshellah; # or your local build
};
imports = [ ./path/to/inshellah-rs/nix/module.nix ];
programs.inshellah.enable = true;
}
```
## what happens at build time
after rebuilding, completions are immediately available through the
autoloaded nushell shim.
the module hooks into `environment.extraSetup`, which runs during the
system profile build (the `buildEnv` that creates `/run/current-system/sw`).
at that point, all system packages are merged, so `$out/bin` contains every
executable and `$out/share/man` contains every manpage.
## what the module does
inshellah runs a single command:
```
inshellah index "$out" --dir $out/share/inshellah
```
this executes a three-phase pipeline:
### phase 1: native completion detection (parallel)
for each executable, inshellah scans the elf binary for the string
`completion`. if found, it probes common patterns like
`CMD completions nushell` to see if the program can generate its own
nushell completions. native output is used verbatim — these are always
higher quality than parsed completions.
programs like `niri`, and any clap/cobra tool with nushell support,
are handled this way.
### phase 2: manpage parsing (sequential)
for commands not covered by phase 1, inshellah parses manpages from
man1 (user commands) and man8 (sysadmin commands). it handles:
- gnu `.TP` style (coreutils, help2man)
- `.IP` style (curl, hand-written)
- `.PP`+`.RS`/`.RE` style (git, docbook)
- nix3 bullet+hyperlink style (`nix run`, `nix build`, etc.)
- mdoc (bsd) format
- deroff fallback for unusual formats
synopsis sections are parsed to detect subcommands: `git-commit.1`
generates `export extern "git commit"`, not `export extern "git-commit"`.
### phase 3: --help fallback (parallel)
remaining executables without manpages get `--help` (or `-h`) called
with a 200ms timeout. elf binaries are pre-scanned for the `-h` string
to skip those that don't support help flags. shell scripts are run
directly (they're fast). execution is parallelized to available cores.
when `--help` produces rendered manpage output instead of plain help
text (e.g. `git stash --help` delegates to `man`), the raw manpage
source is located and parsed with the groff parser for richer results.
### output
each command gets its own file in `/share/inshellah` under the system
profile. native generators produce `.nu` files; parsed results produce
`.json` files. the `complete` command reads both formats.
nushell built-in commands (ls, cd, cp, mv, etc.) are excluded since
nushell provides its own completions.
### performance
on a typical nixos system (~950 executables, ~1600 manpages):
- total time: ~4-10 seconds
- native gzip decompression (camlzip, no process spawning)
- parallel --help with core-scaled forking
- elf string scanning to skip ~15% of binaries
- installs the inshellah binary, wrapped so the system completion path
is found automatically.
- runs `inshellah index "$out"` during the system profile build,
producing one file per command under `$out/share/inshellah/`.
- drops the full nushell external-completer shim into
`$out/share/nushell/vendor/autoload/`, including sudo/doas overrides so
elevated commands still complete through inshellah.
- emits lightweight command-name stubs for dynamic-completion backends
that are present in the system profile, so tools like `git` and `jj`
appear in nushell's command list while inshellah still supplies their
argument completions lazily.
- exposes the same shim as a read-only `snippet` option for users who
want to source or inspect it manually.
## module options
@ -110,12 +56,11 @@ programs.inshellah = {
# the inshellah package (set automatically by the flake module)
package = pkgs.inshellah;
# where to place indexed completion files under the system profile
# subdirectory of the system profile holding the index files
# default: "/share/inshellah"
completionsPath = "/share/inshellah";
# additional read-only completion directories to search
# these are appended to the --dir path alongside the system completions
extraDirs = [ "/etc/profiles/per-user/alice/share/inshellah" ];
# commands to skip entirely during indexing
@ -123,41 +68,68 @@ programs.inshellah = {
# commands to skip manpage parsing for (uses --help instead)
helpOnlyCommands = [ "nix" ];
# per-subprocess timeout in ms during indexing (null = built-in
# default of 200ms)
timeoutMs = null;
# worker-thread count for the parallel scrape
workers = null;
};
```
## using the completer
the flake module sets a read-only `snippet` option containing the nushell
config needed to wire up the completer. you can access it via
`config.programs.inshellah.snippet` and paste it into your nushell config,
or source it from a file generated by your nixos config.
the module installs the completer under nushell's vendor autoload path,
so no hand-written nushell config is needed for the normal NixOS case.
the snippet sets up the external completer. the wrapper installed by
the module has the system completion paths hardcoded, so no flags are
needed:
the read-only `snippet` option still holds the complete
external-completer config. to manage sourcing yourself instead of using
autoload, write it to a file:
```nu
let inshellah_complete = {|spans|
inshellah complete ...$spans | from json
}
$env.config.completions.external = {
enable: true
max_results: 100
completer: $inshellah_complete
}
```nix
# generate a config file from the snippet
environment.etc."nushell/inshellah.nu".text = config.programs.inshellah.snippet;
```
## home manager and other user-level package managers
then source that file from your nushell config:
the nixos module only indexes packages installed at the system level
(those that end up in `/run/current-system/sw`). if you use home-manager,
nix-env, or another user-level package manager, those binaries and
manpages live elsewhere — typically under `/etc/profiles/per-user/<name>`
or `~/.nix-profile`.
```nu
source /etc/nushell/inshellah.nu
```
to get completions for user-installed packages, run `inshellah index`
against those prefixes separately:
or copy the snippet directly into `~/.config/nushell/config.nu`:
```nu
# (the snippet is many lines — copy it from `nix eval` of the option,
# or use the environment.etc approach above)
$env.config.completions.external = { ... }
```
the snippet provides both static lookups against the system index and
runtime fallbacks for cases the static index can't cover:
| command | dynamic source |
|---|---|
| `nix` | flake refs via `NIX_GET_COMPLETIONS`, with optional `meta.description` |
| `systemctl` / `journalctl` | unit names from `list-units` |
| `coredumpctl` | units + pids |
| `loginctl` | users / sessions |
| `machinectl` / `networkctl` | machines / links |
| `ssh` / `scp` / `sftp` | hostnames from ssh config + known_hosts |
| `docker` / `podman` | containers + image refs by subcommand |
| `kubectl` | resource names from the live cluster |
| `git` | refs + worktree paths |
| `npm` / `pnpm` / `yarn` | scripts from package.json |
| `make` / `just` | targets / recipes |
| `cargo` | workspace targets behind `--bin` / `--example` / etc. |
| `kill` / `pkill` | pid+comm pairs |
## home manager and user-level package managers
the system module only indexes packages installed system-wide. for
home-manager or per-user nix profiles, run `inshellah index` against
those prefixes separately:
```sh
# home-manager / per-user profile
@ -167,35 +139,34 @@ inshellah index /etc/profiles/per-user/$USER
inshellah index ~/.nix-profile
```
this indexes into the default user cache (`$XDG_CACHE_HOME/inshellah`),
which the completer searches automatically. you can re-run this after
installing new packages, or add it to a home-manager activation script.
if you want to automate this in home-manager:
this indexes into `$XDG_CACHE_HOME/inshellah`, which the completer
searches automatically. to automate via home-manager:
```nix
# home.nix
home.activation.inshellah-index = lib.hm.dag.entryAfter [ "writeBoundary" ] ''
${pkgs.inshellah}/bin/inshellah index /etc/profiles/per-user/$USER 2>/dev/null || true
'';
```
the completer will then search both the system index and the user
cache, so completions from both sources are available.
## troubleshooting
**completions not appearing**: ensure the completer is configured in
your nushell config (see above). check that the system index exists:
`ls /run/current-system/sw/share/inshellah/`.
**completions not appearing**: check that the system index exists
(`ls /run/current-system/sw/share/inshellah/`) and that the completer
is configured.
**missing completions for a specific command**: check if it's a nushell
built-in (`help commands | where name == "thecommand"`). built-ins are
excluded because nushell serves its own completions for them.
built-in (`help commands | where name == "thecommand"`) — built-ins
are excluded.
**stale completions after update**: completions regenerate on every
`nixos-rebuild`. if a command changed its flags, rebuild to pick up
the changes.
**command name missing but arguments complete after typing it**: the
command may be installed only in a user profile. the system module can
only generate command-name stubs for binaries linked into the system
profile, though the external completer can still complete arguments
once the command word has been typed.
**build-time errors**: indexing failures are non-fatal (`|| true`).
check `journalctl` for the build log if completions are missing.
**stale completions after update**: the index regenerates on every
`nixos-rebuild`. if a command changed its flags, rebuild.
**build-time errors**: indexing failures are non-fatal. check
`journalctl` for the build log if completions are missing for a
specific command.

View file

@ -1,150 +1,28 @@
# using inshellah completions in nushell
inshellah indexes completions from three sources (in priority order):
1. **native generators** — programs that can emit nushell completions directly
2. **manpages** — groff/troff/mdoc manpage parsing
3. **`--help` output** — parsing help text as a fallback
indexed data is stored as `.json` and `.nu` files in a directory that the
`complete` command reads from at tab-completion time.
inshellah indexes completions for the commands in your `$PATH` and
serves them to nushell's external completer. indexed data is stored as
`.json` and `.nu` files that the `complete` command reads at
tab-completion time.
## quick start
index completions from a system prefix:
```sh
# index from a prefix containing bin/ and share/man/
# from a prefix containing bin/ and share/man/
inshellah index /usr
# index from multiple prefixes
# multiple prefixes
inshellah index /usr /usr/local
# store in a custom directory
# custom directory
inshellah index /usr --dir ~/my-completions
```
parse a single manpage:
```sh
inshellah manpage /usr/share/man/man1/git.1.gz
```
batch-process all manpages under a directory (man1 and man8):
```sh
inshellah manpage-dir /usr/share/man
```
## commands
```
inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
index completions into a directory of json/nu files.
PREFIX is a directory containing bin/ and share/man/.
default dir: $XDG_CACHE_HOME/inshellah
--ignore FILE skip listed commands entirely
--help-only FILE skip manpages for listed commands, use --help instead
inshellah complete CMD [ARGS...] [--dir PATH[:PATH...]]
nushell custom completer. outputs json completion candidates.
falls back to --help resolution if command is not indexed.
--dir takes colon-separated paths. the first path is the writable
user cache; additional paths are read-only system directories.
manpages are found via sibling share/man of system dir paths.
inshellah query CMD [--dir PATH[:PATH...]]
print stored completion data for CMD.
inshellah dump [--dir PATH[:PATH...]]
list indexed commands.
inshellah manpage FILE
parse a manpage and emit nushell extern block.
inshellah manpage-dir DIR
batch-process manpages under DIR (man1 and man8 sections).
```
## the index pipeline
the `index` command runs a three-phase pipeline over all executables
in each `PREFIX/bin`:
### phase 1: native completion detection (parallel)
for each executable, inshellah scans the elf binary for the string
`completion`. if found, it probes common patterns like
`CMD completions nushell` to see if the program can generate its own
nushell completions. native output is used verbatim — these are always
higher quality than parsed completions.
programs like `niri`, and any clap/cobra tool with nushell support,
are handled this way.
### phase 2: manpage parsing (sequential)
for commands not covered by phase 1, inshellah parses manpages from
man1 (user commands) and man8 (sysadmin commands). it handles:
- gnu `.TP` style (coreutils, help2man)
- `.IP` style (curl, hand-written)
- `.PP`+`.RS`/`.RE` style (git, docbook)
- nix3 bullet+hyperlink style (`nix run`, `nix build`, etc.)
- mdoc (bsd) format
- deroff fallback for unusual formats
synopsis sections are parsed to detect subcommands: `git-commit.1`
generates `export extern "git commit"`, not `export extern "git-commit"`.
### phase 3: --help fallback (parallel)
remaining executables without manpages get `--help` (or `-h`) called
with a 200ms timeout. elf binaries are pre-scanned for the `-h` string
to skip those that don't support help flags. shell scripts are run
directly (they're fast). execution is parallelized to available cores.
subcommands are recursively resolved — if `--help` output lists
subcommands, inshellah runs `CMD SUBCMD --help` for each.
when a `--help` invocation produces rendered manpage output (some
commands like `git stash` delegate `--help` to `man`), inshellah
detects this and locates the raw manpage source to parse with the
groff parser instead. this yields richer results (subcommands,
structured flag sections) than parsing the rendered text.
### output
each command gets its own file in the index directory. native generators
produce `.nu` files; parsed results produce `.json` files. the `complete`
command reads both formats.
nushell built-in commands (ls, cd, cp, mv, etc.) are excluded since
nushell provides its own completions.
### performance
on a typical nixos system (~950 executables, ~1600 manpages):
- total time: ~4-10 seconds
- native gzip decompression (camlzip, no process spawning)
- parallel --help with core-scaled forking
- elf string scanning to skip ~15% of binaries
## the completer
the `complete` command is designed to be wired into nushell as an
external completer. it reads from the directories specified via `--dir`
(colon-separated), performs fuzzy matching, and outputs json completion
candidates. the first path is the writable user cache; additional paths
are read-only system directories.
if a command is not indexed, `complete` falls back to on-the-fly
`--help` resolution — it runs the command's help, caches the result
in the user directory, and returns completions immediately.
### setting up the completer
then wire up the completer in `~/.config/nushell/config.nu`:
```nu
# ~/.config/nushell/config.nu
$env.config.completions.external = {
enable: true
completer: {|spans|
@ -154,27 +32,62 @@ $env.config.completions.external = {
}
```
with the nixos module, use the provided `snippet` option value (see
[nixos.md](nixos.md)) which points at the system index automatically.
that's it. tab-completion now works for every command indexed.
## nixos module
## commands
enable automatic completion indexing at system build time:
```
inshellah index PREFIX... [--dir PATH] [--ignore FILE] [--help-only FILE]
[--workers N] [--timeout-ms N]
index completions into a directory of json/nu files.
PREFIX is a directory containing bin/ and share/man/.
default dir: $XDG_CACHE_HOME/inshellah
--ignore FILE skip listed commands entirely
--help-only FILE skip manpages for listed commands, use --help instead
--workers N worker-thread count
--timeout-ms N per-subprocess timeout in ms (default: 200)
```nix
{
imports = [ ./path/to/inshellah/nix/module.nix ];
programs.inshellah.enable = true;
}
inshellah complete CMD [ARGS...] [--dir PATH[:PATH...]] [--timeout-ms N]
nushell custom completer. outputs JSON completion candidates.
falls back to on-the-fly --help resolution if a command isn't
indexed yet — the result is cached and subsequent presses are
instant.
--dir takes colon-separated paths. the first path is the writable
user cache; additional paths are read-only system directories.
inshellah query CMD [--dir PATH[:PATH...]]
print stored completion data for CMD.
inshellah dump [--dir PATH[:PATH...]]
list indexed commands.
inshellah manpage FILE
parse a manpage and emit a nushell extern block.
inshellah manpage-dir DIR
batch-process manpages under DIR (man1 and man8 sections).
```
this runs `inshellah index` during the system profile build. see
[nixos.md](nixos.md) for full details.
## what gets handled
## what gets generated
- **sources**: native nushell completion generators (clap/cobra tools
that can emit completions themselves), manpages in sections 1 and 8,
`--help` and `-h` output.
- **groff styles**: gnu `.TP` (coreutils, help2man), `.IP` (curl,
hand-written), `.PP`+`.RS`/`.RE` (git, docbook), nix3 bullet
(`nix run`, `nix build`), mdoc (BSD), plus a deroff fallback.
- **subcommand naming**: `git-commit.1` produces `git commit`, not
`git-commit`. clap-style per-subcommand manpages get one file each.
- **synopsis-only flags**: flags declared in a manpage SYNOPSIS but
missing from the body (e.g. nix-env's `--profile`, most of sed's
interface) are picked up too.
- **elevation wrappers**: `sudo`, `doas`, `pkexec`, `su`, `run0` are
stripped before lookup, including when the real target is given as
an absolute path.
- **exclusions**: nushell built-ins (ls, cd, mv, etc.) are skipped —
nushell serves its own completions for those.
the `manpage` and `manpage-dir` commands emit nushell `extern` blocks
with flags, parameter types, and descriptions:
## extern blocks (manpage / manpage-dir)
```nu
export extern "rg" [
@ -186,9 +99,52 @@ export extern "rg" [
]
```
subcommand manpages (e.g. `git-commit.1`) are detected via synopsis
parsing and generate the correct nushell name (`git commit` not
`git-commit`).
these are produced by `inshellah manpage` / `inshellah manpage-dir` and
can be source'd directly in your nushell config if you prefer that to
the json completer flow.
nushell built-in commands (ls, cd, mv, etc.) are excluded since nushell
provides its own completions for these.
## native completions and file completion
when a tool ships its own nushell completion generator (clap, cobra, etc.),
inshellah caches its output verbatim as a `.nu` file under the autoload
dir. nushell loads the `extern` declarations and uses its built-in
completer for that command — the external completer (inshellah's `complete`
subcommand) is only consulted as a fallback.
at the `extern` layer, positional/flag types drive what nushell offers:
- `: path` triggers nushell's built-in file/path completion for that slot.
- `: string@my_completer` runs a user-defined closure.
- bare `: string` / `: int` provides no candidates of its own.
so when a native `.nu` declares `--file: path`, you'll see file completions
intermixed with whatever else is in scope. that's intrinsic to the type,
not something inshellah injects.
a few things worth knowing:
- nushell ≤ 0.69 had a bug
([#6407](https://github.com/nushell/nushell/issues/6407)) where file
completion superseded the external completer when the prefix was empty
or matched a real path. upgrade if you see this.
- [PR #14781](https://github.com/nushell/nushell/pull/14781) tightened the
contract: an external completer that returns a non-null list now
suppresses file fallback; only an explicit `null` opts back in. inshellah
already follows this — `null` for "hand off to nu", `[...]` to override.
- if you want different ranking, the relevant settings are
`$env.config.completions.{algorithm, sort, partial, case_sensitive}`.
none of them disables file completion for `: path` parameters — that
behavior is tied to the type itself.
if a particular native completion bothers you, the workaround is to drop
that one `.nu` file from the autoload directory. nushell falls back to the
external completer for unknown commands, and inshellah's `complete`
subcommand returns candidates directly as JSON — bypassing the `extern`
type layer entirely, so no `: path` slot triggers nu's built-in file
completer.
## nixos
`programs.inshellah.enable = true` will index at system build time and
ship a richer completer with runtime fallbacks (live cluster queries,
git/ssh/docker/k8s lookups, etc.). see [nixos.md](nixos.md).

View file

@ -1,30 +1,31 @@
# runtime completion resolution
the `complete` command has built-in on-the-fly resolution: when a command
is not found in the index, it falls back to running `--help`, caches the
result, and returns completions immediately. this means commands installed
outside the system profile (via cargo, pip, npm, go, etc.) get completions
on first tab-press with no manual setup.
when a command isn't in the static index yet, `inshellah complete`
runs `--help` (or `-h`) on the binary, caches the result in the user
directory, and returns completions immediately. tab-completion just
works for tools installed outside the indexed prefixes — via cargo,
pip, npm, go, etc.
## how it works
when you type `docker compose up --<TAB>`:
typing `docker compose up --<TAB>`:
1. nushell calls `inshellah complete docker compose up --`
2. inshellah looks up the index for the longest matching prefix
2. inshellah looks up the longest matching prefix in the index
3. if found, it fuzzy-matches flags and subcommands against the partial input
4. if not found, it locates the binary in `$PATH`, runs `--help`,
recursively resolves subcommands, caches the results in the user
directory (`$XDG_CACHE_HOME/inshellah`), and returns completions.
if `--help` produces rendered manpage output, the raw manpage source
is located and parsed instead for richer results
directory (`$XDG_CACHE_HOME/inshellah`), and returns completions
all subsequent completions for that command are instant (served from cache).
all subsequent completions for that command are served from cache.
elevation wrappers (`sudo`, `doas`, `pkexec`, `su`, `run0`) are
stripped before lookup: `sudo docker compose up --` resolves against
`docker`, not `sudo`. absolute paths after the wrapper are recognised
too.
## setup
the completer works with no extra configuration beyond the basic setup:
```nu
# ~/.config/nushell/config.nu
$env.config.completions.external = {
@ -36,18 +37,8 @@ $env.config.completions.external = {
}
```
with the nixos module, the installed wrapper has the system paths
hardcoded — no extra flags needed. the same snippet works:
```nu
$env.config.completions.external = {
enable: true
completer: {|spans|
inshellah complete ...$spans
| from json
}
}
```
with the nixos module, no extra config is needed beyond enabling the
module — the wrapper has the system paths baked in.
to manually specify system dirs, use colon-separated `--dir`:
@ -61,25 +52,15 @@ $env.config.completions.external = {
}
```
system directories (paths after the first in `--dir`) enable
manpage-based fallback: when a command's `--help` delegates to `man`,
the completer looks for the raw manpage in the sibling `share/man`
directory (e.g. `share/inshellah` → `share/man`). if no system dirs
are given, it falls back to `man -w` to locate the manpage.
or use the `snippet` option provided by the flake module (see
[nixos.md](nixos.md)).
paths after the first in `--dir` are read-only system dirs.
## cache management
the user cache lives at `$XDG_CACHE_HOME/inshellah` (typically
`~/.cache/inshellah`).
```sh
# list cached commands
inshellah dump
# view cached data for a command
# view stored data for a command
inshellah query docker
# clear cache

View file

@ -1,28 +0,0 @@
(lang dune 3.20)
(name inshellah)
(generate_opam_files true)
(source
(github username/reponame))
(authors "atagen <boss@atagen.co>")
(maintainers "atagen <boss@atagen.co>")
(license GPL-3.0-or-later)
(package
(name inshellah)
(synopsis "Nushell completions generator")
(description
"Inshellah parses manpages and --help switches to generate completions for nushell.")
(depends
ocaml
dune
angstrom
angstrom-unix
camlzip)
(tags
(shell completions nushell parser angstrom)))

8
flake.lock generated
View file

@ -2,16 +2,16 @@
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1773385838,
"narHash": "sha256-ylF2AGl08seexxlLvMqj3jd+yZq56W9zicwe51mp0Pw=",
"lastModified": 1773821835,
"narHash": "sha256-TJ3lSQtW0E2JrznGVm8hOQGVpXjJyXY2guAxku2O9A4=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "fef542e7a88eec2b698389e6279464fd479926b6",
"rev": "b40629efe5d6ec48dd1efba650c797ddbd39ace0",
"type": "github"
},
"original": {
"owner": "nixos",
"ref": "nixpkgs-unstable",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}

308
flake.nix
View file

@ -1,111 +1,251 @@
{
inputs.nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";
inputs.nixpkgs.url = "github:nixos/nixpkgs?ref=nixos-unstable";
outputs =
{ self, nixpkgs }:
let
forAllSystems =
f:
nixpkgs.lib.genAttrs [ "x86_64-linux" "aarch64-linux" ] (
system: f (import nixpkgs { inherit system; })
);
f: nixpkgs.lib.genAttrs nixpkgs.lib.systems.flakeExposed (sys: f nixpkgs.legacyPackages.${sys});
in
{
devShells = forAllSystems (pkgs: {
default = pkgs.mkShell {
packages = with pkgs.ocamlPackages; [
dune_3
ocaml
angstrom
angstrom-unix
camlzip
ppx_inline_test
ocaml-lsp
ocamlformat
ocamlformat-rpc-lib
utop
packages = with pkgs; [
rustc
cargo
rustfmt
rust-analyzer
clippy
];
};
});
packages = forAllSystems (pkgs: {
default = pkgs.ocamlPackages.buildDunePackage {
default = pkgs.rustPlatform.buildRustPackage {
pname = "inshellah";
version = "0.1";
version = "0.1.1";
src = pkgs.lib.cleanSource ./.;
nativeBuildInputs = [ pkgs.git ];
buildInputs = with pkgs.ocamlPackages; [
dune_3
ocaml
angstrom
angstrom-unix
camlzip
];
meta.mainProgram = "inshellah";
cargoLock.lockFile = ./Cargo.lock;
meta = {
description = "nushell completion indexer";
mainProgram = "inshellah";
};
};
});
nixosModules.default =
checks = forAllSystems (
pkgs:
let
checkSrc = pkgs.lib.cleanSourceWith {
src = ./.;
filter =
path: type:
let
base = baseNameOf path;
in
!(type == "directory" && (base == ".git" || base == "target"));
};
cargoDeps = pkgs.rustPlatform.importCargoLock { lockFile = ./Cargo.lock; };
rustInputs = with pkgs; [
cargo
clippy
stdenv.cc
rustc
];
fakeInshellah = pkgs.writeShellScriptBin "inshellah" ''
if [ "''${1:-}" = complete ]; then
if [ -n "''${INSHELLAH_STATIC_FILE:-}" ] && [ -s "$INSHELLAH_STATIC_FILE" ]; then
cat "$INSHELLAH_STATIC_FILE"
printf '\n'
else
printf 'null\n'
fi
else
printf 'null\n'
fi
'';
fakeNix = pkgs.writeShellScriptBin "nix" ''
if [ "''${1:-}" = eval ]; then
printf 'raw package description\n'
else
printf 'header\nbuild\nflake#pkg\n'
fi
'';
fakeSystemctl = pkgs.writeShellScriptBin "systemctl" ''
case "$*" in
*"g*"*)
printf 'greetd.service loaded active running Greeter\n'
;;
*)
printf 'demo.service loaded active running Demo Unit\n'
;;
esac
'';
fakeKubectl = pkgs.writeShellScriptBin "kubectl" ''
printf '%s\n' "$*" > "$KUBECTL_ARGS_FILE"
if [ "''${1:-}" = get ] && [ "''${2:-}" = deployment ]; then
printf 'deploy-a\n'
elif [ "''${1:-}" = get ]; then
printf 'pod-a\n'
fi
'';
fakeCargo = pkgs.writeShellScriptBin "cargo" ''
cat <<'JSON'
{"packages":[{"name":"app-lib","version":"0.1.0","targets":[{"name":"app-lib","kind":["lib"]},{"name":"app-cli","kind":["bin"]},{"name":"app-integration","kind":["test"]}]},{"name":"helper-lib","version":"0.2.0","targets":[{"name":"helper-lib","kind":["lib"]}]}]}
JSON
'';
fakeGit = pkgs.writeShellScriptBin "git" ''
case "''${1:-}" in
remote)
printf 'origin\nupstream\n'
;;
for-each-ref)
case "$*" in
*"refs/heads refs/remotes refs/tags"*)
printf 'main\tcommit\tMain branch\norigin/main\tcommit\tRemote main\nv1.0\tcommit\tRelease 1\n'
;;
*"refs/heads"*)
printf 'main\tMain branch\nfeature\tFeature branch\n'
;;
*"refs/tags"*)
printf 'v1.0\tRelease 1\nv2.0\tRelease 2\n'
;;
esac
;;
stash)
if [ "''${2:-}" = list ]; then
printf 'stash@{0}: WIP on main: demo stash\n'
fi
;;
status)
printf ' M src/main.rs\n?? new-file.txt\nR old.txt -> renamed.txt\n'
;;
ls-files)
printf 'src/main.rs\nREADME.md\n'
;;
config)
printf 'submodule.demo.path deps/demo\n'
;;
worktree)
if [ "''${2:-}" = list ]; then
printf 'worktree /repo/linked\n'
fi
;;
esac
'';
fakeJj = pkgs.writeShellScriptBin "jj" ''
case "''${1:-}" in
log)
printf 'k\tworking change\nm\tmain change\n'
;;
bookmark)
if [ "''${2:-}" = list ]; then
printf 'main\nfeature\norigin/main\n'
fi
;;
tag)
if [ "''${2:-}" = list ]; then
printf 'v1.0\nv2.0\n'
fi
;;
git)
if [ "''${2:-}" = remote ] && [ "''${3:-}" = list ]; then
printf 'origin https://example.com/repo.git\nupstream https://example.com/upstream.git\n'
fi
;;
op|operation)
if [ "''${2:-}" = log ]; then
printf 'abc123\tcheckout working copy\n'
fi
;;
file)
if [ "''${2:-}" = list ]; then
printf 'src/main.rs\nREADME.md\n'
fi
;;
workspace)
if [ "''${2:-}" = list ]; then
printf 'default\nlinked\n'
fi
;;
esac
'';
fakeCompletionBackends = pkgs.symlinkJoin {
name = "inshellah-fake-completion-backends";
paths = [
fakeInshellah
fakeNix
fakeSystemctl
fakeKubectl
fakeCargo
fakeGit
fakeJj
];
};
rustCheckPhase = ''
echo "running rust checks"
rm -rf source-rust
cp -R ${checkSrc} source-rust
chmod -R u+w source-rust
pushd source-rust
export CARGO_HOME="$TMPDIR/cargo-home"
export CARGO_TARGET_DIR="$TMPDIR/cargo-target"
mkdir -p .cargo "$CARGO_HOME"
cat > .cargo/config.toml <<EOF
[source.crates-io]
replace-with = "vendored-sources"
[source.vendored-sources]
directory = "${cargoDeps}"
[net]
offline = true
EOF
cargo clippy --all-targets
cargo test --all-targets
popd
'';
nushellCheckPhase = ''
echo "running nushell shim checks"
export PATH="${fakeCompletionBackends}/bin:$PATH"
export KUBECTL_ARGS_FILE="$TMPDIR/kubectl.args"
export INSHELLAH_STATIC_FILE="$TMPDIR/inshellah-static.json"
: > "$INSHELLAH_STATIC_FILE"
nu --no-config-file -c 'source ${./nix/inshellah-completer.nu}; source ${./tests/nushell-completer.nu}'
cat > "$TMPDIR/config-load.nu" <<'EOF'
source ${./nix/inshellah-completer.nu}
def activate [p: path] {
sudo nix-env --set -p /nix/var/nix/profiles/system $p
sudo $"($p)/bin/switch-to-configuration" switch
doas nix-env --set -p /nix/var/nix/profiles/system $p
}
EOF
nu --env-config /dev/null --config "$TMPDIR/config-load.nu" -c 'print ok'
'';
mkShellCheck =
name: inputs: phase:
pkgs.runCommand name { nativeBuildInputs = inputs; } ''
${phase}
touch $out
'';
in
{
pkgs,
lib,
config,
...
}:
rust = mkShellCheck "inshellah-rust-check" rustInputs rustCheckPhase;
nushell = mkShellCheck "inshellah-nushell-check" [ pkgs.nushell ] nushellCheckPhase;
default = mkShellCheck "inshellah-check" (rustInputs ++ [ pkgs.nushell ]) ''
${rustCheckPhase}
${nushellCheckPhase}
'';
}
);
nixosModules.default =
{ pkgs, ... }:
{
imports = [ ./nix/module.nix ];
programs.inshellah.package = self.packages.${pkgs.stdenv.hostPlatform.system}.default;
programs.inshellah.snippet = ''
let inshellah_complete = { |spans|
let completions = (^inshellah complete ...$spans) | from json
# dynamic completions
let additional = if ($completions == null and ($spans | length) > 0) {
match $spans.0 {
"nix" => {
$env.NIX_GET_COMPLETIONS = ($spans | length) - 1
let nix_output = $spans | run-external $in | split row -r '\n' | str trim | skip 1
let entries = if (($nix_output | length) < 6 and
($spans | last) =~ "[a-zA-Z][a-zA-Z0-9_-]*#[a-zA-Z][a-zA-Z0-9_-]*") {
hide-env NIX_GET_COMPLETIONS
$env.NIX_ALLOW_UNFREE = 1
$env.NIX_ALLOW_BROKEN = 1
$nix_output | par-each { |e|
try {
{ value: $e, description: (^nix eval --impure $e --apply "f: f.meta.description" err> /dev/null) }
} catch {
{ value: $e, description: "" }
}
}
} else {
$nix_output | each { |e|
{ value: $e, description: "" }
}
}
$entries
}
"systemctl" => {
if ($spans | length) < 3 { null } else {
let kw = $spans | last
let scope = if ("--user" in $spans) { [--user] } else { [] }
^systemctl ...$scope list-units --all --no-pager --plain --full --no-legend $"($kw)*"
| lines
| each { |l|
let parsed = $l | parse -r '(?P<unit>\S+)\s+\S+\s+\S+\s+\S+\s+(?P<desc>.*)'
if ($parsed | length) > 0 {
{value: $parsed.0.unit, description: ($parsed.0.desc | str trim)}
}
} | compact
}
}
_ => { null }
}
} else { null }
let result = ($completions | default []) | append ($additional | default []) | compact
if ($result | is-empty) { null } else { $result }
}
$env.config.completions.external = {enable: true, max_results: 200, completer: $inshellah_complete}
'';
};
};
}

View file

@ -1,35 +0,0 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "Nushell completions generator"
description:
"Inshellah parses manpages and --help switches to generate completions for nushell."
maintainer: ["atagen <boss@atagen.co>"]
authors: ["atagen <boss@atagen.co>"]
license: "GPL-3.0-or-later"
tags: ["shell" "completions" "nushell" "parser" "angstrom"]
homepage: "https://github.com/username/reponame"
bug-reports: "https://github.com/username/reponame/issues"
depends: [
"ocaml"
"dune" {>= "3.20"}
"angstrom"
"angstrom-unix"
"camlzip"
"odoc" {with-doc}
]
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
]
dev-repo: "git+https://github.com/username/reponame.git"
x-maintenance-intent: ["(latest)"]

View file

View file

@ -1,3 +0,0 @@
(library
(name inshellah)
(libraries angstrom angstrom-unix camlzip str unix))

File diff suppressed because it is too large Load diff

View file

@ -1,253 +0,0 @@
(* nushell.ml — generate nushell extern definitions from parsed help data.
*
* this module is the code generation backend. it takes a help_result (from
* the parser or manpage modules) and produces nushell source code that
* defines `extern` declarations — nushell's mechanism for teaching the shell
* about external commands' flags and subcommands so it can offer completions.
*
* it also maintains a list of nushell's built-in commands to avoid generating
* extern definitions that would shadow them.
*
* key responsibilities:
* - deduplicating flag entries (same flag from multiple help sources)
* - mapping parameter names to nushell types (path, int, string)
* - formatting flags in nushell syntax: --flag(-f): type # description
* - handling positional arguments with nushell's ordering constraints
* - escaping special characters for nushell string literals
*)
open Parser
module SSet = Set.Make(String)
module SMap = Map.Make(String)
module CSet = Set.Make(Char)
(* nushell built-in commands and keywords.
 *
 * we must never generate `extern` definitions for these names, because an
 * extern would shadow nushell's own implementation of the command.
 *
 * the list is maintained by hand (grouped alphabetically, one initial letter
 * per line or run of lines) and should be refreshed when a new nushell
 * release adds or removes built-ins. membership checks go through
 * `builtin_set` / `is_nushell_builtin` below rather than scanning this list. *)
let nushell_builtins = [
"alias"; "all"; "ansi"; "any"; "append"; "ast"; "attr";
"bits"; "break"; "bytes";
"cal"; "cd"; "char"; "chunk-by"; "chunks"; "clear"; "collect";
"columns"; "commandline"; "compact"; "complete"; "config"; "const";
"continue"; "cp";
"date"; "debug"; "decode"; "def"; "default"; "describe"; "detect";
"do"; "drop"; "du";
"each"; "echo"; "encode"; "enumerate"; "error"; "every"; "exec";
"exit"; "explain"; "explore"; "export"; "export-env"; "extern";
"fill"; "filter"; "find"; "first"; "flatten"; "for"; "format"; "from";
"generate"; "get"; "glob"; "grid"; "group-by";
"hash"; "headers"; "help"; "hide"; "hide-env"; "histogram";
"history"; "http";
"if"; "ignore"; "input"; "insert"; "inspect"; "interleave"; "into";
"is-admin"; "is-empty"; "is-not-empty"; "is-terminal"; "items";
"job"; "join";
"keybindings"; "kill";
"last"; "length"; "let"; "let-env"; "lines"; "load-env"; "loop"; "ls";
"match"; "math"; "merge"; "metadata"; "mkdir"; "mktemp"; "module";
"move"; "mut"; "mv";
"nu-check"; "nu-highlight";
"open"; "overlay";
"panic"; "par-each"; "parse"; "path"; "plugin"; "port"; "prepend"; "print"; "ps";
"query";
"random"; "reduce"; "reject"; "rename"; "return"; "reverse"; "rm";
"roll"; "rotate"; "run-external";
"save"; "schema"; "scope"; "select"; "seq"; "shuffle"; "skip"; "sleep";
"slice"; "sort"; "sort-by"; "source"; "source-env"; "split"; "start";
"stor"; "str"; "sys";
"table"; "take"; "tee"; "term"; "timeit"; "to"; "touch"; "transpose";
"try"; "tutor";
"ulimit"; "umask"; "uname"; "uniq"; "uniq-by"; "unlet"; "update";
"upsert"; "url"; "use";
"values"; "version"; "view";
"watch"; "where"; "which"; "while"; "whoami"; "window"; "with-env"; "wrap";
"zip";
]
(* string set of every built-in name, built on first use and then reused,
 * so that membership checks do not have to scan the list each time *)
let builtin_set =
  lazy (List.fold_left (fun names cmd -> SSet.add cmd names) SSet.empty nushell_builtins)

(* true when [cmd] is the name of a nushell built-in command or keyword,
 * i.e. a name we must not emit an `extern` for *)
let is_nushell_builtin cmd =
  let builtins = Lazy.force builtin_set in
  SSet.mem cmd builtins
(* collapse flag entries that describe the same flag into a single entry.
 *
 * entries are grouped by a canonical key: "-c" for a short-only flag and
 * "--name" for a long flag (with or without a paired short). within a group
 * the highest-quality entry wins; on a tie the earliest one is kept:
 *   - short+long pair present: +10
 *   - takes a parameter:       +5
 *   - description length:      +1 per 10 chars, capped at +5
 *
 * a standalone short flag is then dropped when its letter already appears as
 * the short half of a winning short+long pair, so we never emit both "-v"
 * and "--verbose(-v)" (nushell would reject the duplicate).
 *
 * the result lists each surviving key once, at the position where that key
 * first appeared in [entries]. *)
let dedup_entries entries =
  let switch_key = function
    | Short c -> "-" ^ String.make 1 c
    | Long name | Both (_, name) -> "--" ^ name
  in
  let quality (e : entry) =
    let pair_points = (match e.switch with Both _ -> 10 | Short _ | Long _ -> 0) in
    let param_points = (match e.param with None -> 0 | Some _ -> 5) in
    pair_points + param_points + min 5 (String.length e.desc / 10)
  in
  (* best entry per key; an incumbent is only replaced by a strictly better one *)
  let winners =
    List.fold_left
      (fun table (e : entry) ->
        SMap.update (switch_key e.switch)
          (function
            | Some incumbent when quality incumbent >= quality e -> Some incumbent
            | Some _ | None -> Some e)
          table)
      SMap.empty entries
  in
  (* short letters that are already expressed through a winning Both entry *)
  let paired_shorts =
    SMap.fold
      (fun _ (e : entry) letters ->
        match e.switch with
        | Both (c, _) -> CSet.add c letters
        | Short _ | Long _ -> letters)
      winners CSet.empty
  in
  (* walk the input in order, emitting the winner the first time a key shows up *)
  let rec emit seen = function
    | [] -> []
    | (e : entry) :: rest ->
      let key = switch_key e.switch in
      let shadowed =
        match e.switch with
        | Short c -> CSet.mem c paired_shorts
        | Long _ | Both _ -> false
      in
      if SSet.mem key seen || shadowed then emit seen rest
      else
        let winner = SMap.find key winners in
        winner :: emit (SSet.add key seen) rest
  in
  emit SSet.empty entries
(* infer the nushell type for a flag/positional parameter from its name.
 * `extern` declarations carry typed parameters, so the placeholder name from
 * the help text is the only hint we have:
 *   - file/directory-like names -> "path" (turns on nushell path completion)
 *   - count/size-like names     -> "int"
 *   - anything else             -> "string"
 * matching is exact and case-sensitive against the names listed below. *)
let nushell_type_of_param name =
  let path_like =
    [ "FILE"; "file"; "PATH"; "path"; "DIR"; "dir"; "DIRECTORY";
      "FILENAME"; "PATTERNFILE" ]
  in
  let int_like =
    [ "NUM"; "N"; "COUNT"; "NUMBER"; "int"; "INT"; "COLS"; "WIDTH";
      "LINES"; "DEPTH"; "depth" ]
  in
  if List.mem name path_like then "path"
  else if List.mem name int_like then "int"
  else "string"
(* make [s] safe to splice into a nushell double-quoted string literal by
 * putting a backslash in front of every double quote and every backslash.
 * strings that contain neither character are returned unchanged, without
 * allocating a copy. *)
let escape_nu s =
  let has_quote = String.contains s '"' in
  let has_backslash = String.contains s '\\' in
  if has_quote || has_backslash then begin
    let out = Buffer.create (String.length s + 4) in
    let emit = function
      | ('"' | '\\') as special ->
        Buffer.add_char out '\\';
        Buffer.add_char out special
      | plain -> Buffer.add_char out plain
    in
    String.iter emit s;
    Buffer.contents out
  end
  else s
(* render one flag entry as a parameter line of a nushell `extern` block.
 * examples of the shape produced:
 *   " --verbose(-v) # increase verbosity"
 *   " --output(-o): path # write output to file"
 *   " -n: int # number of results"
 *
 * a short+long pair uses nushell's "--long(-s)" spelling. a flag that takes
 * a value (mandatory or optional) gets ": type" appended, the type being
 * inferred from the parameter name. a non-empty description is appended as
 * a "# " comment, padded so it starts at column 40 when the flag text is
 * short enough, and always separated by at least one space. *)
let format_flag (entry : entry) =
  let switch_text =
    match entry.switch with
    | Short c -> "-" ^ String.make 1 c
    | Long long -> "--" ^ long
    | Both (c, long) -> "--" ^ long ^ "(-" ^ String.make 1 c ^ ")"
  in
  let type_suffix =
    match entry.param with
    | None -> ""
    | Some (Mandatory p | Optional p) -> ": " ^ nushell_type_of_param p
  in
  let line = " " ^ switch_text ^ type_suffix in
  if entry.desc = "" then line
  else
    let gap = max 1 (40 - String.length line) in
    String.concat "" [ line; String.make gap ' '; "# "; entry.desc ]
(* render one positional argument as a parameter line of a nushell `extern`.
 * variadic arguments are written "...name: type"; optional (non-variadic)
 * ones "name?: type"; required ones "name: type". hyphens in the name become
 * underscores, and the type is inferred from the upper-cased original name. *)
let format_positional positional =
  let ident = String.concat "_" (String.split_on_char '-' positional.pos_name) in
  let before, after =
    match positional.variadic, positional.optional with
    | true, _ -> ("...", "")
    | false, true -> ("", "?")
    | false, false -> ("", "")
  in
  let nu_type =
    positional.pos_name |> String.uppercase_ascii |> nushell_type_of_param
  in
  " " ^ before ^ ident ^ after ^ ": " ^ nu_type
(* normalise a positional list so it satisfies nushell's ordering rules:
 *   1. a required positional may not come after an optional one
 *   2. only one variadic ("rest") positional is allowed
 *
 * the list is walked once, front to back. the first variadic is kept and
 * any later variadic is dropped. once an optional or a variadic positional
 * has been seen, every later non-variadic positional is forced to optional.
 * relative order of the surviving positionals is unchanged. *)
let fixup_positionals positionals =
  let rec walk ~after_optional ~have_rest = function
    | [] -> []
    | p :: rest when p.variadic ->
      (* a second rest parameter is discarded without touching the state *)
      if have_rest then walk ~after_optional ~have_rest rest
      else p :: walk ~after_optional:true ~have_rest:true rest
    | p :: rest when after_optional ->
      { p with optional = true } :: walk ~after_optional:true ~have_rest rest
    | p :: rest ->
      p :: walk ~after_optional:p.optional ~have_rest rest
  in
  walk ~after_optional:false ~have_rest:false positionals
(* build the nushell source for a command's `extern` declaration.
 * the main block lists the (order-fixed) positionals first and then the
 * deduplicated flags, one per line:
 *   export extern "git add" [
 *   ...pathspec?: path
 *   --verbose(-v) # be verbose
 *   --dry-run(-n) # dry run
 *   ]
 *
 * every subcommand listed in [result.subcommands] additionally gets an
 * empty stub block whose only content is its description as a comment:
 *   export extern "git stash" [ # stash changes
 *   ]
 *
 * command name, subcommand names and subcommand descriptions are escaped
 * with [escape_nu] before being written. *)
let extern_of cmd_name result =
  let quoted = escape_nu cmd_name in
  let body = Buffer.create 256 in
  let add_line text =
    Buffer.add_string body text;
    Buffer.add_char body '\n'
  in
  List.iter (fun p -> add_line (format_positional p)) (fixup_positionals result.positionals);
  List.iter (fun e -> add_line (format_flag e)) (dedup_entries result.entries);
  let head = "export extern \"" ^ quoted ^ "\" [\n" ^ Buffer.contents body ^ "]\n" in
  (* stub declaration for one subcommand *)
  let stub (sub : subcommand) =
    Printf.sprintf "\nexport extern \"%s %s\" [ # %s\n]\n"
      quoted (escape_nu sub.name) (escape_nu sub.desc)
  in
  String.concat "" (head :: List.map stub result.subcommands)
(* public name for [extern_of]: callers outside this module use this entry
 * point to turn a parsed help result into `extern` source *)
let generate_extern cmd_name result = extern_of cmd_name result
(* turn a command name into the name of its nushell completion `module`.
 * ascii letters, digits, '-' and '_' are kept as they are; every other
 * character is replaced by '-'; then "-completions" is appended.
 * e.g. "git" -> "git-completions",
 *      "docker-compose" -> "docker-compose-completions" *)
let module_name_of cmd_name =
  let keep c =
    (c >= 'a' && c <= 'z')
    || (c >= 'A' && c <= 'Z')
    || (c >= '0' && c <= '9')
    || c = '-'
    || c = '_'
  in
  String.map (fun c -> if keep c then c else '-') cmd_name ^ "-completions"
(* wrap a command's `extern` source in a nushell `module` and import it.
 * shape of the output:
 *   module git-completions { ...externs... }
 *
 *   use git-completions *
 * the trailing `use` brings the declarations into scope right away. *)
let generate_module cmd_name result =
  let wrapper = module_name_of cmd_name in
  let body = extern_of cmd_name result in
  String.concat ""
    [ "module "; wrapper; " {\n"; body; "}\n\nuse "; wrapper; " *\n" ]
(* shortcut for callers that only have flag entries: builds a help result
 * with no subcommands, no positionals and an empty description around
 * [entries] and generates the `extern` for it *)
let generate_extern_from_entries cmd_name entries =
  let flags_only =
    { entries; subcommands = []; positionals = []; description = "" }
  in
  generate_extern cmd_name flags_only

View file

@ -1,814 +0,0 @@
(* parser.ml — parse --help output into structured flag/subcommand/positional data.
*
* this module is the core of inshellah's help-text understanding. it takes the
* raw text that a cli tool prints when you run `cmd --help` and extracts:
* - flag entries (short/long switches with optional parameters and descriptions)
* - subcommand listings (name + description pairs)
* - positional arguments (from usage lines)
*
* the parser is built on Angstrom (a monadic parser combinator library) for the
* structured flag/subcommand extraction, with hand-rolled imperative parsers for
* usage-line positional extraction (where the format is too varied for clean
* combinator composition).
*
* key design decisions:
* - the Angstrom parser runs in prefix-consume mode — it doesn't need to parse
* the entire input, just extract what it can recognize. unrecognized lines are
* skipped via skip_non_option_line.
* - multi-line descriptions are handled via indentation-based continuation:
* lines indented 8+ spaces that don't start with '-' are folded into the
* previous entry's description.
* - subcommand detection uses a heuristic: lines with a name followed by 2+
* spaces then a description, where the name is at least 2 chars. section
* headers (like "arguments:") toggle whether name-description pairs are
* treated as subcommands or positionals.
* - positional extraction has two paths: usage-line parsing (the common case)
* and CLI11's explicit "positionals:" section format.
*)
open Angstrom
(* remove ansi escape sequences and osc hyperlinks from --help output.
 * many cli tools emit styled output even when piped, so the text has to be
 * cleaned before parsing. three kinds of sequence are recognised, each
 * introduced by an ESC byte (0x1b) that is followed by at least one more byte:
 *   - csi: ESC [ ... final byte in '@'..'~'   (colors, cursor movement, ...)
 *   - osc: ESC ] ... terminated by BEL or ESC \  (hyperlinks, window titles, ...)
 *   - any other ESC + one character pair
 * an unterminated csi/osc sequence swallows the rest of the input. an ESC
 * that is the very last byte of the input is not treated as a sequence and
 * is copied through unchanged. *)
let strip_ansi s =
  let len = String.length s in
  let out = Buffer.create len in
  let is_esc i = Char.code s.[i] = 0x1b in
  (* index just past the csi sequence whose parameter bytes start at [i] *)
  let rec skip_csi i =
    if i >= len then i
    else if s.[i] >= '@' && s.[i] <= '~' then i + 1
    else skip_csi (i + 1)
  in
  (* index just past the osc sequence whose payload starts at [i] *)
  let rec skip_osc i =
    if i >= len then i
    else if s.[i] = '\x07' then i + 1
    else if i + 1 < len && is_esc i && s.[i + 1] = '\\' then i + 2
    else skip_osc (i + 1)
  in
  let rec scan i =
    if i < len then
      if i + 1 < len && is_esc i then
        scan
          (match s.[i + 1] with
           | '[' -> skip_csi (i + 2)
           | ']' -> skip_osc (i + 2)
           | _ -> i + 2)
      else begin
        Buffer.add_char out s.[i];
        scan (i + 1)
      end
  in
  scan 0;
  Buffer.contents out
(* --- character class predicates ---
 * small ascii classifiers shared by the Angstrom parsers below. *)

(* horizontal whitespace only: space or tab *)
let is_whitespace c = c = ' ' || c = '\t'

(* ascii letter or digit *)
let is_alphanumeric c =
  (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')

(* characters allowed inside parameter names such as FILE or output-dir *)
let is_param_char c = is_alphanumeric c || c = '_' || c = '-'

(* uppercase letter or underscore; used to spot ALL_CAPS parameter names
 * like FILE or TIME_STYLE *)
let is_upper_or_underscore c = (c >= 'A' && c <= 'Z') || c = '_'

(* characters allowed in long flag names (--foo-bar, --enable-feature2) *)
let is_long_char c = is_alphanumeric c || c = '-'
(* --- core types ---
 * these types represent the structured output of parsing a help text.
 * they are shared across the entire codebase (nushell codegen, store, manpage parser).
 *
 * switch: a flag can be short-only (-v), long-only (--verbose), or both (-v, --verbose).
 * the both variant keeps the pair together so nushell can emit "--verbose(-v)".
 *
 * param: flags can take mandatory (--output FILE) or optional (--color[=WHEN]) values.
 *
 * entry: one complete flag definition — its switch form, optional parameter, and
 * the description text (potentially multi-line, already joined).
 *
 * subcommand: a subcommand name together with its one-line description.
 *
 * positional: a positional argument's name plus whether it may be omitted
 * (optional) and whether it accepts repeated values (variadic).
 *
 * help_result: the complete parsed output for a single command. *)
type switch = Short of char | Long of string | Both of char * string
type param = Mandatory of string | Optional of string
type entry = { switch : switch; param : param option; desc : string }
type subcommand = { name : string; desc : string }
type positional = { pos_name : string; optional : bool; variadic : bool }
type help_result = { entries : entry list; subcommands : subcommand list; positionals : positional list; description : string }
(* --- low-level Angstrom combinators ---
 * building blocks for all the parsers below. *)
(* consume horizontal whitespace (spaces and tabs) without crossing lines.
 * succeeds without consuming anything when there is no whitespace. *)
let inline_ws = skip_while (function ' ' | '\t' -> true | _ -> false)
(* end of line — matches either a newline or end of input.
 * this is the permissive version used in most places. *)
let eol = end_of_line <|> end_of_input
(* strict end of line — must consume an actual newline character.
 * used in skip_non_option_line so we don't accidentally match eof
 * and consume it when we shouldn't. *)
let eol_strict = end_of_line
(* --- switch and parameter parsers ---
 * parse the flag name portion of an option line, e.g. "-v", "--verbose" *)
(* a single dash followed by one alphanumeric character; yields that character *)
let short_switch =
  char '-' >>= fun _ -> satisfy is_alphanumeric
(* a double dash followed by one or more long-flag characters; yields the name *)
let long_switch =
  string "--" >>= fun _ -> take_while1 is_long_char
(* a comma followed by optional inline whitespace *)
let comma =
  char ',' >>= fun _ -> inline_ws
(* parameter parsers: the various syntaxes tools use to indicate that a
 * flag takes a value. the formats are surprisingly diverse:
 *   --output=FILE     (eq_man_param: mandatory, common in gnu tools)
 *   --color[=WHEN]    (eq_opt_param: optional with = syntax)
 *   --depth DEPTH     (space_upper_param: space-separated ALL_CAPS)
 *   --file <path>     (space_angle_param: angle brackets)
 *   --file [<path>]   (space_opt_angle_param: optional angle brackets)
 *   --format string   (space_type_param: go/cobra lowercase type word)
 *)
(* "[=NAME]" yields Optional NAME *)
let eq_opt_param =
  string "[=" *> take_while1 is_param_char >>= fun name ->
  char ']' *> return (Optional name)
(* "=NAME" yields Mandatory NAME *)
let eq_man_param =
  char '=' *> take_while1 is_param_char >>= fun name ->
  return (Mandatory name)
(* space-separated ALL_CAPS param, e.g. " FILE" or " TIME_STYLE".
 * after the single space, the next character is peeked: it must be an
 * uppercase letter or underscore. the whole word (read with is_param_char)
 * is then accepted only if every character is an uppercase letter,
 * underscore or digit. this rejects description words such as "Do" or
 * "Set" that directly follow the flag name; digits are fine ("SHA256")
 * but a lowercase letter or '-' anywhere in the word disqualifies it. *)
let space_upper_param =
  let is_caps_or_digit ch =
    is_upper_or_underscore ch || (ch >= '0' && ch <= '9')
  in
  char ' ' *> peek_char_fail >>= fun first ->
  if not (is_upper_or_underscore first) then fail "not an uppercase param"
  else
    take_while1 is_param_char >>= fun word ->
    if String.length word >= 1 && String.for_all is_caps_or_digit word then
      return (Mandatory word)
    else
      fail "not an all-caps param"
(* angle-bracket param, e.g. "<file>" or "<notation>": yields Mandatory
 * with the (non-empty) text between the brackets *)
let angle_param =
  char '<' *> take_while1 (fun ch -> not (Char.equal ch '>')) >>= fun name ->
  char '>' *> return (Mandatory name)
(* a single space followed by an angle-bracket param *)
let space_angle_param =
  char ' ' >>= fun _ -> angle_param
(* optional angle-bracket param "[<file>]": yields Optional with the
 * (non-empty) text between the angle brackets *)
let opt_angle_param =
  string "[<" *> take_while1 (fun ch -> not (Char.equal ch '>')) >>= fun name ->
  char '>' *> char ']' *> return (Optional name)
(* a single space followed by an optional angle-bracket param *)
let space_opt_angle_param =
  char ' ' >>= fun _ -> opt_angle_param
(* go/cobra style: a single space followed by a lowercase type word such
 * as "string", "list" or "int" (e.g. "--timeout duration", "--name string").
 * the word consists of a-z only and is rejected when longer than 10
 * characters: longer words are almost certainly the start of a
 * description rather than a type annotation. *)
let space_type_param =
  let is_lower ch = ch >= 'a' && ch <= 'z' in
  char ' ' *> peek_char_fail >>= fun first ->
  if not (is_lower first) then fail "not a lowercase type param"
  else
    take_while1 is_lower >>= fun word ->
    if String.length word > 10 then fail "too long for type param"
    else return (Mandatory word)
(* optional parameter after a switch: yields Some param when one of the
 * parameter syntaxes matches, None otherwise (never fails).
 * alternatives are tried in the order listed and the first one that
 * succeeds wins: the "=" forms first, then the angle-bracket forms
 * (optional "[<file>]" before plain "<file>"), then the space-separated
 * ALL_CAPS form and finally the lowercase type word. *)
let param_parser =
  let any_param =
    choice
      [ eq_opt_param;
        eq_man_param;
        space_opt_angle_param;
        space_angle_param;
        space_upper_param;
        space_type_param ]
  in
  option None (any_param >>| Option.some)
(* switch parser: the various ways help text presents flag names.
 * formats handled (in order of attempt):
 *   -a, --all     (short + comma + long: gnu style)
 *   -a --all      (short + single space + long: some tools omit the comma)
 *   --all / -a    (long + slash + short: rare but seen in some tools)
 *   -a            (short only)
 *   --all         (long only)
 *
 * the ordering is critical: choice yields the first alternative that
 * succeeds, and short_switch alone already succeeds on "-a", so the
 * combined forms must be tried before the short-only form. *)
let switch_parser =
  let both_of short long = Both (short, long) in
  let short_comma_long =
    lift2 both_of short_switch (comma *> long_switch)
  in
  let short_space_long =
    lift2 both_of short_switch (char ' ' *> long_switch)
  in
  let long_slash_short =
    lift2 (fun long short -> both_of short long)
      long_switch
      (inline_ws *> char '/' *> inline_ws *> short_switch)
  in
  choice
    [ short_comma_long;
      short_space_long;
      long_slash_short;
      (short_switch >>| fun short -> Short short);
      (long_switch >>| fun long -> Long long) ]
(* --- description parsing with multi-line continuation ---
 * descriptions in help text often wrap across multiple lines. the convention
 * is that continuation lines are deeply indented (8+ columns) and don't start
 * with '-' (which would indicate a new flag entry). the parsers below peek
 * ahead to check indentation without consuming, then decide whether to fold
 * the line in. *)
(* everything up to, but not including, the next '\n' or '\r' (may be empty) *)
let rest_of_line =
  take_till (function
    | '\n' | '\r' -> true
    | _ -> false)
(* parse one continuation line of a description and yield its text.
 * a line qualifies when its leading indentation is at least 8 columns
 * (a space counts 1, a tab counts 8) and the first non-whitespace
 * character of the peeked preview (up to 80 bytes) is not '-'.
 * the 8-column threshold was chosen empirically: most help formatters
 * indent descriptions at least this much, while flag lines are indented
 * 2-4 spaces.
 * fails without consuming at end of input or when the line does not
 * qualify; otherwise consumes the indentation, the text and the line end. *)
let continuation_line =
  (* width of the leading run of spaces/tabs in [s] *)
  let indent_width s =
    let limit = String.length s in
    let rec go idx width =
      if idx >= limit then width
      else
        match s.[idx] with
        | ' ' -> go (idx + 1) (width + 1)
        | '\t' -> go (idx + 1) (width + 8)
        | _ -> width
    in
    go 0 0
  in
  peek_string 1 *> available >>= fun avail ->
  if avail = 0 then fail "eof"
  else
    peek_string (min avail 80) >>= fun preview ->
    let body = String.trim preview in
    let is_flag_like = String.length body > 0 && body.[0] = '-' in
    if indent_width preview < 8 || is_flag_like then
      fail "not a continuation line"
    else
      inline_ws *> rest_of_line <* eol
(* description text: the remainder of the current line (after switch and
 * param) plus any following continuation lines. every line is trimmed,
 * blank lines are dropped, and the rest is joined with single spaces. *)
let description =
  (inline_ws *> rest_of_line <* eol) >>= fun head ->
  many continuation_line >>| fun tail ->
  head :: tail
  |> List.map String.trim
  |> List.filter (fun line -> String.length line > 0)
  |> String.concat " "
(* description that sits on separate lines below the flag, as in the clap
 * (rust) long help format:
 *       --verbose
 *               increase verbosity
 * requires at least one continuation line; lines are trimmed, blank ones
 * dropped, and the rest joined with single spaces. *)
let description_below =
  many1 continuation_line >>| fun lines ->
  lines
  |> List.map String.trim
  |> List.filter (fun line -> String.length line > 0)
  |> String.concat " "
(* --- line classification for skipping ---
 * the parser needs to skip lines it doesn't understand (section headers,
 * blank lines, description paragraphs not attached to a flag, etc.)
 * without consuming lines that are flag entries. *)
(* zero-width check that the upcoming text looks like a flag entry: peeks
 * up to 40 bytes, trims them with String.trim, and succeeds when the
 * first remaining character is '-'. fails at end of input.
 * note that String.trim removes every leading whitespace character of
 * the preview, line breaks included. *)
let at_option_line =
  peek_string 1 *> available >>= fun avail ->
  if avail = 0 then fail "eof"
  else
    peek_string (min avail 40) >>| String.trim >>= fun head ->
    if String.length head > 0 && head.[0] = '-' then return ()
    else fail "not an option line"
(* skip one line that the other line parsers did not handle (section
 * header, blank line, free-standing paragraph, etc.).
 * uses eol_strict (not eol), so it cannot succeed at end of input; this
 * keeps a surrounding many from looping forever at eof.
 *
 * NOTE(review): the first alternative can never succeed - either
 * at_option_line fails, or it succeeds and the explicit fail runs. per the
 * Angstrom documentation, <|> resets the input and runs its right-hand
 * side whenever the left-hand side fails, so the second alternative
 * appears to run for option-looking lines as well; the guard does not
 * seem to stop such lines from being skipped. confirm whether that is
 * the intended behavior before relying on the guard. *)
let skip_non_option_line =
(at_option_line *> fail "this is an option line")
<|> (rest_of_line *> eol_strict *> return ())
(* --- entry parsing --- *)
(* a single flag entry: leading inline whitespace, then switch and optional
 * param, then the description. the description is first attempted with
 * description (text on the same line and/or continuation lines); the
 * fallback consumes a line end and then takes description_below, or the
 * empty string when no description follows. *)
let entry =
  let desc_text =
    description <|> (eol *> (description_below <|> return ""))
  in
  inline_ws *>
  lift3 (fun sw param desc -> { switch = sw; param; desc })
    switch_parser param_parser desc_text
(* --- subcommand parsing ---
 * subcommand lines in help text follow the pattern:
 *   "  name    description"
 * where the name and description are separated by 2+ spaces.
 * some tools also include argument placeholders between name and description:
 *   "  start UNIT...    start one or more units"
 *   "  list [PATTERN]   list matching units"
 *)
(* characters allowed in a subcommand name: ascii letters, digits, '-' and '_' *)
let is_subcommand_char ch =
  match ch with
  | '-' | '_' -> true
  | '0' .. '9' | 'A' .. 'Z' | 'a' .. 'z' -> true
  | _ -> false
(* skip argument placeholders such as UNIT..., [PATTERN...|PID...] or <file>
 * that appear between a subcommand name and its description.
 * only single-space gaps are consumed; the two-space gap in front of the
 * description is left for the caller to use as the delimiter.
 *
 * recursive (fix-point) parser. at each step it peeks two bytes: a space
 * followed by '[', '<' or an uppercase letter starts a candidate token.
 * the token runs up to the next space or line break within an 80-byte
 * preview and is accepted when it starts with '[' or '<', or consists
 * solely of uppercase letters, digits and the characters _ - . | ,
 * an accepted token is consumed together with its leading space and the
 * parser repeats; otherwise it succeeds without consuming anything. *)
let skip_arg_placeholders =
  let is_caps_token_char = function
    | 'A' .. 'Z' | '0' .. '9' | '_' | '-' | '.' | '|' | ',' -> true
    | _ -> false
  in
  (* the token that follows the leading space of [preview] *)
  let token_after_space preview =
    let limit = String.length preview in
    let rec scan idx =
      if idx < limit
         && preview.[idx] <> ' '
         && preview.[idx] <> '\n'
         && preview.[idx] <> '\r'
      then scan (idx + 1)
      else idx
    in
    String.sub preview 1 (scan 1 - 1)
  in
  fix (fun self ->
    available >>= fun avail ->
    if avail < 2 then return ()
    else
      peek_string (min avail 2) >>= fun head ->
      let single_space =
        String.length head >= 2 && head.[0] = ' ' && head.[1] <> ' '
      in
      let opens_arg =
        single_space
        && (match head.[1] with
            | '[' | '<' | 'A' .. 'Z' -> true
            | _ -> false)
      in
      if not opens_arg then return ()
      else
        peek_string (min avail 80) >>= fun preview ->
        let tok = token_after_space preview in
        let is_placeholder =
          tok.[0] = '[' || tok.[0] = '<'
          || String.for_all is_caps_token_char tok
        in
        if is_placeholder then advance (1 + String.length tok) *> self
        else return ())
(* parse one subcommand line.
 * requires a name of at least 2 characters, optionally followed by
 * argument placeholders, then 2+ spaces, then the description up to the
 * end of the line. the name is lowercased for consistent lookup.
 *
 * a leading "- " in the description is stripped. some tools format their
 * subcommand lists as
 *   "  add    - add a new item"
 * where the "- " is decorative, not part of the description. *)
let subcommand_entry =
  let clean_desc raw =
    let text = String.trim raw in
    let n = String.length text in
    if n >= 2 && text.[0] = '-' && text.[1] = ' ' then
      String.trim (String.sub text 2 (n - 2))
    else text
  in
  inline_ws *> take_while1 is_subcommand_char >>= fun name ->
  if String.length name < 2 then fail "subcommand name too short"
  else
    (skip_arg_placeholders *> char ' ' *> char ' ' *> inline_ws
     *> rest_of_line <* eol)
    >>| fun raw ->
    { name = String.lowercase_ascii name; desc = clean_desc raw }
(* --- section header detection ---
 * section headers are critical for disambiguating subcommands from positional
 * arguments. lines like "commands:" introduce subcommand sections, while
 * "arguments:" or "positionals:" introduce argument sections where the same
 * name+description format should not be treated as subcommands. *)
(* true when [s] names a positional-argument section. the comparison is
 * case-insensitive, ignores surrounding whitespace and one trailing colon,
 * and accepts "arguments", "args", "positionals" and "positional arguments". *)
let is_arg_section s =
  let lowered = String.lowercase_ascii (String.trim s) in
  let label =
    if String.ends_with ~suffix:":" lowered then
      String.trim (String.sub lowered 0 (String.length lowered - 1))
    else lowered
  in
  List.mem label [ "arguments"; "args"; "positionals"; "positional arguments" ]
(* a section header: a line indented by at most 4 spaces/tabs whose trimmed
 * text has at least 2 characters, ends with ':' and does not start with
 * '-'. the decision is made on an 80-byte preview of the input. must be
 * tried before subcommand_entry in the choice combinator, otherwise
 * "commands:" could be parsed as a subcommand.
 *
 * on success the whole line including its newline is consumed (eol_strict,
 * so a header at end of input without newline does not match) and the
 * result says whether this is an argument section (true) or some other
 * section (false). help_parser uses that flag to exclude entries listed
 * under argument sections from the subcommand list. *)
let section_header =
  let leading_blanks line =
    let rec go idx =
      if idx < String.length line && (line.[idx] = ' ' || line.[idx] = '\t')
      then go (idx + 1)
      else idx
    in
    go 0
  in
  available >>= fun avail ->
  if avail = 0 then fail "eof"
  else
    peek_string (min avail 80) >>= fun preview ->
    (* only the first line of the preview is inspected *)
    let first_line =
      match String.index_opt preview '\n' with
      | None -> preview
      | Some nl -> String.sub preview 0 nl
    in
    let label = String.trim first_line in
    let n = String.length label in
    let looks_like_header =
      n >= 2
      && label.[n - 1] = ':'
      && label.[0] <> '-'
      && leading_blanks first_line <= 4
    in
    if looks_like_header then
      (rest_of_line <* eol_strict) >>| fun line -> is_arg_section line
    else fail "not a section header"
(* --- top-level parser ---
 * the main help parser: repeatedly tries to read the upcoming line as one of
 *   1. a flag entry
 *   2. a section header (lightly indented text ending with ':')
 *   3. a subcommand line (name + 2+ spaces + description)
 *   4. anything else, which is skipped
 *
 * the choice ordering matters: entries come first, section headers must
 * precede subcommand_entry to avoid misparsing, and skipping is the fallback.
 *
 * after collecting all items, two post-processing steps happen:
 *   - subcommands under argument sections are excluded (tracked via a
 *     running flag that every section header overwrites)
 *   - duplicate subcommand names are merged, keeping the entry with the
 *     longer description (on equal length the earlier one is kept)
 *
 * positionals and description are left empty here; positionals are
 * filled in later by parse_help. *)
let help_parser =
  let open Angstrom in
  fix (fun _self ->
    let line_item =
      choice
        [ (entry >>| fun e -> `Entry e);
          (section_header >>| fun is_arg -> `Section is_arg);
          (subcommand_entry >>| fun sc -> `Subcommand sc);
          (skip_non_option_line >>| fun () -> `Skip) ]
    in
    (* subcommands in input order, minus those inside argument sections *)
    let visible_subcommands items =
      let step (in_arg_sec, kept) item =
        match item with
        | `Section is_arg -> (is_arg, kept)
        | `Subcommand sc when not in_arg_sec -> (in_arg_sec, sc :: kept)
        | _ -> (in_arg_sec, kept)
      in
      let _, kept = List.fold_left step (false, []) items in
      List.rev kept
    in
    (* one subcommand per name, preferring the longer description *)
    let dedupe subs =
      let merge table sc =
        match List.assoc_opt sc.name table with
        | Some prev when String.length prev.desc >= String.length sc.desc ->
          table
        | _ -> (sc.name, sc) :: List.remove_assoc sc.name table
      in
      List.fold_left merge [] subs |> List.rev_map snd
    in
    many line_item >>| fun items ->
    let entries =
      List.filter_map (function `Entry e -> Some e | _ -> None) items
    in
    let subcommands = dedupe (visible_subcommands items) in
    { entries; subcommands; positionals = []; description = "" })
(* --- usage line parsing ---
 * usage lines look like: "usage: git add [OPTIONS] [--] [<pathspec>...]"
 * to extract positional arguments, we first need to skip past the command
 * name prefix ("git add") to reach the argument portion.
 *
 * skip_command_prefix returns the index in [s] where the argument portion
 * starts. it walks word by word, treating each whitespace-separated token
 * as part of the command name as long as it:
 *   - is made of word chars (alphanumeric, hyphen, underscore, slash, dot)
 *   - contains at least one lowercase letter (to distinguish it from
 *     ALL_CAPS positional names like FILE)
 *   - doesn't start with '[', '<', '(', '{' or '-' (which indicate
 *     arguments, not command name components)
 * the returned index always points past any whitespace that precedes the
 * first non-command token (or equals the string length). *)
let skip_command_prefix s =
  let len = String.length s in
  let is_word_char = function
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' | '/' | '.' -> true
    | _ -> false
  in
  let rec skip_blanks idx =
    if idx < len && (s.[idx] = ' ' || s.[idx] = '\t') then skip_blanks (idx + 1)
    else idx
  in
  let rec word_end idx =
    if idx < len && is_word_char s.[idx] then word_end (idx + 1) else idx
  in
  let rec has_lowercase idx stop =
    idx < stop && ((s.[idx] >= 'a' && s.[idx] <= 'z') || has_lowercase (idx + 1) stop)
  in
  let rec walk from =
    let start = skip_blanks from in
    if start >= len then start
    else
      match s.[start] with
      | '[' | '<' | '(' | '{' | '-' -> start
      | ch when is_word_char ch ->
        let stop = word_end start in
        (* a word without lowercase letters is an argument, not a command *)
        if has_lowercase start stop then walk stop else start
      | _ -> start
  in
  walk 0
(* parse the argument portion of a usage line into positional definitions.
 * handles these syntactic forms:
 *   <file>       - mandatory positional
 *   [file]       - optional positional
 *   FILE         - mandatory positional (ALL_CAPS convention)
 *   <file>...    - variadic (three dots or the utf-8 ellipsis character)
 *   [file...]    - optional variadic (dots or ellipsis inside the brackets)
 *   curly-brace alternatives - skipped, not a positional
 *   -flag        - flags (skipped)
 *
 * names must be at least 2 characters long and consist of ascii letters,
 * digits, '_' and '-'. "OPTIONS", "OPTION", "FLAGS" and "FLAG" (any case)
 * are skipped because they are section labels that sometimes appear in
 * usage lines for readability. names are lowercased in the result.
 *
 * an unterminated "[name" is read up to the end of the string.
 *
 * the result is in order of first appearance; later occurrences of an
 * already-seen name are dropped. *)
let parse_usage_args s =
  let len = String.length s in
  let pos = ref 0 in
  let positionals = ref [] in
  (* utf-8 encoding of U+2026 HORIZONTAL ELLIPSIS *)
  let utf8_ellipsis = "\xe2\x80\xa6" in
  let skip_ws () =
    while !pos < len && (s.[!pos] = ' ' || s.[!pos] = '\t') do incr pos done in
  let is_pos_char c =
    (c >= 'A' && c <= 'Z') || c = '_' || c = '-' || (c >= '0' && c <= '9') in
  (* consume optional whitespace followed by "..." or the utf-8 ellipsis;
   * returns true when such a variadic marker was consumed *)
  let read_dots () =
    skip_ws ();
    if !pos + 2 < len && s.[!pos] = '.' && s.[!pos+1] = '.' && s.[!pos+2] = '.' then
      (pos := !pos + 3; true)
    else if !pos + 2 < len && s.[!pos] = '\xe2' && s.[!pos+1] = '\x80' && s.[!pos+2] = '\xa6' then
      (pos := !pos + 3; true)
    else false
  in
  (* strip a trailing variadic marker from bracket contents, accepting the
   * same two spellings as read_dots *)
  let split_trailing_dots text =
    let without suffix =
      String.sub text 0 (String.length text - String.length suffix) |> String.trim
    in
    if String.ends_with ~suffix:"..." text then (without "...", true)
    else if String.ends_with ~suffix:utf8_ellipsis text then (without utf8_ellipsis, true)
    else (text, false)
  in
  (* names that are section labels, not actual positional arguments *)
  let is_skip name =
    let u = String.uppercase_ascii name in
    u = "OPTIONS" || u = "OPTION" || u = "FLAGS" || u = "FLAG"
  in
  (* validate that a name contains only alphanumeric, underscore, hyphen chars *)
  let is_clean_name name =
    String.length name >= 2
    && String.for_all (fun c ->
        (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
        || (c >= '0' && c <= '9') || c = '_' || c = '-') name
  in
  let is_letter c = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') in
  let add name ~optional ~variadic =
    positionals := { pos_name = String.lowercase_ascii name; optional; variadic }
                   :: !positionals
  in
  (* skip {a|b|c} alternative blocks (nesting aware) plus a trailing
   * variadic marker; returns true when a block was skipped *)
  let skip_braces () =
    if !pos < len && s.[!pos] = '{' then begin
      let depth = ref 1 in
      incr pos;
      while !pos < len && !depth > 0 do
        if s.[!pos] = '{' then incr depth
        else if s.[!pos] = '}' then decr depth;
        incr pos
      done;
      ignore (read_dots ());
      true
    end else false
  in
  while !pos < len do
    skip_ws ();
    if !pos >= len then ()
    else if skip_braces () then ()
    else match s.[!pos] with
      | '[' ->
        (* optional positional: [name] or [<name>] or [name...] *)
        incr pos;
        let start = !pos in
        let depth = ref 1 in
        while !pos < len && !depth > 0 do
          if s.[!pos] = '[' then incr depth
          else if s.[!pos] = ']' then decr depth;
          incr pos
        done;
        (* when the bracket was closed the last consumed char is ']' and is
         * excluded; an unterminated bracket keeps everything up to the end *)
        let bracket_end = if !depth = 0 then !pos - 1 else !pos in
        let inner = String.sub s start (max 0 (bracket_end - start)) |> String.trim in
        let inner, has_inner_dots = split_trailing_dots inner in
        let variadic = has_inner_dots || read_dots () in
        if String.length inner > 0
        && inner.[0] <> '-'
        && (is_letter inner.[0] || inner.[0] = '<') then begin
          let name =
            if inner.[0] = '<' then
              let e = try String.index inner '>' with Not_found -> String.length inner in
              String.sub inner 1 (e - 1)
            else inner
          in
          if is_clean_name name && not (is_skip name) then
            add name ~optional:true ~variadic
        end
      | '<' ->
        (* mandatory positional in angle brackets: <name> *)
        incr pos;
        let start = !pos in
        while !pos < len && s.[!pos] <> '>' do incr pos done;
        let name = String.sub s start (!pos - start) in
        if !pos < len then incr pos;
        let variadic = read_dots () in
        if is_clean_name name && not (is_skip name) then
          add name ~optional:false ~variadic
      | '-' ->
        (* flag: skip up to the next blank or closing bracket *)
        while !pos < len && s.[!pos] <> ' ' && s.[!pos] <> '\t' && s.[!pos] <> ']' do incr pos done
      | c when c >= 'A' && c <= 'Z' ->
        (* ALL_CAPS positional name; every char already satisfies is_pos_char *)
        let start = !pos in
        while !pos < len && is_pos_char s.[!pos] do incr pos done;
        let name = String.sub s start (!pos - start) in
        let variadic = read_dots () in
        if String.length name >= 2 && not (is_skip name) then
          add name ~optional:false ~variadic
      | _ ->
        incr pos
  done;
  (* deduplicate positionals by name, keeping the first occurrence *)
  List.rev !positionals
  |> List.fold_left (fun (seen, acc) p ->
      if List.mem p.pos_name seen then (seen, acc)
      else (p.pos_name :: seen, p :: acc)
    ) ([], [])
  |> snd |> List.rev
(* find the usage line in the help text and extract positionals from it.
 * lines are searched in order for one that (after trimming) starts with
 * "usage:" in any letter case. two layouts are handled:
 *   - inline:  "usage: cmd [OPTIONS] FILE"
 *   - clap style, where the header stands alone and the usage proper is
 *     on the following line:
 *       USAGE:
 *         cmd [OPTIONS] FILE
 * a bare "usage" header (no colon) followed by a non-blank line is handled
 * the same way. a header whose following line is blank or missing is
 * ignored and the search continues. returns [] when no usage is found. *)
let extract_usage_positionals text =
  let lines = Array.of_list (String.split_on_char '\n' text) in
  let count = Array.length lines in
  (* trimmed content of the line after [idx], if present and non-blank *)
  let following idx =
    if idx + 1 < count then
      match String.trim lines.(idx + 1) with
      | "" -> None
      | next -> Some next
    else None
  in
  let usage_at idx =
    let line = String.trim lines.(idx) in
    let n = String.length line in
    let lowered = String.lowercase_ascii line in
    if n >= 6 && String.sub lowered 0 6 = "usage:" then
      match String.trim (String.sub line 6 (n - 6)) with
      | "" -> following idx
      | inline -> Some inline
    else if lowered = "usage" then following idx
    else None
  in
  let rec search idx =
    if idx >= count then None
    else
      match usage_at idx with
      | Some _ as hit -> hit
      | None -> search (idx + 1)
  in
  match search 0 with
  | None -> []
  | Some usage ->
    let cut = skip_command_prefix usage in
    parse_usage_args (String.sub usage cut (String.length usage - cut))
(* extract positionals from CLI11's explicit positionals section.
 * CLI11 (a c++ arg parsing library) emits a dedicated section:
 *   Positionals:
 *     name TEXT       description here
 *     count INT       another description
 *
 * the section starts at the first line whose trimmed text is exactly
 * "POSITIONALS:" or "Positionals:". the lines after it are read while they
 * start with a space or tab and are not blank. on each line the name is
 * the leading run of letters, digits, '_' and '-' (at least 2 characters,
 * otherwise the line is ignored); a following run of uppercase letters
 * (the type word: TEXT, INT, FLOAT, ...) is skipped, and a "..." right
 * after it marks the positional as variadic. every positional is reported
 * as non-optional with a lowercased name. returns [] without a section. *)
let extract_cli11_positionals text =
  let is_blank ch = ch = ' ' || ch = '\t' in
  let is_name_char = function
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' | '-' -> true
    | _ -> false
  in
  (* turn one trimmed body line into a positional, if it starts with a name *)
  let parse_one s =
    let len = String.length s in
    let rec span pred idx =
      if idx < len && pred s.[idx] then span pred (idx + 1) else idx
    in
    let name_end = span is_name_char 0 in
    if name_end < 2 then None
    else
      let after_type =
        name_end
        |> span is_blank
        |> span (fun ch -> ch >= 'A' && ch <= 'Z')
        |> span is_blank
      in
      let variadic =
        after_type + 2 < len
        && s.[after_type] = '.'
        && s.[after_type + 1] = '.'
        && s.[after_type + 2] = '.'
      in
      Some { pos_name = String.lowercase_ascii (String.sub s 0 name_end);
             optional = false;
             variadic }
  in
  (* read the indented, non-blank lines that follow the header *)
  let rec body acc = function
    | [] -> List.rev acc
    | line :: rest ->
      let indented =
        String.length line > 0 && (line.[0] = ' ' || line.[0] = '\t')
      in
      if not indented then List.rev acc
      else
        let trimmed = String.trim line in
        if String.length trimmed = 0 then List.rev acc
        else
          (match parse_one trimmed with
           | Some p -> body (p :: acc) rest
           | None -> body acc rest)
  in
  let rec find_section = function
    | [] -> []
    | line :: rest ->
      (match String.trim line with
       | "POSITIONALS:" | "Positionals:" -> body [] rest
       | _ -> find_section rest)
  in
  find_section (String.split_on_char '\n' text)
(* top-level entry point: parse a --help text string into a help_result.
 * steps:
 *   1. strip ansi escapes with strip_ansi
 *   2. run the Angstrom help_parser for flags and subcommands
 *   3. extract positionals from the CLI11 section (preferred) or, when that
 *      yields nothing, from the usage line
 *   4. store the positionals in the result
 * the parser runs in Angstrom's prefix-consume mode, so it does not have to
 * consume every byte. a parser failure is passed through as Error. *)
let parse_help txt =
  let clean = strip_ansi txt in
  Angstrom.parse_string ~consume:Consume.Prefix help_parser clean
  |> Result.map (fun result ->
      let from_cli11 = extract_cli11_positionals clean in
      let from_usage = extract_usage_positionals clean in
      let positionals =
        match from_cli11 with
        | [] -> from_usage
        | _ :: _ -> from_cli11
      in
      { result with positionals })

View file

@ -1,670 +0,0 @@
(* store.ml — filesystem-backed cache of parsed completion data.
*
* this module handles persistence of completion data to disk. each command's
* help_result is serialized to JSON and stored as a file in a cache directory
* (default: $XDG_CACHE_HOME/inshellah). commands with native nushell completions
* are stored as .nu files instead.
*
* the store also provides lookup, listing, and subcommand discovery by
* scanning filenames in the cache directory.
*
* file naming convention:
* - spaces in command names become underscores (e.g. "git add" -> "git_add.json")
* - subcommands of a parent share the prefix (e.g. "git_add.json", "git_commit.json")
* - .json files contain serialized help_result
* - .nu files contain native nushell extern source code
*
* the module includes a minimal hand-rolled JSON parser/serializer because
* we only need to handle our own output format (no need for a full JSON library).
*)
open Parser
(* get the default store path: $XDG_CACHE_HOME/inshellah.
 * following the XDG base directory specification, XDG_CACHE_HOME is only
 * honoured when it is set, non-empty and absolute; otherwise the default
 * ~/.cache is used, giving $HOME/.cache/inshellah.
 * raises Not_found when the fallback is needed and HOME is not set. *)
let default_store_path () =
  let cache =
    match Sys.getenv_opt "XDG_CACHE_HOME" with
    | Some dir when dir <> "" && not (Filename.is_relative dir) -> dir
    | Some _ | None -> Filename.concat (Sys.getenv "HOME") ".cache"
  in
  Filename.concat cache "inshellah"
(* recursively create directories along a path (equivalent to mkdir -p).
 * the path is split on the directory separator and folded over, building
 * up the prefix one component at a time and creating each level that
 * does not exist yet (mode 0o755). empty components (repeated or trailing
 * separators) are ignored.
 * a level that appears between the existence check and the mkdir call
 * (e.g. created by a concurrent process) is not an error: EEXIST from
 * Unix.mkdir is ignored. any other Unix_error propagates. *)
let ensure_dir dir =
  let sep = Filename.dir_sep in
  let parts = String.split_on_char sep.[0] dir in
  (* determine the starting prefix: absolute paths begin with the separator *)
  let start = if String.length dir > 0 && dir.[0] = sep.[0] then sep else "" in
  let make_level path =
    if not (Sys.file_exists path) then
      try Unix.mkdir path 0o755
      with Unix.Unix_error (Unix.EEXIST, _, _) -> ()
  in
  let descend current part =
    if part = "" then current
    else begin
      let next =
        if current = sep then sep ^ part
        else if current = "" then part
        else current ^ sep ^ part
      in
      make_level next;
      next
    end
  in
  ignore (List.fold_left descend start parts)
(* convert a command name to a safe filename: spaces become underscores,
 * ascii letters, digits, '-', '_' and '.' are kept, and every other
 * character becomes a hyphen.
 * e.g. "git add" -> "git_add", "docker-compose" -> "docker-compose" *)
let filename_of_command cmd =
  let sanitize ch =
    match ch with
    | ' ' -> '_'
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' | '.' -> ch
    | _ -> '-'
  in
  String.map sanitize cmd
(* inverse of filename_of_command: every underscore becomes a space.
 * note: this is lossy - an underscore that was part of the original
 * command name (e.g. "my_tool") also comes back as a space. in practice
 * this doesn't matter because tools with underscores in names are rare,
 * and subcommands use space-separated naming. *)
let command_of_filename base_name =
  String.map (fun ch -> if ch = '_' then ' ' else ch) base_name
(* --- JSON serialization of help_result ---
 * hand-rolled JSON emitters. we don't use a JSON library because:
 *   1. the schema is fixed and simple - we only serialize our own types
 *   2. avoiding dependencies keeps the binary small
 *   3. printf-style emission is fast and straightforward for our types *)
(* escape a string for embedding in a JSON string literal.
 * double quote and backslash get a backslash prefix; newline, tab and
 * carriage return become \n, \t and \r; any other character below 0x20
 * is written as a \uXXXX escape (lowercase hex). everything else is
 * copied unchanged. *)
let escape_json contents =
  let out = Buffer.create (String.length contents + 4) in
  let emit ch =
    match ch with
    | '"' -> Buffer.add_string out "\\\""
    | '\\' -> Buffer.add_string out "\\\\"
    | '\n' -> Buffer.add_string out "\\n"
    | '\t' -> Buffer.add_string out "\\t"
    | '\r' -> Buffer.add_string out "\\r"
    | _ when Char.code ch < 0x20 ->
      Buffer.add_string out (Printf.sprintf "\\u%04x" (Char.code ch))
    | _ -> Buffer.add_char out ch
  in
  String.iter emit contents;
  Buffer.contents out
(* a JSON string literal: the escaped text wrapped in double quotes *)
let json_string text =
  let escaped = escape_json text in
  Printf.sprintf "\"%s\"" escaped
(* the JSON null literal *)
let json_null = "null"
(* serialize a switch to a JSON object tagged by "type":
 * "short" (with "char"), "long" (with "name") or "both" (with both keys) *)
let json_switch_of switch =
  let json_char ch = json_string (String.make 1 ch) in
  match switch with
  | Both (short, long) ->
    Printf.sprintf "{\"type\":\"both\",\"char\":%s,\"name\":%s}"
      (json_char short) (json_string long)
  | Long long ->
    Printf.sprintf "{\"type\":\"long\",\"name\":%s}" (json_string long)
  | Short short ->
    Printf.sprintf "{\"type\":\"short\",\"char\":%s}" (json_char short)
(* serialize an optional parameter spec: null when absent, otherwise an
 * object with "kind" ("mandatory" or "optional") and "name" *)
let json_param_of param =
  match param with
  | Some (Optional name) ->
    Printf.sprintf "{\"kind\":\"optional\",\"name\":%s}" (json_string name)
  | Some (Mandatory name) ->
    Printf.sprintf "{\"kind\":\"mandatory\",\"name\":%s}" (json_string name)
  | None -> json_null
(* serialize one flag entry as an object with "switch", "param" and "desc" *)
let json_entry_of entry =
  let switch_json = json_switch_of entry.switch in
  let param_json = json_param_of entry.param in
  let desc_json = json_string entry.desc in
  Printf.sprintf "{\"switch\":%s,\"param\":%s,\"desc\":%s}"
    switch_json param_json desc_json
(* serialize a subcommand as an object with "name" and "desc" *)
let json_subcommand_of sc =
  let name_json = json_string sc.name in
  let desc_json = json_string sc.desc in
  Printf.sprintf "{\"name\":%s,\"desc\":%s}" name_json desc_json
(* serialize a positional argument as an object with "name" plus the
 * boolean fields "optional" and "variadic" *)
let json_positional_of p =
  let name_json = json_string p.pos_name in
  Printf.sprintf "{\"name\":%s,\"optional\":%b,\"variadic\":%b}"
    name_json p.optional p.variadic
(* serialize a list as a JSON array: each item is rendered with
 * [formatter] and the results are comma-separated inside brackets *)
let json_list formatter items =
  let body = items |> List.map formatter |> String.concat "," in
  "[" ^ body ^ "]"
(* serialize a whole help_result as one JSON object with the keys
 * "source", "description", "entries", "subcommands" and "positionals".
 * [source] defaults to "help". *)
let json_of_help_result ?(source="help") result =
  let entries = json_list json_entry_of result.entries in
  let subcommands = json_list json_subcommand_of result.subcommands in
  let positionals = json_list json_positional_of result.positionals in
  Printf.sprintf "{\"source\":%s,\"description\":%s,\"entries\":%s,\"subcommands\":%s,\"positionals\":%s}"
    (json_string source)
    (json_string result.description)
    entries
    subcommands
    positionals
(* --- JSON deserialization ---
 * minimal hand-rolled recursive-descent JSON parser. only handles the subset
 * we emit: strings, booleans, nulls, arrays, and objects. no number parsing
 * (we don't emit numbers). this is intentionally minimal: we only read back
 * our own serialized format, so robustness against arbitrary JSON is not needed.
 *
 * note: the \u escape handler does basic UTF-8 encoding for code points
 * up to 0xFFFF but doesn't handle surrogate pairs. this is fine for our use
 * case since we only escape control characters below 0x20. *)
(* a parsed JSON value: one constructor per supported value kind.
 * objects keep their members as an association list. *)
type json =
| Jnull
| Jbool of bool
| Jstring of string
| Jarray of json list
| Jobject of (string * json) list
(* JSON accessor helpers — return sensible defaults for missing/wrong types *)
(* member [key] of a JSON object; Jnull when the key is absent or the value
 * is not an object *)
let json_get key node =
  match node with
  | Jobject pairs ->
    (match List.assoc_opt key pairs with
     | Some value -> value
     | None -> Jnull)
  | _ -> Jnull
(* contents of a JSON string; "" for any other kind of value *)
let json_to_string node =
  match node with
  | Jstring text -> text
  | _ -> ""
(* value of a JSON boolean; false for any other kind of value *)
let json_to_bool node =
  match node with
  | Jbool value -> value
  | _ -> false
(* elements of a JSON array; [] for any other kind of value *)
let json_to_list node =
  match node with
  | Jarray items -> items
  | _ -> []
exception Json_error of string
(* imperative recursive-descent JSON parser for the subset we emit
 * (strings, booleans, null, arrays, objects; no numbers).
 * walks [contents] with a mutable position ref and returns the first value
 * found; any text after that value is ignored.
 *
 * raises Json_error on malformed or truncated input: unknown tokens,
 * misspelled true/false/null, bad \u escapes, and strings or escapes that
 * run off the end of the input. the last case matters most: a truncated
 * file must fail fast instead of spinning forever inside the string loop. *)
let parse_json contents =
  let len = String.length contents in
  let pos = ref 0 in
  let at_end () = !pos >= len in
  (* peek at the current character without consuming it ('\x00' at end of input) *)
  let peek () = if at_end () then '\x00' else contents.[!pos] in
  (* advance the position by one character *)
  let advance () = incr pos in
  (* abort parsing with a message tagged with the current offset *)
  let fail msg = raise (Json_error (Printf.sprintf "%s at %d" msg !pos)) in
  (* skip over any whitespace characters at current position *)
  let skip_ws () =
    let is_ws = function ' ' | '\t' | '\n' | '\r' -> true | _ -> false in
    while not (at_end ()) && is_ws contents.[!pos] do
      advance ()
    done in
  (* skip whitespace then consume the expected character, or raise *)
  let expect char_val =
    skip_ws ();
    if peek () <> char_val then
      fail (Printf.sprintf "expected '%c'" char_val);
    advance () in
  (* consume exactly the keyword [word] and yield [value], or raise *)
  let literal word value =
    let word_len = String.length word in
    if !pos + word_len <= len && String.sub contents !pos word_len = word then begin
      pos := !pos + word_len;
      value
    end else
      fail (Printf.sprintf "expected '%s'" word) in
  (* read the four hex digits of a \uXXXX escape and return the code point *)
  let parse_hex4 () =
    if !pos + 4 > len then fail "truncated \\u escape";
    let hex = String.sub contents !pos 4 in
    let is_hex = function
      | '0'..'9' | 'a'..'f' | 'A'..'F' -> true
      | _ -> false in
    if not (String.for_all is_hex hex) then fail "invalid \\u escape";
    pos := !pos + 4;
    int_of_string ("0x" ^ hex) in
  (* append [code] (at most 0xFFFF) to [buf] as 1-3 UTF-8 bytes; surrogate
   * halves are encoded individually, pairs are not combined *)
  let add_utf8 buf code =
    if code < 0x80 then Buffer.add_char buf (Char.chr code)
    else if code < 0x800 then begin
      Buffer.add_char buf (Char.chr (0xc0 lor (code lsr 6)));
      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
    end else begin
      Buffer.add_char buf (Char.chr (0xe0 lor (code lsr 12)));
      Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 6) land 0x3f)));
      Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3f)))
    end in
  (* mutually recursive parsers for each JSON value type *)
  let rec parse_value () =
    skip_ws ();
    match peek () with
    | '"' -> Jstring (parse_string ())
    | '{' -> parse_object ()
    | '[' -> parse_array ()
    | 'n' -> literal "null" Jnull
    | 't' -> literal "true" (Jbool true)
    | 'f' -> literal "false" (Jbool false)
    | char_val ->
      if at_end () then fail "unexpected end of input"
      else fail (Printf.sprintf "unexpected '%c'" char_val)
  (* parse a quoted string value, handling escape sequences *)
  and parse_string () =
    expect '"';
    let buf = Buffer.create 32 in
    let closed = ref false in
    while not !closed do
      (* running out of input inside a string is an error, not a NUL byte *)
      if at_end () then fail "unterminated string";
      let c = contents.[!pos] in
      advance ();
      if c = '"' then closed := true
      else if c <> '\\' then Buffer.add_char buf c
      else begin
        if at_end () then fail "unterminated escape";
        let esc = contents.[!pos] in
        advance ();
        match esc with
        | 'n' -> Buffer.add_char buf '\n'
        | 't' -> Buffer.add_char buf '\t'
        | 'r' -> Buffer.add_char buf '\r'
        | 'b' -> Buffer.add_char buf '\b'
        | 'f' -> Buffer.add_char buf '\x0c'
        | 'u' -> add_utf8 buf (parse_hex4 ())
        (* covers \" \\ and \/ : the escaped character stands for itself *)
        | other -> Buffer.add_char buf other
      end
    done;
    Buffer.contents buf
  (* parse a JSON object: { "key": value, ... } *)
  and parse_object () =
    expect '{';
    skip_ws ();
    if peek () = '}' then (advance (); Jobject [])
    else begin
      let pairs = ref [] in
      let more = ref true in
      while !more do
        skip_ws ();
        let key = parse_string () in
        expect ':';
        let value = parse_value () in
        pairs := (key, value) :: !pairs;
        skip_ws ();
        if peek () = ',' then advance ()
        else more := false
      done;
      expect '}';
      Jobject (List.rev !pairs)
    end
  (* parse a JSON array: [ value, value, ... ] *)
  and parse_array () =
    expect '[';
    skip_ws ();
    if peek () = ']' then (advance (); Jarray [])
    else begin
      let items = ref [] in
      let more = ref true in
      while !more do
        let value = parse_value () in
        items := value :: !items;
        skip_ws ();
        if peek () = ',' then advance ()
        else more := false
      done;
      expect ']';
      Jarray (List.rev !items)
    end
  in
  parse_value ()
(* --- JSON to OCaml type converters ---
 * these reconstruct our parser types from their JSON representations.
 * they mirror the json_*_of serializers above. *)
(* rebuild a switch from its JSON object. a missing or empty "char" becomes
 * '?', and an unrecognised "type" becomes Long "?". *)
let switch_of_json json_node =
  let field key = json_to_string (json_get key json_node) in
  let first_char_or_placeholder text =
    if String.length text > 0 then text.[0] else '?' in
  match field "type" with
  | "short" -> Short (first_char_or_placeholder (field "char"))
  | "long" -> Long (field "name")
  | "both" ->
    let short = first_char_or_placeholder (field "char") in
    Both (short, field "name")
  | _ -> Long "?"
(* rebuild a parameter spec from JSON: null or an unknown "kind" means the
 * flag takes no parameter *)
let param_of_json json_node =
  match json_node with
  | Jnull -> None
  | _ ->
    let name = json_to_string (json_get "name" json_node) in
    let kind = json_to_string (json_get "kind" json_node) in
    if kind = "mandatory" then Some (Mandatory name)
    else if kind = "optional" then Some (Optional name)
    else None
(* rebuild a flag entry from the "switch", "param" and "desc" members *)
let entry_of_json json_node =
  let field key = json_get key json_node in
  let switch = switch_of_json (field "switch") in
  let param = param_of_json (field "param") in
  let desc = json_to_string (field "desc") in
  { switch; param; desc }
(* rebuild a subcommand from the "name" and "desc" members *)
let subcommand_of_json json_node =
  let text key = json_to_string (json_get key json_node) in
  let name = text "name" in
  let desc = text "desc" in
  { name; desc }
(* rebuild a positional argument from the "name", "optional" and "variadic"
 * members; missing booleans read as false *)
let positional_of_json json_node =
  let flag key = json_to_bool (json_get key json_node) in
  let pos_name = json_to_string (json_get "name" json_node) in
  let optional = flag "optional" in
  let variadic = flag "variadic" in
  { pos_name; optional; variadic }
(* rebuild a complete help_result; any list member that is missing or not an
 * array becomes the empty list *)
let help_result_of_json json_node =
  (* convert every element of the array stored under [key] *)
  let items key convert = List.map convert (json_to_list (json_get key json_node)) in
  let entries = items "entries" entry_of_json in
  let subcommands = items "subcommands" subcommand_of_json in
  let positionals = items "positionals" positional_of_json in
  let description = json_to_string (json_get "description" json_node) in
  { entries; subcommands; positionals; description }
(* --- filesystem operations --- *)
(* write a string to a file, overwriting any existing content.
 * raises Sys_error if the file cannot be opened, written or closed.
 * the channel is closed on every path, so a failed write does not leak
 * the file descriptor. *)
let write_file path contents =
  let oc = open_out path in
  match output_string oc contents with
  | () -> close_out oc
  | exception e ->
    (* the write error is the one worth reporting; ignore close failures here *)
    close_out_noerr oc;
    raise e
(* read an entire file into a string, returning None on any error.
 * the channel is opened in binary mode so that in_channel_length equals the
 * number of bytes actually read, and it is closed on every path, including
 * when sizing or reading fails (e.g. [path] is a directory). *)
let read_file path =
  match open_in_bin path with
  | exception _ -> None
  | ic ->
    let result =
      try Some (really_input_string ic (in_channel_length ic))
      with _ -> None in
    close_in_noerr ic;
    result
(* store [result] for [command] as "<filename_of_command command>.json" inside
 * [dir]; [source] (default "help") is recorded in the JSON *)
let write_result ~dir ?(source="help") command result =
  let file_name = filename_of_command command ^ ".json" in
  let json = json_of_help_result ~source result in
  write_file (Filename.concat dir file_name) json
(* store native nushell completion source [data] for [command] as
 * "<filename_of_command command>.nu" inside [dir] *)
let write_native ~dir command data =
  let file_name = filename_of_command command ^ ".nu" in
  write_file (Filename.concat dir file_name) data
(* true only when [path] exists and names a directory *)
let is_dir path =
  if Sys.file_exists path then Sys.is_directory path else false
(* path of the first stored data file for [command].
 * directories are tried in the order given; within a directory the .json
 * file wins over the .nu file. None when no directory has either. *)
let find_file dirs command =
  let base_name = filename_of_command command in
  (* the path for [extension] in [directory], if such a file exists *)
  let candidate directory extension =
    let path = Filename.concat directory (base_name ^ extension) in
    if Sys.file_exists path then Some path else None in
  List.find_map (fun directory ->
    match candidate directory ".json" with
    | Some _ as hit -> hit
    | None -> candidate directory ".nu"
  ) dirs
(* parse a nushell .nu file to extract a help_result for a specific command.
 * .nu files contain `export extern "cmd" [ ... ]` blocks with flag definitions.
 * this parser extracts flags, positionals, subcommands, and descriptions
 * from the nushell extern syntax so the completer can use native completions.
 *
 * nushell extern parameter syntax:
 *   --flag(-s): type  # description    Both(s, "flag") with param
 *   --flag: type      # description    Long "flag" with param
 *   --flag            # description    Long "flag" no param
 *   -s                # description    Short 's'
 *   name: type        # description    positional
 *   name?: type                        optional positional
 *   ...name: type                      variadic positional
 *
 * a block's description is the `# ` comment line directly above its
 * `export extern` line. subcommands of [target_cmd] are the other blocks named
 * "<target_cmd> <word>". when no block is named [target_cmd] the result is
 * empty.
 *
 * an extern whose signature closes on its opening line (`export extern "x" []`)
 * is recorded as a block without parameters and closed at once; otherwise the
 * following lines, including later `export extern` lines, would be read as
 * its parameters. parameters written on that opening line are not parsed.
 *)
let parse_nu_completions target_cmd contents =
  let lines = String.split_on_char '\n' contents in
  (* every pattern is compiled once here rather than once per input line *)
  let re_extern = Str.regexp_string "export extern" in
  let re_quoted = Str.regexp {|export extern "\([^"]*\)"|} in
  let re_bare = Str.regexp {|export extern \([a-zA-Z0-9_-]+\)|} in
  let re_both = Str.regexp {|--\([a-zA-Z0-9-]+\)(-\([a-zA-Z0-9]\))\(: *\([a-zA-Z]+\)\)?|} in
  let re_long = Str.regexp {|--\([a-zA-Z0-9-]+\)\(: *\([a-zA-Z]+\)\)?|} in
  (* true when [re] matches somewhere in [text]; on success Str's match state
   * is left set so Str.matched_group can be used right after *)
  let found re text =
    try ignore (Str.search_forward re text 0); true
    with Not_found -> false in
  (* the type captured by group [n] of the last match as a mandatory
   * parameter, or None when that group did not take part in the match *)
  let type_param n text =
    try Some (Mandatory (Str.matched_group n text))
    with Not_found | Invalid_argument _ -> None in
  (* does the signature opened on [line] also close on it? only the part
   * before any trailing # comment is considered *)
  let closes_inline line =
    let code = match String.index_opt line '#' with
      | Some i -> String.sub line 0 i
      | None -> line in
    match String.index_opt code '[' with
    | Some i -> String.contains_from code i ']'
    | None -> false in
  (* most recent `# ` comment seen outside a block *)
  let current_desc = ref "" in
  (* collected extern blocks: (cmd_name, entries, positionals, description) *)
  let blocks = ref [] in
  let in_block = ref false in
  let block_cmd = ref "" in
  let block_entries = ref [] in
  let block_positionals = ref [] in
  let block_desc = ref "" in
  let finish_block () =
    if !in_block then begin
      blocks := (!block_cmd, List.rev !block_entries,
                 List.rev !block_positionals, !block_desc) :: !blocks;
      in_block := false
    end in
  (* record one parameter line of the current block.
   * [param_part] is the text before the # comment (at least 2 chars),
   * [desc] the comment text. *)
  let parse_param param_part desc =
    if param_part.[0] = '-' && param_part.[1] = '-' then begin
      (* long flag: --flag(-s): type or --flag: type or --flag *)
      if found re_both param_part then begin
        let long = Str.matched_group 1 param_part in
        let short = (Str.matched_group 2 param_part).[0] in
        let param = type_param 4 param_part in
        block_entries := { switch = Both (short, long); param; desc } :: !block_entries
      end else if found re_long param_part then begin
        let long = Str.matched_group 1 param_part in
        let param = type_param 3 param_part in
        block_entries := { switch = Long long; param; desc } :: !block_entries
      end
    end else if param_part.[0] = '-' then begin
      (* short flag: -s (letters only) *)
      let c = param_part.[1] in
      if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') then
        block_entries := { switch = Short c; param = None; desc } :: !block_entries
    end else begin
      (* positional: name: type or name?: type or ...name: type *)
      let variadic = String.starts_with ~prefix:"..." param_part in
      let part =
        if variadic then String.sub param_part 3 (String.length param_part - 3)
        else param_part in
      let optional =
        match String.index_opt part '?' with
        | Some q -> q > 0
        | None -> false in
      let name =
        match String.index_opt part ':' with
        | Some i -> String.trim (String.sub part 0 i)
        | None ->
          (match String.index_opt part '?' with
           | Some i -> String.trim (String.sub part 0 i)
           | None -> String.trim part) in
      let name = String.map (function '-' -> '_' | c -> c) name in
      if String.length name > 0 && name.[0] <> '-' then
        block_positionals := { pos_name = name; optional = optional || variadic;
                               variadic } :: !block_positionals
    end in
  List.iter (fun line ->
    let trimmed = String.trim line in
    if not !in_block then begin
      (* look for description comments and export extern lines *)
      if String.length trimmed > 2 && trimmed.[0] = '#' && trimmed.[1] = ' ' then
        current_desc := String.trim (String.sub trimmed 2 (String.length trimmed - 2))
      else if String.length trimmed > 15 && found re_extern trimmed then begin
        (* extract command name from: export extern "cmd name" [ or export extern cmd [ *)
        let cmd_opt =
          if found re_quoted trimmed then Some (Str.matched_group 1 trimmed)
          else if found re_bare trimmed then Some (Str.matched_group 1 trimmed)
          else None in
        match cmd_opt with
        | None -> ()
        | Some cmd ->
          in_block := true;
          block_cmd := cmd;
          block_entries := [];
          block_positionals := [];
          block_desc := !current_desc;
          current_desc := "";
          if closes_inline trimmed then finish_block ()
      end else
        current_desc := ""
    end else if String.length trimmed > 0 && trimmed.[0] = ']' then
      finish_block ()
    else begin
      (* inside an extern block: split off the # comment, then parse the rest *)
      let param_part, desc =
        match String.split_on_char '#' trimmed with
        | before :: rest ->
          (String.trim before, String.trim (String.concat "#" rest))
        | _ -> (trimmed, "") in
      if String.length param_part > 1 then parse_param param_part desc
    end
  ) lines;
  (* a file that ends inside a block still contributes that block *)
  finish_block ();
  let blocks = List.rev !blocks in
  (* find the block matching the target command *)
  match List.find_opt (fun (cmd, _, _, _) -> cmd = target_cmd) blocks with
  | Some (_, entries, positionals, description) ->
    (* collect subcommands from other blocks that are children of this command *)
    let prefix = target_cmd ^ " " in
    let subcommands = List.filter_map (fun (cmd, _, _, desc) ->
      if String.starts_with ~prefix cmd then
        let sub_name = String.sub cmd (String.length prefix)
            (String.length cmd - String.length prefix) in
        (* only immediate subcommands (no further spaces) *)
        if not (String.contains sub_name ' ') && String.length sub_name > 0
        then Some { name = sub_name; desc }
        else None
      else None
    ) blocks in
    { entries; subcommands; positionals; description }
  | None ->
    (* target not found — return empty result *)
    { entries = []; subcommands = []; positionals = []; description = "" }
(* find [command] in the store and return its help_result.
 * each directory is tried in order, and within a directory:
 *   1. <base>.json. if the file is readable its parse result decides the
 *      outcome for this directory (a parse failure yields None, with no
 *      fallback to .nu)
 *   2. <base>.nu, parsed as nushell extern syntax
 *   3. for commands containing a space ("rbw get"), the root command's .nu
 *      file (rbw.nu), since clap-generated .nu files hold all extern blocks
 *      in a single file. this counts only when the parsed result has at
 *      least one entry, subcommand or positional. *)
let lookup dirs command =
  let base_name = filename_of_command command in
  (* file stem of the root command, when [command] names a subcommand *)
  let parent_base =
    match String.index_opt command ' ' with
    | None -> None
    | Some i -> Some (filename_of_command (String.sub command 0 i)) in
  let has_content r =
    r.entries <> [] || r.subcommands <> [] || r.positionals <> [] in
  (* step 3: look for the subcommand's block inside the parent's .nu file *)
  let from_parent directory =
    match parent_base with
    | None -> None
    | Some pb ->
      (match read_file (Filename.concat directory (pb ^ ".nu")) with
       | None -> None
       | Some data ->
         (try
            let r = parse_nu_completions command data in
            if has_content r then Some r else None
          with _ -> None)) in
  List.find_map (fun directory ->
    match read_file (Filename.concat directory (base_name ^ ".json")) with
    | Some data ->
      (try Some (help_result_of_json (parse_json data))
       with _ -> None)
    | None ->
      (match read_file (Filename.concat directory (base_name ^ ".nu")) with
       | Some data ->
         (try Some (parse_nu_completions command data)
          with _ -> None)
       | None -> from_parent directory)
  ) dirs
(* raw stored text for [command], unparsed: the first directory that has a
 * readable <base>.json, or failing that a readable <base>.nu, supplies it.
 * used by the "query" command to dump stored data as-is. *)
let lookup_raw dirs command =
  let base_name = filename_of_command command in
  let read directory extension =
    read_file (Filename.concat directory (base_name ^ extension)) in
  List.find_map (fun directory ->
    match read directory ".json" with
    | None -> read directory ".nu"
    | found -> found
  ) dirs
(* remove a trailing ".json" or ".nu" from [filename]; None when it ends in
 * neither *)
let chop_extension filename =
  List.find_map (fun suffix ->
    if Filename.check_suffix filename suffix
    then Some (Filename.chop_suffix filename suffix)
    else None
  ) [".json"; ".nu"]
(* discover subcommands of a command from the file names in the store.
 * a file counts when its name is "<command file stem>_<sub>" plus a .json or
 * .nu extension and <sub> is non-empty and contains no further underscore
 * (e.g. for "git": "git_add.json", "git_commit.json").
 *
 * the first file seen for a given <sub> wins. its description is the JSON
 * "description" field for .json files and "" for .nu files or unreadable
 * JSON. the result is sorted by subcommand name.
 *
 * note: this filesystem-based discovery is used as a fallback when the
 * command's own help_result doesn't list subcommands. it enables completion
 * for subcommands that were indexed from separate manpages or help runs. *)
let subcommands_of dirs command =
  let prefix = filename_of_command command ^ "_" in
  let prefix_len = String.length prefix in
  let module SMap = Map.Make(String) in
  (* "description" field of a stored JSON file, or "" if it cannot be read *)
  let description_of directory filename =
    match read_file (Filename.concat directory filename) with
    | None -> ""
    | Some data ->
      (try json_to_string (json_get "description" (parse_json data))
       with _ -> "") in
  (* fold step: add [filename] from [directory] to [found] if it qualifies *)
  let consider directory found filename =
    if not (String.starts_with ~prefix filename) then found
    else
      match chop_extension filename with
      | None -> found
      | Some base_name ->
        let rest = String.sub base_name prefix_len (String.length base_name - prefix_len) in
        (* skip empty names, nested subcommands and names already recorded *)
        if String.length rest = 0 || String.contains rest '_' || SMap.mem rest found
        then found
        else
          let desc =
            if Filename.check_suffix filename ".json"
            then description_of directory filename
            else "" in
          SMap.add rest { name = rest; desc } found in
  let found =
    List.fold_left (fun found directory ->
      if is_dir directory
      then Array.fold_left (consider directory) found (Sys.readdir directory)
      else found
    ) SMap.empty dirs in
  List.map snd (SMap.bindings found)
(* every indexed command across all store directories, sorted and without
 * duplicates. a file contributes when it ends in .json or .nu; its stem is
 * mapped back to a command name with command_of_filename. *)
let all_commands dirs =
  let module SSet = Set.Make(String) in
  let add_directory cmds directory =
    if not (is_dir directory) then cmds
    else
      Sys.readdir directory
      |> Array.to_list
      |> List.filter_map chop_extension
      |> List.fold_left
           (fun cmds base_name -> SSet.add (command_of_filename base_name) cmds)
           cmds in
  SSet.elements (List.fold_left add_directory SSet.empty dirs)
(* how a command was indexed, taken from the first directory that has a file
 * for it. for a .json file this is its "source" field ("json" when the file
 * cannot be read or parsed); for a .nu file it is "native". None when no
 * directory has either file. used by the "dump" command to show provenance. *)
let file_type_of dirs command =
  let base_name = filename_of_command command in
  let source_of_json json_path =
    match read_file json_path with
    | None -> "json"
    | Some data ->
      (try json_to_string (json_get "source" (parse_json data))
       with _ -> "json") in
  List.find_map (fun directory ->
    let json_path = Filename.concat directory (base_name ^ ".json") in
    if Sys.file_exists json_path then Some (source_of_json json_path)
    else if Sys.file_exists (Filename.concat directory (base_name ^ ".nu"))
    then Some "native"
    else None
  ) dirs

813
nix/inshellah-completer.nu Normal file
View file

@ -0,0 +1,813 @@
# wrapper command that forwards all of its arguments, unchanged, to the external `sudo`.
# NOTE(review): `@complete external` presumably routes completion for this wrapper
# through the external completer — confirm against the Nushell attribute docs.
@complete external
def --wrapped sudo [...args] {
^sudo ...$args
}
# wrapper command that forwards all of its arguments, unchanged, to the external `doas`.
# NOTE(review): `@complete external` presumably routes completion for this wrapper
# through the external completer — confirm against the Nushell attribute docs.
@complete external
def --wrapped doas [...args] {
^doas ...$args
}
# normalise a candidate list: a null input counts as empty, null entries are
# dropped, and an empty result collapses to null.
let inshellah_nonempty = { |items|
    let kept = ($items | default [] | compact)
    if ($kept | is-not-empty) { $kept } else { null }
}
# score how well `needle` matches `haystack`; 0 means "no match".
#   empty needle                        -> 1
#   needle longer than haystack         -> 0
#   exact (case-sensitive) equality     -> 1000
#   case-insensitive prefix             -> 900 + (needle_len * 100 // haystack_len)
#   case-insensitive subsequence        -> sum over matched characters of
#       50 at a boundary (index 0, after "-" or "_", or a lower->upper
#       camelCase step in the original haystack) or 10 elsewhere,
#       plus 20 when the match directly follows the previous matched character
#   needle not fully matched            -> 0
# null arguments are treated as "" and both are converted to strings first.
let inshellah_fuzzy_score = { |needle, haystack|
let needle = $needle | default "" | into string
let haystack = $haystack | default "" | into string
let needle_len = ($needle | str length)
let haystack_len = ($haystack | str length)
if $needle_len == 0 {
1
} else if $needle_len > $haystack_len {
0
} else if $needle == $haystack {
1000
} else {
let needle_lc = $needle | str downcase
let haystack_lc = $haystack | str downcase
if ($haystack_lc | str starts-with $needle_lc) {
900 + (($needle_len * 100) // $haystack_len)
} else {
let needle_chars = $needle_lc | split chars
let haystack_chars = $haystack | split chars
let haystack_lc_chars = $haystack_lc | split chars
# single left-to-right pass over the haystack: needle_idx is the next needle
# character still to be matched, prev_match the haystack index of the last hit
let scored = (
$haystack_lc_chars
| enumerate
| reduce --fold {needle_idx: 0, score: 0, prev_match: -2} { |it, acc|
if $acc.needle_idx >= $needle_len {
$acc
} else if $it.item == ($needle_chars | get $acc.needle_idx) {
let idx = $it.index
let prev = if $idx == 0 { "" } else { $haystack_chars | get ($idx - 1) }
let current = $haystack_chars | get $idx
let boundary = (
($idx == 0)
or ($prev == "-")
or ($prev == "_")
or (($prev =~ '^[a-z]$') and ($current =~ '^[A-Z]$'))
)
let base = if $boundary { 50 } else { 10 }
let consecutive = if $acc.prev_match == ($idx - 1) { 20 } else { 0 }
{
needle_idx: ($acc.needle_idx + 1)
score: ($acc.score + $base + $consecutive)
prev_match: $idx
}
} else {
$acc
}
}
)
if $scored.needle_idx == $needle_len { $scored.score } else { 0 }
}
}
}
# filter and rank completion candidates against the typed `prefix`.
# - null entries are dropped first; if nothing is left the result is null
# - an empty prefix returns the remaining candidates unchanged
# - otherwise each candidate's `value` is scored with $inshellah_fuzzy_score;
#   candidates scoring 0 are removed, and so is a candidate whose value equals
#   the prefix exactly when its description (lower-cased) contains "subcommand"
#   or is "external command"
# - survivors are ordered by descending score, ties keeping their input order
# returns null when no candidate survives.
# assumes each item is a record with a `value` field — TODO confirm against callers.
let inshellah_filter_candidates = { |items, prefix|
let result = do $inshellah_nonempty $items
if $result == null {
null
} else if ($prefix | is-empty) {
$result
} else {
let needle = $prefix | into string
# __idx, __score and __rank are temporary columns, removed again before returning
let filtered = (
$result
| enumerate
| each { |row| $row.item | insert __idx $row.index }
| insert __score { |item| do $inshellah_fuzzy_score $needle $item.value }
| where { |item|
let value = ($item.value | into string)
let desc = ($item.description? | default "" | into string | str downcase)
let exact_command = ($value == $needle) and (($desc | str contains "subcommand") or $desc == "external command")
($item.__score > 0) and not $exact_command
}
| insert __rank { |item| 0 - $item.__score }
| sort-by __rank __idx
| reject __idx __score __rank
)
do $inshellah_nonempty $filtered
}
}
# ask the `inshellah` binary for completions of `spans`.
# runs `inshellah complete <spans...>` and parses its stdout as JSON.
# returns the parsed value when it is a non-empty list or table, and null in
# every other case: non-zero exit code, unparsable output, a non-list value,
# an empty list, or any error raised while running the command.
let inshellah_static_complete = { |spans|
try {
let completed = (^inshellah complete ...$spans | complete)
if $completed.exit_code != 0 {
null
} else {
let parsed = (try { $completed.stdout | from json } catch { null })
let parsed_type = ($parsed | describe)
if $parsed == null {
null
} else if (($parsed_type | str starts-with "list") or ($parsed_type | str starts-with "table")) {
do $inshellah_nonempty $parsed
} else {
null
}
}
} catch {
null
}
}
# list systemd units whose name matches the glob "<prefix>*".
# `scope` is a list of extra flags spliced in before `list-units`.
# each output line is split into the unit name (first column) and the
# description (everything after the fourth column); lines that do not match
# are dropped. yields {value, description} records, or null if an error is
# raised inside the try.
let inshellah_unit_candidates = { |scope, prefix|
try {
^systemctl ...$scope list-units --all --no-pager --plain --full --no-legend $"($prefix)*"
| lines
| each { |l|
let parsed = $l | parse -r '(?P<unit>\S+)\s+\S+\s+\S+\s+\S+\s+(?P<desc>.*)'
if ($parsed | length) > 0 {
{value: $parsed.0.unit, description: ($parsed.0.desc | str trim)}
}
} | compact
} catch { null }
}
# work out which namespace flags a kubectl command line selects.
# returns a record {args, all}:
#   -A / --all-namespaces present -> {args: [--all-namespaces], all: true}
#   a namespace was given         -> {args: [-n <namespace>], all: false}
#   otherwise                     -> {args: [], all: false}
# the namespace comes from the first `--namespace=VALUE` span, or failing that
# from the span that follows the last `-n` / `--namespace` span.
let inshellah_kubectl_scope = { |spans|
let all_namespaces = ("-A" in $spans) or ("--all-namespaces" in $spans)
let namespace_eq = ($spans | where { |s| $s =~ '^--namespace=' } | get 0? | default "")
let namespace_arg = (
$spans
| enumerate
| where { |it| $it.item == "-n" or $it.item == "--namespace" }
| reverse
| get 0?
| default null
)
let namespace = if not ($namespace_eq | is-empty) {
$namespace_eq | str replace --regex '^--namespace=' ''
} else if $namespace_arg != null and (($namespace_arg.index + 1) < ($spans | length)) {
$spans | get ($namespace_arg.index + 1)
} else {
""
}
if $all_namespaces {
{args: [--all-namespaces], all: true}
} else if not ($namespace | is-empty) {
{args: [-n $namespace], all: false}
} else {
{args: [], all: false}
}
}
# list the names of kubectl resources of type `kind`.
# returns null when `kind` is empty or looks like a flag (starts with "-"),
# or when an error is raised inside the try.
# the namespace flags are derived from `spans` via $inshellah_kubectl_scope.
# across all namespaces each candidate is {value: name, description: "<kind> in <namespace>"};
# otherwise it is {value: name, description: kind}.
let inshellah_kubectl_names = { |kind, spans|
if ($kind | is-empty) or ($kind | str starts-with "-") {
null
} else {
let scope = do $inshellah_kubectl_scope $spans
# with --all-namespaces the namespace is requested as an extra first column
let columns = if $scope.all {
"custom-columns=NAMESPACE:.metadata.namespace,NAME:.metadata.name"
} else {
"custom-columns=NAME:.metadata.name"
}
try {
let rows = (
^kubectl get $kind ...$scope.args --no-headers -o $columns
| lines
| str trim
| where { |n| not ($n | is-empty) }
)
if $scope.all {
$rows | each { |row|
let parts = $row | split row -r '\s+'
if ($parts | length) >= 2 {
{value: ($parts | get 1), description: $"($kind) in ($parts | get 0)"}
}
} | compact
} else {
$rows | each { |n| {value: $n, description: $kind} }
}
} catch { null }
}
}
# list git refs under refs/heads, refs/remotes and refs/tags.
# git prints three tab-separated fields per ref (short name, object type,
# subject); the candidate is {value: short name, description: subject}.
# lines with fewer than three fields are dropped. yields null if an error is
# raised inside the try.
let inshellah_git_refs = { ||
try {
^git for-each-ref --format='%(refname:short)%09%(objecttype)%09%(contents:subject)' refs/heads refs/remotes refs/tags
| lines
| each { |l|
let p = $l | split row "\t"
if ($p | length) >= 3 { {value: $p.0, description: $p.2} }
} | compact
} catch { null }
}
# list local git branches (refs/heads) as {value: short name, description: subject}.
# lines that do not split into at least two tab-separated fields are dropped.
# yields null if an error is raised inside the try.
let inshellah_git_branches = { ||
try {
^git for-each-ref --format='%(refname:short)%09%(contents:subject)' refs/heads
| lines
| each { |l|
let p = $l | split row "\t"
if ($p | length) >= 2 { {value: $p.0, description: $p.1} }
} | compact
} catch { null }
}
# list git tags (refs/tags) as {value: short name, description: subject}.
# lines that do not split into at least two tab-separated fields are dropped.
# yields null if an error is raised inside the try.
let inshellah_git_tags = { ||
try {
^git for-each-ref --format='%(refname:short)%09%(contents:subject)' refs/tags
| lines
| each { |l|
let p = $l | split row "\t"
if ($p | length) >= 2 { {value: $p.0, description: $p.1} }
} | compact
} catch { null }
}
# names of the configured git remotes, one candidate per non-blank line of
# `git remote`, each described as "remote". yields null if an error is raised
# inside the try.
let inshellah_git_remotes = { ||
    try {
        ^git remote
        | lines
        | str trim
        | where { |remote_name| $remote_name | is-not-empty }
        | each { |remote_name| {value: $remote_name, description: "remote"} }
    } catch { null }
}
# list git stashes as {value: "stash@{N}", description: text after the colon}.
# lines of `git stash list` that do not start with "stash@{N}:" are dropped.
# yields null if an error is raised inside the try.
let inshellah_git_stashes = { ||
try {
^git stash list
| lines
| each { |l|
let m = $l | parse -r '^(?P<stash>stash@\{[0-9]+\}):\s*(?P<desc>.*)$'
if ($m | length) > 0 { {value: $m.0.stash, description: $m.0.desc} }
} | compact
} catch { null }
}
# list paths reported by `git status --porcelain -uall`, each described as
# "changed path". the two status characters and the following space are
# stripped; for entries of the form "old -> new" only the part after the last
# " -> " is kept. yields null if an error is raised inside the try.
let inshellah_git_status_paths = { ||
try {
^git status --porcelain -uall
| lines
| each { |l|
let m = $l | parse -r '^.. (?P<path>.+)$'
if ($m | length) > 0 {
let raw = $m.0.path
let path = if ($raw | str contains " -> ") { $raw | split row " -> " | last } else { $raw }
{value: $path, description: "changed path"}
}
} | compact
} catch { null }
}
# every non-empty line of `git ls-files`, each described as "tracked file".
# yields null if an error is raised inside the try.
let inshellah_git_tracked_paths = { ||
    try {
        ^git ls-files
        | lines
        | where { |tracked| $tracked | is-not-empty }
        | each { |tracked| {value: $tracked, description: "tracked file"} }
    } catch { null }
}
# list submodule paths read from the `submodule.<name>.path` keys of .gitmodules
# in the current directory, each described as "submodule". every output line is
# split on whitespace and the second field is used as the path.
# yields null if an error is raised inside the try.
let inshellah_git_submodules = { ||
try {
^git config --file .gitmodules --get-regexp '^submodule\..*\.path$'
| lines
| each { |l|
let p = $l | split row -r '\s+'
if ($p | length) >= 2 { {value: $p.1, description: "submodule"} }
} | compact
} catch { null }
}
# list git worktree paths, taken from the "worktree <path>" lines of
# `git worktree list --porcelain`; other lines are ignored and the description
# is left empty. yields null if an error is raised inside the try.
let inshellah_git_worktrees = { ||
try {
^git worktree list --porcelain
| lines
| each { |l|
let m = $l | parse -r '^worktree\s+(?P<p>.+)$'
if ($m | length) > 0 { {value: $m.0.p, description: ""} }
} | compact
} catch { null }
}
# list jj revisions from the revset 'all()' as
# {value: shortest change id, description: first line of the description}.
# jj's stderr is discarded; lines without a tab separator are dropped.
# yields null if an error is raised inside the try.
let inshellah_jj_revs = { ||
try {
^jj log --ignore-working-copy --no-graph -r 'all()' -T 'change_id.shortest() ++ "\t" ++ description.first_line() ++ "\n"' err> /dev/null
| lines
| each { |l|
let p = $l | split row "\t"
if ($p | length) >= 2 { {value: $p.0, description: $p.1} }
} | compact
} catch { null }
}
# jj bookmark names (remote ones included), one candidate per non-blank output
# line, each described as "bookmark". jj's stderr is discarded.
# yields null if an error is raised inside the try.
let inshellah_jj_bookmarks = { ||
    try {
        ^jj bookmark list --all-remotes -T 'name ++ "\n"' err> /dev/null
        | lines
        | str trim
        | where { |bookmark| $bookmark | is-not-empty }
        | each { |bookmark| {value: $bookmark, description: "bookmark"} }
    } catch { null }
}
# jj tag names, one candidate per non-blank output line, each described as
# "tag". jj's stderr is discarded. yields null if an error is raised inside
# the try.
let inshellah_jj_tags = { ||
    try {
        ^jj tag list --all-remotes -T 'name ++ "\n"' err> /dev/null
        | lines
        | str trim
        | where { |tag_name| $tag_name | is-not-empty }
        | each { |tag_name| {value: $tag_name, description: "tag"} }
    } catch { null }
}
# list jj git remotes. each output line is trimmed and split on whitespace:
# the first field is the candidate value and the second field, when present,
# the description ("remote" otherwise). jj's stderr is discarded.
# yields null if an error is raised inside the try.
let inshellah_jj_remotes = { ||
try {
^jj git remote list err> /dev/null
| lines
| each { |l|
let p = $l | str trim | split row -r '\s+'
if ($p | length) >= 1 { {value: $p.0, description: ($p | get 1? | default "remote")} }
} | compact
} catch { null }
}
# list jj operations as
# {value: short operation id, description: first line of the description}.
# jj's stderr is discarded; lines without a tab separator are dropped.
# yields null if an error is raised inside the try.
let inshellah_jj_ops = { ||
try {
^jj op log --ignore-working-copy --no-graph -T 'id.short() ++ "\t" ++ description.first_line() ++ "\n"' err> /dev/null
| lines
| each { |l|
let p = $l | split row "\t"
if ($p | length) >= 2 { {value: $p.0, description: $p.1} }
} | compact
} catch { null }
}
# paths printed by `jj file list`, one candidate per non-blank output line,
# each described as "repo file". jj's stderr is discarded.
# yields null if an error is raised inside the try.
let inshellah_jj_files = { ||
    try {
        ^jj file list --ignore-working-copy err> /dev/null
        | lines
        | str trim
        | where { |file_path| $file_path | is-not-empty }
        | each { |file_path| {value: $file_path, description: "repo file"} }
    } catch { null }
}
# jj workspace names, one candidate per non-blank output line, each described
# as "workspace". jj's stderr is discarded. yields null if an error is raised
# inside the try.
let inshellah_jj_workspaces = { ||
    try {
        ^jj workspace list -T 'name ++ "\n"' err> /dev/null
        | lines
        | str trim
        | where { |workspace| $workspace | is-not-empty }
        | each { |workspace| {value: $workspace, description: "workspace"} }
    } catch { null }
}
let inshellah_complete = { |spans|
let completions = do $inshellah_static_complete $spans
let span_len = ($spans | length)
let last_span = if $span_len > 0 { $spans | last } else { "" }
let prev_span = if $span_len >= 2 { $spans | get ($span_len - 2) } else { "" }
let sub = if $span_len >= 2 { $spans | get 1 } else { "" }
let additional = if ($completions == null and $span_len > 0) {
match $spans.0 {
"nix" => {
if $span_len < 2 {
null
} else {
try {
let nix_output = (
with-env { NIX_GET_COMPLETIONS: ($span_len - 1) } {
$spans | run-external $in
}
| split row -r '\n'
| str trim
| skip 1
| where { |e| not ($e | is-empty) }
)
if (($nix_output | length) < 6 and
$last_span =~ "[a-zA-Z][a-zA-Z0-9_-]*#[a-zA-Z][a-zA-Z0-9_-]*") {
with-env { NIX_ALLOW_UNFREE: "1" NIX_ALLOW_BROKEN: "1" } {
$nix_output | par-each { |e|
try {
{value: $e, description: (^nix eval --raw --impure $e --apply "f: f.meta.description" err> /dev/null)}
} catch {
{value: $e, description: ""}
}
}
}
} else {
$nix_output | each { |e| {value: $e, description: ""} }
}
} catch { null }
}
}
"systemctl" => {
let unit_verbs = [
"status" "show" "cat" "help" "start" "stop" "restart" "reload" "try-restart"
"reload-or-restart" "reload-or-try-restart" "isolate" "kill" "reset-failed"
"enable" "disable" "reenable" "preset" "mask" "unmask" "is-active" "is-failed"
"is-enabled" "edit"
]
let args = $spans | skip 1 | where { |s| not ($s | str starts-with "-") }
let verb = $args | get 0? | default ""
if (($verb in $unit_verbs) and $span_len >= 3) {
let scope = if ("--user" in $spans) { [--user] } else { [] }
do $inshellah_unit_candidates $scope $last_span
} else { null }
}
"journalctl" => {
if ($prev_span == "--unit" or $prev_span == "-u") {
let scope = if ("--user-unit" in $spans or "--user" in $spans) { [--user] } else { [] }
do $inshellah_unit_candidates $scope $last_span
} else { null }
}
"coredumpctl" => {
let unit_verbs = ["dump" "info" "debug" "list"]
if (($sub in $unit_verbs) and $span_len >= 3) {
let units = (do $inshellah_unit_candidates [] $last_span | default [])
let pids = (try {
^coredumpctl list --no-pager --no-legend
| lines
| each { |l|
let p = $l | split row -r '\s+'
if ($p | length) >= 5 { {value: $p.4, description: $"PID ($p.4) ($p | get 9? | default "")"} }
} | compact
} catch { [] })
$units | append $pids
} else { null }
}
"loginctl" => {
let user_verbs = ["user-status" "show-user" "enable-linger" "disable-linger" "kill-user" "terminate-user"]
let session_verbs = ["session-status" "show-session" "activate" "lock-session" "unlock-session" "terminate-session" "kill-session"]
if (($sub in $user_verbs) and $span_len >= 3) {
try {
^loginctl list-users --no-pager --no-legend
| lines | each { |l|
let p = $l | str trim | split row -r '\s+'
if ($p | length) >= 2 { {value: $p.1, description: $"UID ($p.0)"} }
} | compact
} catch { null }
} else if (($sub in $session_verbs) and $span_len >= 3) {
try {
^loginctl list-sessions --no-pager --no-legend
| lines | each { |l|
let p = $l | str trim | split row -r '\s+'
if ($p | length) >= 3 { {value: $p.0, description: $"user ($p.2)"} }
} | compact
} catch { null }
} else { null }
}
"machinectl" => {
let machine_verbs = ["status" "show" "start" "login" "shell" "enable" "disable" "poweroff" "reboot" "terminate" "kill" "bind" "copy-to" "copy-from"]
if (($sub in $machine_verbs) and $span_len >= 3) {
try {
^machinectl list --no-pager --no-legend
| lines | each { |l|
let p = $l | str trim | split row -r '\s+'
if ($p | length) >= 1 { {value: $p.0, description: ($p | get 1? | default "")} }
} | compact
} catch { null }
} else { null }
}
"networkctl" => {
let link_verbs = ["status" "show" "up" "down" "renew" "forcerenew" "reconfigure" "delete"]
if (($sub in $link_verbs) and $span_len >= 3) {
try {
^networkctl list --no-pager --no-legend
| lines | each { |l|
let p = $l | str trim | split row -r '\s+'
if ($p | length) >= 4 { {value: $p.1, description: $"($p.2) ($p.3)"} }
} | compact
} catch { null }
} else { null }
}
"hostnamectl" | "timedatectl" | "localectl" => {
null
}
"ssh" | "scp" | "sftp" => {
let cfg_hosts = (try {
open ~/.ssh/config | lines | each { |l|
let m = $l | parse -r '(?i)^\s*Host\s+(?P<h>.+)$'
if ($m | length) > 0 { $m.0.h | split row -r '\s+' } else { [] }
} | flatten | where { |h| not ($h | str contains '*') and not ($h | is-empty) }
} catch { [] })
let known = (try {
open ~/.ssh/known_hosts | lines | each { |l|
($l | split row -r '\s+' | get 0? | default "") | split row ','
} | flatten | where { |h| (not ($h | is-empty)) and (not ($h | str starts-with '|')) and (not ($h | str starts-with '[')) }
} catch { [] })
$cfg_hosts | append $known | uniq | each { |h| {value: $h, description: ""} }
}
"docker" | "podman" => {
let need_container = ["exec" "logs" "inspect" "start" "stop" "restart" "rm" "kill" "attach" "cp" "top" "wait" "pause" "unpause" "port" "commit" "diff" "export"]
let need_image = ["run" "rmi" "tag" "push" "pull" "history" "save" "create"]
if ($sub in $need_container) {
try {
^($spans.0) ps -a --format '{{.Names}}\t{{.Image}}'
| lines | each { |l|
let p = $l | split row "\t"
if ($p | length) >= 2 { {value: $p.0, description: $p.1} }
} | compact
} catch { null }
} else if ($sub in $need_image) {
try {
^($spans.0) images --format '{{.Repository}}:{{.Tag}}\t{{.Size}}'
| lines | each { |l|
let p = $l | split row "\t"
if (($p | length) >= 2) and (not ($p.0 | str ends-with ':<none>')) {
{value: $p.0, description: $p.1}
}
} | compact
} catch { null }
} else { null }
}
"kubectl" => {
let resource_verbs = ["get" "describe" "delete" "edit" "scale" "annotate" "label"]
if (($sub in $resource_verbs) and $span_len >= 4) {
let kind = $spans | get 2? | default ""
do $inshellah_kubectl_names $kind $spans
} else if (($sub == "logs" or $sub == "exec" or $sub == "port-forward") and $span_len >= 3) {
do $inshellah_kubectl_names "pods" $spans
} else if ($sub == "rollout" and $span_len >= 5) {
let action = $spans | get 2? | default ""
let kind = $spans | get 3? | default ""
if ($action in ["history" "pause" "restart" "resume" "status" "undo"]) {
do $inshellah_kubectl_names $kind $spans
} else { null }
} else { null }
}
"git" => {
let git_verbs = [
"add" "bisect" "branch" "checkout" "cherry-pick" "clone" "commit" "diff"
"fetch" "grep" "init" "log" "merge" "mv" "pull" "push" "rebase" "reflog"
"remote" "reset" "restore" "revert" "rm" "show" "stash" "status"
"submodule" "switch" "tag" "worktree"
]
let ref_verbs = ["checkout" "merge" "rebase" "log" "diff" "show" "reset" "cherry-pick" "revert" "tag" "blame" "bisect"]
let branch_verbs = ["switch" "branch"]
let remote_verbs = ["add" "rename" "remove" "rm" "set-head" "set-branches" "get-url" "set-url" "show" "prune" "update"]
let stash_verbs = ["push" "save" "list" "show" "drop" "pop" "apply" "branch" "clear" "create" "store"]
let submodule_verbs = ["add" "status" "init" "deinit" "update" "set-branch" "set-url" "summary" "foreach" "sync" "absorbgitdirs"]
let bisect_verbs = ["start" "bad" "good" "new" "old" "terms" "skip" "next" "reset" "visualize" "view" "replay" "log" "run"]
let git_args = $spans | skip 2 | where { |s| not ($s | is-empty) and not ($s | str starts-with "-") }
if $span_len <= 2 {
$git_verbs | each { |v| {value: $v, description: "git subcommand"} }
} else if ($sub == "worktree") {
let worktree_verb = $spans | get 2? | default ""
if $span_len <= 3 {
["add" "list" "lock" "move" "prune" "remove" "repair" "unlock"] | each { |v| {value: $v, description: "worktree subcommand"} }
} else if ($worktree_verb in ["remove" "move" "lock" "unlock" "repair"]) {
do $inshellah_git_worktrees
} else if ($worktree_verb == "add" and $span_len >= 5) {
do $inshellah_git_refs
} else { null }
} else if ($sub == "remote" and $span_len >= 3) {
let remote_verb = $spans | get 2? | default ""
if $span_len <= 3 {
$remote_verbs | each { |v| {value: $v, description: "remote subcommand"} }
} else if ($remote_verb in ["rename" "remove" "rm" "set-head" "set-branches" "get-url" "set-url" "show" "prune" "update"]) {
do $inshellah_git_remotes
} else { null }
} else if (($sub in ["fetch" "push" "pull"]) and $span_len >= 3) {
if ($git_args | is-empty) {
do $inshellah_git_remotes
} else {
do $inshellah_git_refs
}
} else if ($sub == "stash" and $span_len >= 3) {
let stash_verb = $spans | get 2? | default ""
if $span_len <= 3 {
$stash_verbs | each { |v| {value: $v, description: "stash subcommand"} }
} else if ($stash_verb in ["show" "drop" "pop" "apply" "store"]) {
do $inshellah_git_stashes
} else if ($stash_verb == "branch" and ($git_args | length) >= 2) {
do $inshellah_git_stashes
} else { null }
} else if ($sub == "submodule" and $span_len >= 3) {
let submodule_verb = $spans | get 2? | default ""
if $span_len <= 3 {
$submodule_verbs | each { |v| {value: $v, description: "submodule subcommand"} }
} else if ($submodule_verb in ["status" "init" "deinit" "update" "set-branch" "set-url" "summary" "foreach" "sync"]) {
do $inshellah_git_submodules
} else { null }
} else if ($sub == "bisect" and $span_len >= 3) {
let bisect_verb = $spans | get 2? | default ""
if $span_len <= 3 {
$bisect_verbs | each { |v| {value: $v, description: "bisect subcommand"} }
} else if ($bisect_verb in ["bad" "good" "new" "old" "skip" "reset" "start"]) {
do $inshellah_git_refs
} else { null }
} else if ($sub == "tag" and $span_len >= 3) {
if (["-d" "--delete" "-v" "--verify"] | any { |f| $f in $spans }) {
do $inshellah_git_tags
} else if ($span_len >= 4) {
do $inshellah_git_refs
} else {
do $inshellah_git_tags
}
} else if ($sub == "add" and $span_len >= 3) {
do $inshellah_git_status_paths
} else if ($sub == "restore" and $span_len >= 3) {
if ($prev_span == "--source" or $prev_span == "-s") {
do $inshellah_git_refs
} else {
do $inshellah_git_status_paths
}
} else if ($sub == "rm" and $span_len >= 3) {
do $inshellah_git_tracked_paths
} else if ($sub == "mv" and $span_len >= 3) {
if ($git_args | is-empty) { do $inshellah_git_tracked_paths } else { null }
} else if ($sub == "checkout" and $span_len >= 3) {
if ($prev_span in ["-b" "-B" "--orphan"]) { null } else { do $inshellah_git_refs }
} else if ($sub == "switch" and $span_len >= 3) {
if ($prev_span in ["-c" "-C" "--create" "--force-create" "--orphan"]) { null } else { do $inshellah_git_branches }
} else if (($sub in $branch_verbs) and $span_len >= 3) {
do $inshellah_git_branches
} else if (($sub in $ref_verbs) and $span_len >= 3) {
do $inshellah_git_refs
} else { null }
}
"jj" => {
let jj_verbs = [
"abandon" "absorb" "bookmark" "commit" "describe" "diff" "diffedit"
"duplicate" "edit" "evolog" "file" "git" "interdiff" "log" "new"
"operation" "op" "rebase" "resolve" "restore" "revert" "show" "sparse"
"split" "squash" "status" "tag" "undo" "workspace" "b" "ci" "desc" "st"
]
let rev_flags = [
"-r" "--revision" "--revisions" "--from" "--to" "-s" "--source"
"-d" "--destination" "--insert-after" "--insert-before" "--before"
"--after" "--onto" "--change"
]
let rev_verbs = [
"abandon" "absorb" "describe" "diff" "diffedit" "duplicate" "edit"
"evolog" "interdiff" "log" "metaedit" "new" "parallelize" "rebase"
"restore" "revert" "show" "sign" "simplify-parents" "split" "squash"
"unsign"
]
let bookmark_verbs = ["advance" "create" "delete" "forget" "list" "move" "rename" "set" "track" "untrack"]
let jj_git_verbs = ["clone" "colocation" "export" "fetch" "import" "init" "push" "remote" "root"]
let jj_remote_verbs = ["add" "list" "remove" "rename" "set-url"]
let op_verbs = ["abandon" "diff" "integrate" "log" "restore" "revert" "show"]
let file_verbs = ["annotate" "chmod" "list" "search" "show" "track" "untrack"]
let workspace_verbs = ["add" "forget" "list" "rename" "root" "update-stale"]
let sparse_verbs = ["edit" "list" "reset" "set"]
let jj_args = $spans | skip 2 | where { |s| not ($s | is-empty) and not ($s | str starts-with "-") }
if ($prev_span in $rev_flags) {
do $inshellah_jj_revs
} else if ($prev_span == "--remote") {
do $inshellah_jj_remotes
} else if ($prev_span == "--at-operation" or $prev_span == "--at-op") {
do $inshellah_jj_ops
} else if $span_len <= 2 {
$jj_verbs | each { |v| {value: $v, description: "jj subcommand"} }
} else if ($sub == "bookmark" or $sub == "b") {
let verb = $spans | get 2? | default ""
if $span_len <= 3 {
$bookmark_verbs | each { |v| {value: $v, description: "bookmark subcommand"} }
} else if ($verb in ["delete" "forget" "move" "rename" "set" "track" "untrack" "advance"]) {
do $inshellah_jj_bookmarks
} else { null }
} else if ($sub == "tag") {
let verb = $spans | get 2? | default ""
if $span_len <= 3 {
["delete" "list" "set"] | each { |v| {value: $v, description: "tag subcommand"} }
} else if ($verb in ["delete" "set"]) {
do $inshellah_jj_tags
} else { null }
} else if ($sub == "git") {
let git_verb = $spans | get 2? | default ""
let remote_verb = $spans | get 3? | default ""
if $span_len <= 3 {
$jj_git_verbs | each { |v| {value: $v, description: "jj git subcommand"} }
} else if ($git_verb == "remote") {
if $span_len <= 4 {
$jj_remote_verbs | each { |v| {value: $v, description: "remote subcommand"} }
} else if ($remote_verb in ["remove" "rename" "set-url"]) {
do $inshellah_jj_remotes
} else { null }
} else if ($git_verb in ["fetch" "push"]) {
do $inshellah_jj_remotes
} else { null }
} else if ($sub == "operation" or $sub == "op") {
let verb = $spans | get 2? | default ""
if $span_len <= 3 {
$op_verbs | each { |v| {value: $v, description: "operation subcommand"} }
} else if ($verb in ["abandon" "diff" "integrate" "restore" "revert" "show"]) {
do $inshellah_jj_ops
} else { null }
} else if ($sub == "file") {
let verb = $spans | get 2? | default ""
if $span_len <= 3 {
$file_verbs | each { |v| {value: $v, description: "file subcommand"} }
} else if ($verb in ["annotate" "chmod" "list" "search" "show" "untrack"]) {
do $inshellah_jj_files
} else { null }
} else if ($sub == "workspace") {
let verb = $spans | get 2? | default ""
if $span_len <= 3 {
$workspace_verbs | each { |v| {value: $v, description: "workspace subcommand"} }
} else if ($verb in ["forget" "update-stale"]) {
do $inshellah_jj_workspaces
} else { null }
} else if ($sub == "sparse") {
if $span_len <= 3 {
$sparse_verbs | each { |v| {value: $v, description: "sparse subcommand"} }
} else { null }
} else if ($sub in ["diff" "log"] and ($jj_args | is-empty)) {
do $inshellah_jj_files
} else if ($sub in $rev_verbs and $span_len >= 3) {
do $inshellah_jj_revs
} else { null }
}
"npm" | "pnpm" | "yarn" => {
let wants = (
(($spans.0 == "yarn") and $span_len == 2)
or (($sub == "run" or $sub == "run-script") and $span_len == 3)
)
if $wants {
try {
open package.json | get scripts? | default {} | transpose name cmd
| each { |row| {value: $row.name, description: $row.cmd} }
} catch { null }
} else { null }
}
"make" => {
if $span_len <= 2 {
try {
open Makefile | lines
| each { |l|
let m = $l | parse -r '^(?P<t>[A-Za-z0-9_./-]+)\s*:'
if (($m | length) > 0) and (not ($m.0.t | str starts-with '.')) {
{value: $m.0.t, description: ""}
}
} | compact | uniq-by value
} catch { null }
} else { null }
}
"just" => {
if $span_len <= 2 {
try {
^just --list --unsorted
| lines | skip 1
| each { |l|
let m = $l | parse -r '^\s+(?P<t>[A-Za-z0-9_-]+)(?:\s+\S.*)?(?:\s*#\s*(?P<d>.*))?$'
if ($m | length) > 0 {
{value: $m.0.t, description: ($m.0.d? | default "")}
}
} | compact
} catch { null }
} else { null }
}
"cargo" => {
let target_flags = ["--bin" "--example" "--test" "--bench"]
if ($prev_span == "-p" or $prev_span == "--package") {
try {
^cargo metadata --no-deps --format-version 1
| from json
| get packages
| each { |pkg| {value: $pkg.name, description: ($pkg.version? | default "")} }
| uniq-by value
} catch { null }
} else if ($prev_span in $target_flags) {
let kind = $prev_span | str replace "--" ""
try {
^cargo metadata --no-deps --format-version 1
| from json
| get packages
| each { |pkg|
$pkg.targets
| where { |t| $kind in $t.kind }
| each { |t| {value: $t.name, description: ($t.kind | str join ",")} }
}
| flatten
| uniq-by value
} catch { null }
} else { null }
}
"kill" | "pkill" => {
try {
^ps -eo pid,comm --no-headers
| lines
| each { |l|
let parts = $l | str trim | split row -r '\s+'
if ($parts | length) >= 2 {
let pid = $parts | get 0
let comm = $parts | skip 1 | str join " "
if ($spans.0 == "kill") { {value: $pid, description: $comm} }
else { {value: $comm, description: $pid} }
}
} | compact
} catch { null }
}
_ => { null }
}
} else { null }
if $completions == null {
do $inshellah_filter_candidates $additional $last_span
} else {
$completions
}
}
# register $inshellah_complete as nushell's external completer, with at most 200 results per request.
$env.config.completions.external = {enable: true, max_results: 200, completer: $inshellah_complete}

View file

@ -10,7 +10,7 @@
#
# Usage:
# { pkgs, ... }: {
# imports = [ ./path/to/inshellah/nix/module.nix ];
# imports = [ ./path/to/inshellah-rs/nix/module.nix ];
# programs.inshellah.enable = true;
# }
@ -23,6 +23,34 @@
let
cfg = config.programs.inshellah;
completerSnippet = ./inshellah-completer.nu;
dynamicStubCommands = [
"systemctl"
"journalctl"
"coredumpctl"
"loginctl"
"machinectl"
"networkctl"
"hostnamectl"
"timedatectl"
"localectl"
"ssh"
"scp"
"sftp"
"docker"
"podman"
"kubectl"
"git"
"jj"
"npm"
"pnpm"
"yarn"
"make"
"just"
"cargo"
"pkill"
];
dynamicStubCommandArgs = lib.escapeShellArgs dynamicStubCommands;
in
{
options.programs.inshellah = {
@ -72,9 +100,33 @@ in
'';
};
timeoutMs = lib.mkOption {
type = lib.types.nullOr lib.types.int;
default = null;
example = 200;
description = ''
per-subprocess timeout in milliseconds. when null the binary's
compiled-in default is used (currently 200ms).
'';
};
workers = lib.mkOption {
type = lib.types.nullOr lib.types.int;
default = null;
example = 8;
description = ''
worker thread count for the parallel scrape pool. when null,
`std::thread::available_parallelism` is used.
'';
};
snippet = lib.mkOption {
type = lib.types.str;
readOnly = true;
default = builtins.readFile completerSnippet;
description = ''
nushell external completer snippet installed by the module.
'';
};
};
@ -98,7 +150,10 @@ in
(lib.hiPrio wrapped)
cfg.package
];
environment.pathsToLink = [ "/share/nushell/autoload" ];
environment.pathsToLink = [
"/share/nushell/autoload"
"/share/nushell/vendor/autoload"
];
environment.extraSetup =
let
inshellah = "${cfg.package}/bin/inshellah";
@ -109,30 +164,38 @@ in
lib.concatStringsSep "\n" cfg.helpOnlyCommands
);
helpOnlyFlag = lib.optionalString (cfg.helpOnlyCommands != [ ]) " --help-only ${helpOnlyFile}";
timeoutFlag = lib.optionalString (cfg.timeoutMs != null) " --timeout-ms ${toString cfg.timeoutMs}";
workersFlag = lib.optionalString (cfg.workers != null) " --workers ${toString cfg.workers}";
snippetFile = pkgs.writeText "inshellah-completer.nu" cfg.snippet;
in
''
mkdir -p ${destDir}
if [ -d "$out/bin" ] && [ -d "$out/share/man" ]; then
${inshellah} index "$out" --dir ${destDir}${ignoreFlag}${helpOnlyFlag} \
${inshellah} index "$out" --dir ${destDir}${ignoreFlag}${helpOnlyFlag}${timeoutFlag}${workersFlag} \
2>/dev/null || true
fi
find ${destDir} -maxdepth 1 -empty -delete
# nushell hardcodes sudo and doas to bypass the external completer,
# returning command-name completion instead of calling inshellah.
# these @complete external stubs override that so inshellah handles
# their flags and elevation stripping. placed in the nushell autoload
# dir so they are sourced automatically at shell startup.
# Install the full nushell completer plus sudo/doas wrapped commands.
# Nushell otherwise hardcodes sudo/doas to bypass external completers.
mkdir -p $out/share/nushell/vendor/autoload
cat > $out/share/nushell/vendor/autoload/inshellah-elevation.nu << 'NUSHELL'
@complete external
extern "sudo" []
cp ${snippetFile} $out/share/nushell/vendor/autoload/inshellah.nu
@complete external
extern "doas" []
NUSHELL
# Register command names for dynamic backends that are actually present
# in the linked profile. The externs keep Nu's command list aware of
# these commands while the external completer still supplies arguments.
stubFile=$out/share/nushell/vendor/autoload/inshellah-command-stubs.nu
: > "$stubFile"
for cmd in ${dynamicStubCommandArgs}; do
if [ -x "$out/bin/$cmd" ]; then
printf '@complete external\nextern "%s" [...args]\n\n' "$cmd" >> "$stubFile"
fi
done
if [ ! -s "$stubFile" ]; then
rm -f "$stubFile"
fi
'';
};
}

4
src/lib.rs Normal file
View file

@ -0,0 +1,4 @@
pub mod parsers;
pub mod pool;
pub mod store;
pub mod types;

2241
src/main.rs Normal file

File diff suppressed because it is too large Load diff

187
src/parsers/help.rs Normal file
View file

@ -0,0 +1,187 @@
mod description;
mod helpers;
mod options;
mod positionals;
mod subcommands;
pub use options::{param_parser, parse_usage_flags, switch_parser};
pub use positionals::{
extract_cli11_positionals, extract_usage_positionals, parse_usage_args, skip_command_name,
};
use std::collections::HashMap;
use crate::{
parsers::help::{description::description, helpers::get_indent, subcommands::subcommand_entry},
types::*,
};
use nom::{IResult, Parser, character::complete::space0, combinator::opt};
use crate::make_parser;
/// raw output of the combinator sequence used by `entry`, before it is
/// folded into an `OptionEntry`:
/// (leading whitespace, (switch, optional param), (first description line,
/// continuation lines)).
type EntryParts<'a> = (
    &'a str,
    (Switch<'a>, Option<Param<'a>>),
    (&'a str, Vec<&'a str>),
);

// one flag entry: optional indent, a switch with its optional parameter,
// then the description block. whitespace-only description lines (the first
// line as well as continuations) are dropped from the resulting `desc`.
make_parser!(entry -> OptionEntry<'a>,
    (
        space0,
        (switch_parser, opt(param_parser)),
        description,
    )
    => |parts: EntryParts<'a>| {
        let (_indent, (switch, param), (head, tail)) = parts;
        let desc: Vec<&str> = std::iter::once(head)
            .chain(tail)
            .filter(|line| !line.trim().is_empty())
            .collect();
        OptionEntry { switch, param, desc }
    }
);
/// dedup raw subcommands by case-insensitive name, keeping the entry with
/// the longest description. preserves first-seen ordering.
fn dedup_subcommands<'a>(raw: Vec<Subcommand<'a>>) -> Vec<Subcommand<'a>> {
let mut by_name: HashMap<String, Subcommand<'a>> = HashMap::new();
let mut order: Vec<String> = Vec::new();
for sc in raw {
let key = sc.name.to_ascii_lowercase();
match by_name.get(&key) {
Some(prev) if prev.desc.len() >= sc.desc.len() => {}
_ => {
if !by_name.contains_key(&key) {
order.push(key.clone());
}
by_name.insert(key, sc);
}
}
}
order
.into_iter()
.map(|k| by_name.remove(&k).unwrap())
.collect()
}
/// which part of the help text the line scanner in `build_help_result` is
/// currently inside; it decides which entry parser is tried on each line.
#[derive(Clone, Copy, PartialEq, Eq)]
enum HelpSection {
// initial state, also re-entered on a "usage" line; option entries are accepted here.
Unknown,
// an option/flag-like header was seen; option entries are accepted.
Options,
// a command/subcommand header was seen; subcommand entries are accepted.
Commands,
// any other recognised header (arguments, examples, ...); nothing is parsed.
Other,
}
/// decide whether `line` is a section header and, if so, which kind.
///
/// returns `None` when the line is not a header: it is indented by more
/// than 4 visual columns, is blank, starts with `-` (a flag entry), or
/// matches none of the header heuristics below. otherwise returns the
/// section that the following lines belong to. checks run in priority
/// order, so e.g. "valid arguments" wins over the generic "argument" rule.
fn classify_section_line(line: &str) -> Option<HelpSection> {
    let (idx, indent) = get_indent(line);
    // headers sit at (or near) the left margin; deeper lines are entries.
    if indent > 4 {
        return None;
    }
    let trimmed = line[idx..].trim();
    // a line starting with '-' is a flag entry, never a header. without
    // this guard an entry such as "  -v  print every flag" would match the
    // keyword heuristics below, be consumed as a header and never parsed.
    if trimmed.is_empty() || trimmed.starts_with('-') {
        return None;
    }
    let without_colon = trimmed.trim_end_matches(':').trim();
    let lower = without_colon.to_ascii_lowercase();

    // a usage line resets the scanner to the "no section yet" state.
    if lower.starts_with("usage") {
        return Some(HelpSection::Unknown);
    }
    // value listings and examples: nothing to extract from these.
    if lower.starts_with("valid arguments")
        || lower.contains(" is one of the following")
        || lower.contains(" defaults to")
        || lower == "examples"
        || lower == "example"
    {
        return Some(HelpSection::Other);
    }
    // command headers, unless the text also mentions options/flags.
    let command_header = matches!(lower.as_str(), "command" | "commands" | "subcommands")
        || lower.ends_with(" commands")
        || lower.ends_with(" subcommands");
    if command_header && !lower.contains("option") && !lower.contains("flag") {
        return Some(HelpSection::Commands);
    }
    // positional-argument sections ("positional arguments" is covered by
    // the `contains("argument")` test).
    if lower.contains("argument") || lower == "args" || lower == "positionals" {
        return Some(HelpSection::Other);
    }
    // anything mentioning options/flags, or any other "Something:" header.
    if lower.contains("option") || lower.contains("flag") || trimmed.ends_with(':') {
        return Some(HelpSection::Options);
    }
    None
}
/// drop the first line of `s`, returning everything after the first `'\n'`.
/// yields `""` when `s` contains no newline.
fn consume_line(s: &str) -> &str {
    s.split_once('\n').map_or("", |(_, tail)| tail)
}
/// true when a parser left strictly less input (`rem`) than it was given
/// (`original`), i.e. it consumed at least one byte.
fn parser_made_progress(original: &str, rem: &str) -> bool {
    original.len() > rem.len()
}
/// scan `original` line by line and assemble the `HelpResult`.
///
/// a lightweight notion of "current section" steers the scan: option
/// entries are tried while the section is unknown or option-like,
/// subcommand entries only inside command-like sections, and nothing is
/// parsed in other sections. lines that no parser accepts are skipped.
fn build_help_result<'a>(original: &'a str) -> HelpResult<'a> {
    let mut entries = Vec::new();
    let mut raw_subcommands: Vec<Subcommand<'a>> = Vec::new();
    let mut section = HelpSection::Unknown;
    let mut cursor = original;

    while !cursor.is_empty() {
        let current_line = cursor.find('\n').map_or(cursor, |end| &cursor[..end]);

        // header lines switch the section and are never parsed as entries.
        if let Some(header) = classify_section_line(current_line) {
            section = header;
            cursor = consume_line(cursor);
            continue;
        }

        // try the parser that fits the section; a parse only counts when
        // it actually consumed input, otherwise the loop could stall.
        let advanced = match section {
            HelpSection::Unknown | HelpSection::Options => match entry(cursor) {
                Ok((next, parsed)) if parser_made_progress(cursor, next) => {
                    entries.push(parsed);
                    Some(next)
                }
                _ => None,
            },
            HelpSection::Commands => match subcommand_entry(cursor) {
                Ok((next, parsed)) if parser_made_progress(cursor, next) => {
                    raw_subcommands.push(parsed);
                    Some(next)
                }
                _ => None,
            },
            HelpSection::Other => None,
        };

        cursor = match advanced {
            Some(next) => next,
            // nothing matched here: skip the line and keep scanning.
            None => consume_line(cursor),
        };
    }

    let subcommands = dedup_subcommands(raw_subcommands);

    // a non-empty cli11 positional section beats the usage-line scan when
    // both are present: cli11 carries types and optionality.
    let cli11 = extract_cli11_positionals(original)
        .ok()
        .map(|(_, found)| found)
        .filter(|found| !found.is_empty());
    let positionals = match cli11 {
        Some(found) => found,
        None => extract_usage_positionals(original)
            .map(|(_, found)| found)
            .unwrap_or_default(),
    };

    HelpResult {
        entries,
        subcommands,
        positionals,
        desc: "",
    }
}
/// top-level help parser.
///
/// thin nom-shaped wrapper around `build_help_result`: it always returns
/// `Ok` with an empty remainder, because the scan skips any line it cannot
/// interpret rather than failing.
pub fn help_parser(s: &str) -> IResult<&str, HelpResult<'_>> {
Ok(("", build_help_result(s)))
}

View file

@ -0,0 +1,37 @@
use nom::{
IResult, Parser,
character::complete::space0,
combinator::verify,
multi::many0,
sequence::{preceded, terminated},
};
use crate::make_parser;
use crate::parsers::help::helpers::{at_least_indent, eol, rest_of_line};
// continuation line: an indented (≥8 visual cols), non-flag-shaped line
// belonging to the previous flag's description. yields the line content
// with its leading whitespace stripped and the line terminator consumed.
// blank-but-indented lines are accepted (content = ""); dropping them is
// left to the caller (`entry` filters whitespace-only lines).
make_parser!(continuation_line -> &'a str,
verify(
preceded(
// assert ≥8 visual cols of leading horizontal whitespace
// without consuming — space0 inside `rest_of_line`'s preceded
// will eat them next.
at_least_indent(8),
terminated(preceded(space0, rest_of_line), eol)
),
// reject lines whose first non-space char is '-' — that's a new
// flag entry, not a continuation of the previous one.
|content: &&str| !content.starts_with('-')
)
);
// description: the line of text after the switch+param, plus any
// continuation lines. always succeeds — first line may be empty (when
// the switch is followed immediately by a newline, "clap long" style).
// output is (first line with leading whitespace stripped, continuation
// lines in order); line terminators are consumed but not returned.
make_parser!(pub description -> (&'a str, Vec<&'a str>),
(
terminated(preceded(space0, rest_of_line), eol),
many0(continuation_line),
));

105
src/parsers/help/helpers.rs Normal file
View file

@ -0,0 +1,105 @@
use nom::{
AsChar, IResult, Parser, branch::alt, bytes::complete::take_till,
character::complete::line_ending, combinator::eof,
};
#[allow(unused_imports)]
use nom::{bytes::complete::take_while, combinator::peek, combinator::verify};
/// define a named nom-style parser function over `&'a str`.
///
/// forms (an optional visibility such as `pub` or `pub(crate)` may precede
/// the name):
///
/// - `make_parser!(name -> Out, parser)` — `name` returns whatever
///   `parser.parse(s)` returns.
/// - `make_parser!(name -> Out, parser => wrap)` — the parsed value is
///   passed through `wrap` (a closure, function or tuple-variant
///   constructor) before being returned.
///
/// the generated function declares the lifetime `'a`, so `Out` may refer to
/// it. `IResult` (and the `Parser` trait, when `parse` is a trait method)
/// must be in scope at the call site.
#[macro_export]
macro_rules! make_parser {
    ($vis:vis $name:ident -> $out:ty, $parser:expr => $wrap:expr) => {
        #[allow(clippy::needless_lifetimes)]
        #[allow(mismatched_lifetime_syntaxes)]
        $vis fn $name<'a>(s: &'a str) -> IResult<&'a str, $out> {
            let (rem, val) = $parser.parse(s)?;
            Ok((rem, $wrap(val)))
        }
    };
    ($vis:vis $name:ident -> $out:ty, $parser:expr) => {
        #[allow(clippy::needless_lifetimes)]
        #[allow(mismatched_lifetime_syntaxes)]
        $vis fn $name<'a>(s: &'a str) -> IResult<&'a str, $out> {
            $parser.parse(s)
        }
    };
}
/// define a named `fn(char) -> bool` predicate from closure-like syntax:
/// `make_predicate!(name, |c| <body>)`. an optional visibility (`pub`,
/// `pub(crate)`, ...) may precede the name. the body tokens become the
/// function body and may refer to the character by the given identifier.
#[macro_export]
macro_rules! make_predicate {
    ($vis:vis $name:ident, |$c:ident| $($body:tt)*) => {
        $vis fn $name($c: char) -> bool { $($body)* }
    };
}
// characters allowed inside an option or parameter name: any alphanumeric
// char (Unicode-aware `char::is_alphanumeric`), '-' or '_'.
make_predicate!(pub is_option_char, |c| c.is_alphanumeric() || matches!(c, '-' | '_'));
// everything up to, but not including, the next newline char (or the end
// of input). may return "" — `take_till` accepts zero characters.
// NOTE(review): `is_newline` presumably matches only '\n', so a '\r' before
// it would stay in the returned slice — confirm against nom's `AsChar`.
make_parser!(pub rest_of_line -> &'a str,
take_till(|c: char| c.is_newline())
);
// end of line — matches either a line ending (consumed and returned) or
// end of input (returns the empty remainder).
// permissive version used in most line-consuming parsers.
make_parser!(pub eol -> &'a str, alt((line_ending, eof)));
/// visual width of the whitespace in `s`: every space adds 1 and every tab
/// adds 8 (typical terminal default). any other character adds nothing,
/// and the total saturates at `u8::MAX`.
pub fn visual_indent(s: &str) -> u8 {
    let mut width: u8 = 0;
    for ch in s.chars() {
        let step = match ch {
            ' ' => 1,
            '\t' => 8,
            _ => 0,
        };
        width = width.saturating_add(step);
    }
    width
}
/// nom-shaped check that the input begins with at least `min` visual
/// columns of horizontal whitespace (spaces or tabs). doesn't consume —
/// pair with `space0`/`take_while` to actually eat the indent.
///
/// on success the output is the leading whitespace run (peeked, so the
/// input position is unchanged); the parser fails when that run is
/// narrower than `min` as measured by `visual_indent`.
pub fn at_least_indent<'a>(
min: u8,
) -> impl Parser<&'a str, Output = &'a str, Error = nom::error::Error<&'a str>> {
verify(
peek(take_while(|c: char| c == ' ' || c == '\t')),
move |s: &str| visual_indent(s) >= min,
)
}
/// legacy helper: returns (byte index of first non-space, visual indent).
/// used by callers that still need the byte index.
///
/// spaces count 1 and tabs count 8, as in `visual_indent`, and the
/// indent saturates at `u8::MAX` instead of overflowing on very long
/// whitespace runs. when `s` is empty or consists only of spaces/tabs
/// there is no non-space character, so the index is `s.len()` and
/// `&s[idx..]` is empty.
pub fn get_indent(s: &str) -> (usize, u8) {
    let mut indent: u8 = 0;
    for (i, c) in s.char_indices() {
        let width = match c {
            ' ' => 1,
            '\t' => 8,
            // first non-whitespace char: the indent ends here.
            _ => return (i, indent),
        };
        indent = indent.saturating_add(width);
    }
    (s.len(), indent)
}

192
src/parsers/help/options.rs Normal file
View file

@ -0,0 +1,192 @@
use crate::make_parser;
use crate::parsers::help::helpers::is_option_char;
use crate::types::*;
use nom::bytes::complete::{take_till, take_till1};
use nom::character::complete::{space0, space1};
use nom::combinator::{map, opt};
use nom::multi::many0;
use nom::sequence::separated_pair;
use nom::{
IResult, Parser,
branch::alt,
bytes::complete::{tag, take_while1},
character::complete::{char, satisfy},
combinator::{value, verify},
sequence::{delimited, preceded},
};
// `-x`: a dash followed by exactly one alphanumeric char; yields that char.
make_parser!(short_switch -> char,
preceded(char('-'), satisfy(|c| c.is_alphanumeric())));
// `--name`: two dashes followed by one or more option chars; yields the
// name without the dashes.
make_parser!(long_switch -> &'a str,
preceded(tag("--"), take_while1(is_option_char)));
// `--[no-]name`: negatable long switch written with a literal `[no-]`;
// yields the name after the `--[no-]` prefix.
make_parser!(negatable_long_switch -> &'a str,
preceded(tag("--[no-]"), take_while1(is_option_char)));
// separator between switch spellings: a comma plus optional trailing
// horizontal whitespace. yields ().
make_parser!(comma -> (),
value((), preceded(char(','), space0)));
// `[=NAME]` directly after the switch: optional parameter made of option chars.
make_parser!(eq_optional_param -> Param<'a>,
delimited(tag("[="), take_while1(is_option_char), char(']')) => Param::Optional);
// `[=<text>]`: optional parameter in angle brackets; `text` is everything
// up to the first '>' and must be non-empty.
make_parser!(eq_optional_angle_param -> Param<'a>,
delimited(tag("[=<"), take_till1(|c| c == '>'), tag(">]")) => Param::Optional);
// `=NAME` directly after the switch: mandatory parameter made of option chars.
make_parser!(eq_mandatory_param -> Param<'a>,
preceded(char('='), take_while1(is_option_char)) => Param::Mandatory);
// ` NAME`: a single space followed by an ALL_CAPS-style parameter name.
// the token is taken wide (ascii letters, digits, '_' and '-') and only
// then checked as a WHOLE. taking just the uppercase chars would match the
// "N" of " Needs: ..." and leave "eeds:..." as the description, so we
// widen first and reject any token that is not entirely ALL_CAPS-shaped:
// it must start with an uppercase letter or '_' and contain only
// uppercase letters, digits and '_'.
make_parser!(spaced_uppercase_param -> Param<'a>,
    preceded(
        char(' '),
        verify(
            take_while1(|c: char| c.is_ascii_alphanumeric() || matches!(c, '_' | '-')),
            |token: &str| {
                let leads_ok = token
                    .chars()
                    .next()
                    .is_some_and(|c| c.is_ascii_uppercase() || c == '_');
                leads_ok
                    && token
                        .chars()
                        .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
            }
        )
    ) => Param::Mandatory);
// ` <text>`: exactly one space, then a non-empty angle-bracketed name; mandatory.
make_parser!(spaced_angle_param -> Param<'a>,
preceded(char(' '), delimited(char('<'), take_till1(|c| c == '>'), char('>'))) => Param::Mandatory);
// ` <[text]>`: exactly one space, then square brackets nested inside angle
// brackets; optional. yields the text between '[' and ']'.
make_parser!(spaced_opt_angle_param -> Param<'a>,
preceded(char(' '), delimited(char('<'),
delimited(char('['), take_while1(|c| c != ']'), char(']')),
char('>'))) => Param::Optional);
// like `spaced_angle_param`, but tolerates any run of one or more
// spaces/tabs (`space1`) before the '<'.
make_parser!(spaced_angle_param_after_space -> Param<'a>,
preceded(space1, delimited(char('<'), take_till1(|c| c == '>'), char('>'))) => Param::Mandatory);
// ` type`: a single space followed by a short all-lowercase word, treated
// as a mandatory parameter (a type annotation).
// take the full lowercase token then verify it's <=10 chars. a
// take_while_m_n with a 10-char cap would leave a partial match — e.g.
// "--foo nanoseconds" would extract param "nanosecond" and leave "s" as
// the description. a word longer than 10 chars is almost certainly the
// start of the description, not a type annotation.
make_parser!(spaced_type_param -> Param<'a>,
preceded(
char(' '),
verify(
// the token runs to the next whitespace char, whatever it contains.
take_while1(|c: char| !c.is_whitespace()),
// accepted only if at most 10 bytes long and entirely ascii lowercase.
|s: &str| s.len() <= 10 && s.chars().all(|c| c.is_ascii_lowercase())
)
) => Param::Mandatory
);
// parameter attached to a switch. alternatives are tried in order and the
// first success wins: the `=`-style forms come first, then the
// space-separated forms from most to least specific.
// NOTE(review): `spaced_angle_param_after_space` accepts one or more
// spaces/tabs before '<', which appears to cover every input that
// `spaced_angle_param` (exactly one space) accepts — the later branch
// looks unreachable; confirm before removing.
make_parser!(pub param_parser -> Param<'a>, alt((
eq_optional_angle_param,
eq_optional_param,
eq_mandatory_param,
spaced_opt_angle_param,
spaced_angle_param_after_space,
spaced_angle_param,
spaced_uppercase_param,
spaced_type_param,
)));
// generate a parser for two switch spellings joined by a separator:
// `switch_pair!(name, left, sep, right => |a, b| body)` defines
// `fn name(&str) -> IResult<&str, Switch>` that runs `left`, `sep`, `right`
// via `separated_pair` (the separator's output is discarded), binds the two
// results to `a` and `b`, and returns `body`.
macro_rules! switch_pair {
($name:ident, $left:expr, $sep:expr, $right:expr => |$a:ident, $b:ident| $body:expr) => {
fn $name<'a>(s: &'a str) -> IResult<&'a str, Switch<'a>> {
// function-local import; the module also imports `separated_pair` at the top.
use nom::sequence::separated_pair;
let (rem, ($a, $b)) = separated_pair($left, $sep, $right).parse(s)?;
Ok((rem, $body))
}
};
}
// `-s, --long`
switch_pair!(short_comma_long,
short_switch, comma, long_switch => |s, l| Switch::Both(s, l));
// `-s, --[no-]long`
switch_pair!(short_comma_negatable_long,
short_switch, comma, negatable_long_switch => |s, l| Switch::Both(s, l));
// `-s --long` (exactly one space between the two spellings)
switch_pair!(short_space_long,
short_switch, char(' '), long_switch => |s, l| Switch::Both(s, l));
// `-s --[no-]long` (exactly one space between the two spellings)
switch_pair!(short_space_negatable_long,
short_switch, char(' '), negatable_long_switch => |s, l| Switch::Both(s, l));
// a '/' with optional horizontal whitespace on either side; yields ().
make_parser!(slash_sep -> (),
value((), delimited(space0, char('/'), space0)));
// `--long / -s`: long spelling first; still emitted as Both(short, long).
switch_pair!(long_slash_short,
long_switch, slash_sep, short_switch => |l, s| Switch::Both(s, l));
// single-spelling switches lifted into the `Switch` enum.
make_parser!(short_as_switch -> Switch<'a>, short_switch => Switch::Short);
make_parser!(negatable_long_as_switch -> Switch<'a>, negatable_long_switch => Switch::Long);
make_parser!(long_as_switch -> Switch<'a>, long_switch => Switch::Long);
// any switch spelling accepted at the start of a flag entry. `alt` takes
// the first alternative that succeeds, so the two-spelling forms are
// listed before the single-spelling ones (otherwise `-s, --long` would
// stop after `-s`), and within each pair the negatable `--[no-]` variant
// is listed before the plain long variant.
make_parser!(pub switch_parser -> Switch<'a>,
alt((
short_comma_negatable_long,
short_space_negatable_long,
short_comma_long,
short_space_long,
long_slash_short,
short_as_switch,
negatable_long_as_switch,
long_as_switch,
))
);
// `{--long | -s}` — manpage SYNOPSIS-line switch pair. nix-env's
// synopsis is the canonical case: `[{--file | -f} path] [{--profile |
// -p} path]`. emits Switch::Both with the long name.
// this parser handles the inner `--long | -s` part (long first); the
// whitespace around '|' is optional.
make_parser!(brace_pipe_long_short -> Switch<'a>,
separated_pair(long_switch, (space0, char('|'), space0), short_switch)
=> |(l, s): (&'a str, char)| Switch::Both(s, l)
);
// same as above with the short spelling first: `-s | --long`.
make_parser!(brace_pipe_short_long -> Switch<'a>,
separated_pair(short_switch, (space0, char('|'), space0), long_switch)
=> |(s, l): (char, &'a str)| Switch::Both(s, l)
);
// the full braced form: '{', optional whitespace, either ordering of the
// pair, optional whitespace, '}'.
make_parser!(brace_pipe_switch -> Switch<'a>,
delimited(
(char('{'), space0),
alt((brace_pipe_long_short, brace_pipe_short_long)),
(space0, char('}'))
)
);
// switch as it may appear in a usage/SYNOPSIS line: the braced pair form
// is tried first, then every spelling `switch_parser` accepts.
make_parser!(usage_switch_parser -> Switch<'a>,
alt((brace_pipe_switch, switch_parser))
);
// consume any chars except `]`. used to swallow trailing tokens inside a
// flag bracket — e.g. `[--option name value]` keeps switch=Long("option")
// and param=Mandatory("name"), discarding ` value` before the closing `]`.
// may consume nothing (`take_till` accepts an empty match).
make_parser!(take_till_bracket -> &'a str, take_till(|c: char| c == ']'));
// `[<switch> [param] <junk>]` inside the SYNOPSIS line.
// yields the switch and its optional first parameter; the text between the
// parameter and the closing ']' is consumed but not returned. fails when the
// bracket does not start with a switch or has no closing ']'.
make_parser!(flag_in_bracket -> (Switch<'a>, Option<Param<'a>>),
delimited(
(char('['), space0),
(usage_switch_parser, opt(param_parser)),
(take_till_bracket, char(']'))
)
);
// walk the joined SYNOPSIS-line text, collecting every flag-bracketed
// switch + its first param. non-flag tokens (positional brackets,
// command name, ellipses) are skipped one char at a time.
// the skip branch refuses `\n` and `\r`, so `many0` stops at the first
// line break; the `None`s produced by skipped chars are flattened away.
make_parser!(pub parse_usage_flags -> Vec<(Switch<'a>, Option<Param<'a>>)>,
many0(alt((
map(flag_in_bracket, Some),
// `value(None, ...)` requires `None: Clone` which forces Clone
// on Switch/Param; `map(..., |_| None)` doesn't.
map(satisfy(|c| c != '\n' && c != '\r'), |_| None),
)))
=> |v: Vec<Option<(Switch<'a>, Option<Param<'a>>)>>|
v.into_iter().flatten().collect()
);

View file

@ -0,0 +1,400 @@
use crate::parsers::help::helpers::rest_of_line;
use crate::types::Positional;
use crate::{make_parser, make_predicate};
use nom::branch::alt;
use nom::bytes::complete::{tag, tag_no_case, take_till, take_till1, take_while, take_while1};
use nom::character::complete::{char, line_ending, satisfy, space0, space1};
use nom::combinator::{map, not, opt, peek, recognize, value, verify};
use nom::multi::many0;
use nom::sequence::{delimited, preceded, terminated};
use nom::{AsChar, IResult, Parser};
// intermediate classification of one usage-line token, produced by the
// parsers below and folded into `(name, Positional)` pairs by
// `parse_usage_args`. the first three variants carry no name and are
// dropped during that fold.
#[derive(Clone)]
enum PositionalParse<'a> {
// a `{...}` group (see `braces`)
Curly,
// a `-`-prefixed token (see `flag`)
Flag,
// anything consumed only to make progress
Skip,
// required, single value
Mandatory(&'a str),
// optional, single value
Optional(&'a str),
// required, repeatable
ManVariadic(&'a str),
// optional, repeatable
OptVariadic(&'a str),
}
// chars allowed in a command-name word: alphanumerics plus `-`, `_`, `/`, `.`.
make_predicate!(is_word_char, |c| c.is_alphanumeric()
|| matches!(c, '-' | '_' | '/' | '.'));
// chars allowed in a bare ALL-CAPS positional: ASCII uppercase letters,
// any numeric char, `_` and `-`.
make_predicate!(is_pos_char, |c| c.is_ascii_uppercase()
|| c.is_numeric()
|| matches!(c, '_' | '-'));
// matches one of the words options/option/flags/flag, case-insensitively.
// the longer spelling of each pair is listed first so `alt` does not stop
// at the shorter prefix.
make_parser!(section_label -> (),
value((), alt((
tag_no_case("options"),
tag_no_case("option"),
tag_no_case("flags"),
tag_no_case("flag")
)))
);
// variadic marker: three ASCII dots or the single U+2026 ellipsis char.
make_parser!(ellipses -> (),
value((),
alt((tag("..."), tag("\u{2026}")))
)
);
// a non-empty `{...}` group; the content is discarded and the token is
// reported as `PositionalParse::Curly`.
make_parser!(braces -> PositionalParse<'a>,
value(PositionalParse::Curly, delimited(char('{'), take_till1(|c| c == '}'), char('}')))
);
// a `-`-prefixed token, reported as `PositionalParse::Flag`. consumes the
// dash and then at least one char up to (not including) the next space
// char or `]`, so a flag written right before a closing bracket does not
// swallow the bracket.
// FIXME should this be a take_while is_option_char?
// why tf do we have a ']' condition
make_parser!(flag -> PositionalParse<'a>,
value(PositionalParse::Flag, preceded(char('-'), take_till1(|c: char| c.is_space() || c == ']')))
);
// decide whether `s` (surrounding whitespace ignored) is acceptable as a
// positional-argument name.
fn check_positional(s: &str) -> bool {
    let candidate = s.trim();
    // names starting with '-' are flag tokens accidentally captured by the
    // bracket parser, e.g. "[--at-operation]" in jj's synopsis. without
    // this guard every `[--flag]` token would be recorded as a positional
    // named "--flag".
    if candidate.is_empty() || candidate.starts_with('-') {
        return false;
    }
    // section words ("options", "flags", ...) are placeholders for the
    // whole option list, not positionals.
    if section_label.parse(candidate).is_ok() {
        return false;
    }
    let is_section_word = ["OPTIONS", "OPTION", "FLAGS", "FLAG"]
        .iter()
        .any(|word| candidate.eq_ignore_ascii_case(word));
    if is_section_word {
        return false;
    }
    // finally, the name may only contain word characters.
    candidate
        .chars()
        .all(|c| matches!(c, '-' | '_' | '/' | '.') || c.is_alphanumeric())
}
// recognize a balanced `[...]` block, tolerating ONE level of nested
// brackets inside. expressed entirely via nom combinators:
//
// `[` + many0(alt((nested_bracket_block, non_bracket_char))) + `]`
//
// nested_bracket_block is `[ chars_until_] ]`, which means we accept a
// single inner `[...]` correctly but not arbitrarily-deep nesting —
// manpages don't go deeper than two levels.
// returns the inner content (everything between the outer brackets).
// `recognize` yields the whole matched text including both outer brackets;
// the mapping closure then slices off the first and last byte (each a
// one-byte `[` / `]`).
make_parser!(balanced_bracket_inner -> &'a str,
recognize(delimited(
char('['),
many0(alt((
recognize((char('['), take_till(|c: char| c == ']'), char(']'))),
recognize(satisfy(|c: char| c != ']' && c != '[')),
))),
char(']'),
))
=> |whole: &'a str| &whole[1..whole.len() - 1]
);
/// extract a positional name from already-trimmed bracket-inner content.
/// returns the name slice and a flag indicating whether the bracket inner
/// carried a trailing `...` (in-bracket variadic marker).
fn parse_bracket_inner_name(inner: &str) -> Option<(&str, bool)> {
let inner = inner.trim();
// strip trailing "..." for in-bracket variadic.
let (rest, has_dots) = if let Some(stripped) = inner.strip_suffix("...") {
(stripped.trim_end(), true)
} else if let Some(stripped) = inner.strip_suffix('\u{2026}') {
(stripped.trim_end(), true)
} else {
(inner, false)
};
if rest.starts_with('[') {
let mut found = None;
let mut remaining = rest;
while let Some(start) = remaining.find('[') {
let after_start = &remaining[start + 1..];
let Some(end) = after_start.find(']') else {
break;
};
let nested = &after_start[..end];
if let Some((nested_name, nested_dots)) = parse_bracket_inner_name(nested)
&& check_positional(nested_name)
{
found = Some((nested_name, has_dots || nested_dots));
}
remaining = &after_start[end + 1..];
}
return found;
}
let name = if let Some(after_lt) = rest.strip_prefix('<') {
// angle-bracket name: take everything up to the matching '>'
let end = after_lt.find('>')?;
let inner = after_lt[..end].trim();
let (inner, inner_dots) = if let Some(stripped) = inner.strip_suffix("...") {
(stripped.trim_end(), true)
} else if let Some(stripped) = inner.strip_suffix('\u{2026}') {
(stripped.trim_end(), true)
} else {
(inner, false)
};
return Some((inner, has_dots || inner_dots));
} else {
// bare name: take leading word
let end = rest
.find(|c: char| c.is_whitespace() || c == '[' || c == ']')
.unwrap_or(rest.len());
if end == 0 {
return None;
}
&rest[..end]
};
Some((name, has_dots))
}
// extract a balanced `[...]` block and decompose its inner content into
// (name, has-inner-`...` flag). `map_opt` turns a `None` from
// `parse_bracket_inner_name` into a nom parse error.
// the name is not validated here; callers apply `check_positional`.
make_parser!(opt_bracket_name -> (&'a str, bool),
nom::combinator::map_opt(balanced_bracket_inner, parse_bracket_inner_name)
);
// optional positional written in square brackets. fails (via `verify`)
// when the extracted name does not pass `check_positional`. the result is
// `OptVariadic` when an ellipsis appears inside the bracket or directly
// after it, `Optional` otherwise.
make_parser!(
opt_positional -> PositionalParse<'a>,
verify(
// tuple parser: (name + in-bracket variadic, post-bracket ellipsis).
// matches "[name]", "[name...]", "[name ...]", "[name] ...",
// "[<name>]", and one-level nests like "[<program> [<arg>...]]".
(opt_bracket_name, opt(ellipses)),
|((name, _), _): &((&'a str, bool), Option<()>)| check_positional(name)
) => |((name, has_inner_dots), post_dots): ((&'a str, bool), Option<()>)| {
if has_inner_dots || post_dots.is_some() {
PositionalParse::OptVariadic(name)
} else {
PositionalParse::Optional(name)
}
}
);
// mandatory positional written as `<name>`. the name runs up to the first
// `.`, `…` or `>`; an ellipsis is accepted either inside the angle
// brackets (`<name...>`) or right after them (`<name>...`) and makes the
// result `ManVariadic`, otherwise `Mandatory`. names failing
// `check_positional` are rejected.
make_parser!(man_positional -> PositionalParse<'a>,
verify(
(
delimited(
char('<'),
(
take_till1(|c| c == '.' || c == '\u{2026}' || c == '>'),
opt(ellipses)
),
char('>')
),
opt(ellipses)
),
|((ss, _), _)| check_positional(ss)
) => |((p, v), v1): ((&'a str, Option<()>), Option<()>)|
if v.is_some() || v1.is_some() { PositionalParse::ManVariadic(p) }
else { PositionalParse::Mandatory(p) }
);
// bare ALL-CAPS positional such as `FILE` or `FILE...`. the first char
// must be an ASCII uppercase letter (checked with `peek`, so it is not
// consumed separately); the rest is a run of `is_pos_char`. a directly
// attached ellipsis makes it `ManVariadic`, otherwise `Mandatory`.
make_parser!(allcaps_positional -> PositionalParse<'a>,
verify(
(
preceded(
peek(
satisfy(|c: char| c.is_ascii_uppercase())
),
take_while1(is_pos_char)
),
opt(
alt((
tag("..."),
tag("\u{2026}"))
)
)
),
|(ss, _): &(&str, _)| check_positional(ss)
) => |(p, v): (&'a str, Option<&'a str>)|
if v.is_some() { PositionalParse::ManVariadic(p) } else { PositionalParse::Mandatory(p) }
);
// append `(k, v)` to `acc` unless a name equal to `k` (ignoring ASCII
// case) is already present; the first occurrence wins.
fn caseless_push<'a>(k: &'a str, v: Positional, acc: &mut Vec<(&'a str, Positional)>) {
    let unseen = acc
        .iter()
        .all(|(existing, _)| !existing.eq_ignore_ascii_case(k));
    if unseen {
        acc.push((k, v));
    }
}
// parse_usage_args runs on a single logical usage line. SKIP refuses to
// cross a newline boundary so many0 stops at end-of-line — without this
// the parser would happily wander into the OPTIONS section and treat
// every `--flag <name>` angle-bracket parameter as a positional.
//
// the inner positional terminator uses peek(line_ending) instead of
// consuming the newline, so the trailing `opt((space0, line_ending))` in
// the outer delimited eats it cleanly and we never advance past the usage
// line.
//
// the collected tokens are then folded into `(name, Positional)` pairs:
// Curly/Flag/Skip are dropped, and names are de-duplicated ignoring ASCII
// case via `caseless_push` (first occurrence wins).
make_parser!(pub parse_usage_args -> Vec<(&'a str, Positional)>,
(delimited(
space0,
many0(
alt((
map(
(
terminated(
// order matters: `alt` takes the first match, so a bracket
// that is not a valid optional positional falls through to
// the Skip branch as a whole block.
alt((
braces,
opt_positional,
value(PositionalParse::Skip, balanced_bracket_inner),
man_positional,
flag,
allcaps_positional,
)),
// a token must be followed by spaces, a line ending or end
// of input (the latter two are only peeked).
alt((
space1,
value("", peek(line_ending)),
value("", peek(nom::combinator::eof)),
))
),
// catch "[section] ..." patterns where the ellipsis is
// on the *next* token, separated by whitespace.
opt(terminated(
alt((tag("..."), tag("\u{2026}"))),
alt((
space1,
value("", peek(line_ending)),
value("", peek(nom::combinator::eof)),
))
))
),
|(positional, trailing): (PositionalParse<'a>, Option<_>)| {
if trailing.is_none() { positional }
else {
// a detached ellipsis upgrades the preceding token to variadic
match positional {
PositionalParse::Optional(n) => PositionalParse::OptVariadic(n),
PositionalParse::Mandatory(n) => PositionalParse::ManVariadic(n),
other => other,
}
}
}
),
// SKIP must NOT consume a newline. without this, many0 keeps
// iterating past the usage line into OPTIONS-section flag
// syntax and over-extracts positionals.
value(PositionalParse::Skip, satisfy(|c: char| c != '\n' && c != '\r')),
))
),
opt((space0, line_ending))
)) => |p: Vec<PositionalParse<'a>>|
p.into_iter().fold(Vec::new(), |mut acc, parse|
{
match parse {
PositionalParse::Curly => (),
PositionalParse::Flag => (),
PositionalParse::Skip => (),
PositionalParse::OptVariadic(arg) => caseless_push(arg, Positional {
optional: true,
variadic: true
}, &mut acc),
PositionalParse::ManVariadic(arg) => caseless_push(arg, Positional {
optional: false,
variadic: true
}, &mut acc),
PositionalParse::Optional(arg) => caseless_push(arg, Positional {
optional: true,
variadic: false,
}, &mut acc),
PositionalParse::Mandatory(arg) => caseless_push(arg, Positional {
optional: false,
variadic: false
}, &mut acc),
}
acc
})
);
// skip the command name (and any subcommand words) at the start of a
// usage line. consumes leading spaces, then zero or more words that
// (a) do not start with `-`, (b) consist of `is_word_char`, (c) contain at
// least one ASCII lowercase letter, and (d) are followed by at least one
// space. because of (c) an ALL-CAPS token is left in place for the
// positional parsers.
make_parser!(pub skip_command_name -> (),
value((), preceded(space0,
many0(
(
verify(
preceded(not(char('-')), take_while1(is_word_char)),
|ss: &str| ss.chars().any(|c: char| c.is_ascii_lowercase())
),
space1
)
)
))
);
// match the start of a usage line: optional leading spaces, the word
// "usage" in any letter case, then a `:` (consumed) or — only peeked, not
// consumed — a line ending, a space or a tab.
make_parser!(find_usage_line -> (),
value((), preceded(
space0,
terminated(
tag_no_case("usage"),
// accept any of:
// "Usage:" — inline form with colon
// "Usage args" — inline form, space follows the word
// "USAGE\n cmd args" — clap-style header on its own line
alt(
(
value((), char(':')),
value((), peek(line_ending)),
value((), peek(satisfy(|c: char| c == ' ' || c == '\t'))),
)
)
)
))
);
// locate the first usage line in a block of help text and parse its
// positionals. whole lines are skipped while `find_usage_line` does not
// match at their start; then the usage marker, an optional line break
// (for headers that sit on their own line) and the command name are
// consumed before handing over to `parse_usage_args`.
make_parser!(pub extract_usage_positionals -> Vec<(&'a str, Positional)>,
preceded(
many0(preceded(not(find_usage_line), (rest_of_line, line_ending))),
preceded(
(find_usage_line, space0, opt(line_ending), space0, skip_command_name),
parse_usage_args
)
)
);
// chars allowed in a positional name inside a CLI11 "Positionals:" section:
// alphanumerics plus `_` and `-`.
make_predicate!(is_cli11_name_char, |c| c.is_alphanumeric()
|| matches!(c, '_' | '-'));
// header line of the positionals section: optional leading spaces, the
// literal "POSITIONALS:" or "Positionals:", then the rest of the line and
// an optional line ending.
make_parser!(cli11_section_header -> (),
value((),
delimited(
space0,
alt((tag("POSITIONALS:"), tag("Positionals:"))),
(rest_of_line, opt(line_ending))
)
)
);
// one entry line of the positionals section. the line must be indented
// (non-empty leading spaces); then comes a name of at least two bytes,
// an optional run of ASCII uppercase letters (presumably the value type
// tag such as TEXT or INT — TODO confirm), and an optional `...` which
// marks the entry variadic. the remainder of the line is discarded.
// yields `(name, is_variadic)`.
make_parser!(cli11_pos_line -> (&'a str, bool),
preceded(
verify(space0, |ss: &str| !ss.is_empty()),
terminated(
(
verify(take_while1(is_cli11_name_char), |s: &str| s.len() >= 2),
preceded(
(space0, take_while(|c: char| c.is_ascii_uppercase()), space0),
opt(tag("..."))
)
),
(rest_of_line, opt(line_ending))
)
) => |(name, variadic): (&'a str, Option<_>)| (name, variadic.is_some())
);
// parse consecutive entry lines of the positionals section. every entry is
// recorded with `optional: false`; names are de-duplicated ignoring ASCII
// case (first occurrence wins).
make_parser!(parse_cli11_body -> Vec<(&'a str, Positional)>,
many0(cli11_pos_line) => |entries: Vec<(&'a str, bool)>|
entries.into_iter().fold(Vec::new(), |mut acc, (name, variadic)| {
caseless_push(name, Positional { optional: false, variadic }, &mut acc);
acc
})
);
// skip whole lines until the positionals section header is found, then
// parse the entries that follow it. fails when no header is present.
make_parser!(pub extract_cli11_positionals -> Vec<(&'a str, Positional)>,
preceded(
many0(preceded(not(cli11_section_header), (rest_of_line, line_ending))),
preceded(cli11_section_header, parse_cli11_body)
)
);

View file

@ -0,0 +1,83 @@
use nom::{
AsChar, IResult, Parser,
branch::alt,
bytes::complete::{tag, take_till, take_while1},
character::complete::{char, space0},
combinator::{not, value, verify},
multi::many0,
sequence::{delimited, preceded, terminated},
};
use crate::make_parser;
use crate::parsers::help::helpers::{eol, is_option_char};
use crate::types::Subcommand;
/// chars allowed inside a bracketed (`<...>` / `[...]`) placeholder:
/// any alphanumeric plus `_`, `-`, `.`, `|` and `,`.
fn is_placeholder(c: char) -> bool {
    c.is_alphanumeric() || matches!(c, '_' | '-' | '.' | '|' | ',')
}
/// chars allowed inside a bare (unbracketed) placeholder token such as
/// "FILE", "PATTERN..." or "A|B": ASCII uppercase letters, ASCII digits
/// and `_ - . | ,`. lowercase letters are deliberately excluded so that
/// mixed-case description words like "NixOS" or "Home-manager" are not
/// swallowed as placeholders.
fn is_bare_placeholder_char(c: char) -> bool {
    c.is_ascii_uppercase() || c.is_ascii_digit() || matches!(c, '_' | '-' | '.' | '|' | ',')
}
// consume zero or more argument placeholders that follow a subcommand
// name, each introduced by exactly one space. yields `()`.
make_parser!(
skip_arg_placeholders -> (),
value(
(),
many0(preceded(
// every placeholder must be introduced by a single space, which is
// consumed here. if the token after it matches none of the branches
// below, the whole `preceded` fails and `many0` stops without
// having consumed that space.
char(' '),
alt((
// <...> bracketed placeholder
delimited(char('<'), take_while1(is_placeholder), char('>')),
// [...] optional bracketed placeholder
delimited(char('['), take_while1(is_placeholder), char(']')),
// bare ALL_CAPS placeholder — first char must be uppercase or
// a digit (allows e.g. "N", "M2"), and the whole token must
// be uppercase-friendly. rejects "NixOS"-style mixed-case so
// descriptions don't get swallowed.
verify(
take_while1(is_bare_placeholder_char),
|s: &str| {
// `take_while1` guarantees at least one char, so the unwrap
// cannot fail.
let first = s.chars().next().unwrap();
first.is_ascii_uppercase() || first.is_ascii_digit()
}
),
)),
)),
)
);
// parse a subcommand entry: leading whitespace, then a name (2+ option
// chars, not starting with '-'), optional argument placeholders, exactly
// two spaces, optional padding, then the description text and eol.
// the name length check counts bytes (`str::len`). the description is
// everything up to the end of the line, with leading whitespace and an
// optional "- " prefix removed.
make_parser!(pub subcommand_entry -> Subcommand<'a>,
(
preceded(
space0,
verify(
preceded(not(char('-')), take_while1(is_option_char)),
|n: &str| n.len() >= 2,
),
),
skip_arg_placeholders,
tag(" "),
space0,
terminated(take_till(|c: char| c.is_newline()), eol),
) => |(name, _, _, _, desc): (&'a str, _, _, _, &'a str)| {
// some help formats prefix desc with "- " (manpage-style); strip it.
let d = desc.trim_start();
let desc = d.strip_prefix("- ").map(|s| s.trim_start()).unwrap_or(d);
Subcommand { name, desc }
}
);

335
src/parsers/manpage.rs Normal file
View file

@ -0,0 +1,335 @@
//! parse unix manpages (groff/mdoc format) into a structured result.
//!
//! manpages are written in roff/groff markup — a decades-old typesetting language
//! used by man(1). this module strips the formatting and extracts structured data
//! (flags, subcommands, positionals) from the raw groff source.
//!
//! there are two major manpage macro packages:
//! - man (groff) — used by gnu/linux tools. uses macros like .SH, .TP, .IP, .PP
//! - mdoc (bsd) — used by bsd tools. uses .Sh, .Fl, .Ar, .Op, .It, .Bl/.El
//!
//! this module handles both, auto-detecting the format by checking for .Sh macros.
//!
//! for groff manpages, flag extraction uses multiple "strategies" that target
//! different common formatting patterns:
//! - strategy_tp: .TP tagged paragraphs (gnu coreutils, help2man)
//! - strategy_ip: .IP indented paragraphs (curl, hand-written)
//! - strategy_pp_rs: .PP + .RS/.RE blocks (git, docbook)
//! - strategy_nix: nix3-style bullet .IP with .UR/.UE hyperlinks
//! - strategy_deroff: fallback — strip all groff, feed to help text parser
//!
//! the module tries all applicable strategies and picks the one that extracts
//! the most flag entries, on the theory that more results = better match.
mod commands;
mod groff;
mod mdoc;
mod sections;
mod strategies;
use std::io::{self, Read};
use std::path::Path;
use crate::types::{HelpResult, OptionEntry, Param, Positional, Subcommand, Switch};
pub use self::groff::{GroffLine, classify_line, strip_groff_escapes};
pub use self::sections::{extract_subcommand_sections, extract_synopsis_command};
/// owned counterpart of the borrowed [`Switch`]: the same three shapes,
/// with the long name stored as a `String`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OwnedSwitch {
/// short switch only
Short(char),
/// long switch only
Long(String),
/// short and long spelling of the same switch
Both(char, String),
}
/// owned counterpart of the borrowed [`Param`]: the parameter name of a
/// switch, tagged as mandatory or optional.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OwnedParam {
Mandatory(String),
Optional(String),
}
/// one extracted flag: its switch, optional parameter and description.
#[derive(Debug, Clone)]
pub struct ManpageEntry {
pub switch: OwnedSwitch,
pub param: Option<OwnedParam>,
pub desc: String,
}
/// one extracted subcommand: its name and a short description.
#[derive(Debug, Clone)]
pub struct ManpageSubcommand {
pub name: String,
pub desc: String,
}
/// everything extracted for one command. `Default` yields empty lists and
/// an empty description.
#[derive(Debug, Clone, Default)]
pub struct ManpageResult {
/// flags
pub entries: Vec<ManpageEntry>,
pub subcommands: Vec<ManpageSubcommand>,
/// positional arguments as (name, properties) pairs
pub positionals: Vec<(String, Positional)>,
pub description: String,
}
// copy a borrowed `Switch` into its owned form.
impl From<&Switch<'_>> for OwnedSwitch {
    fn from(switch: &Switch<'_>) -> Self {
        match *switch {
            Switch::Short(short) => Self::Short(short),
            Switch::Long(long) => Self::Long(long.to_owned()),
            Switch::Both(short, long) => Self::Both(short, long.to_owned()),
        }
    }
}
// copy a borrowed `Param` into its owned form, keeping the
// mandatory/optional tag.
impl From<&Param<'_>> for OwnedParam {
    fn from(param: &Param<'_>) -> Self {
        match param {
            Param::Mandatory(name) => Self::Mandatory((*name).to_string()),
            Param::Optional(name) => Self::Optional((*name).to_string()),
        }
    }
}
impl From<&OptionEntry<'_>> for ManpageEntry {
fn from(e: &OptionEntry<'_>) -> Self {
let desc: String = e
.desc
.iter()
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.collect::<Vec<_>>()
.join(" ");
ManpageEntry {
switch: (&e.switch).into(),
param: e.param.as_ref().map(Into::into),
desc,
}
}
}
impl From<&Subcommand<'_>> for ManpageSubcommand {
fn from(sc: &Subcommand<'_>) -> Self {
// lowercase the subcommand name here so (a) file naming is
// consistent (meat_yum.json vs meat_YUM.json) and (b) recursive
// --help probes use the lowercase form, which is what most real
// CLIs accept — even tools like meat that DISPLAY uppercase
// names in their help text dispatch on the lowercased argument.
ManpageSubcommand {
name: sc.name.to_ascii_lowercase(),
desc: sc.desc.to_string(),
}
}
}
impl From<&HelpResult<'_>> for ManpageResult {
fn from(r: &HelpResult<'_>) -> Self {
ManpageResult {
entries: r.entries.iter().map(Into::into).collect(),
subcommands: r.subcommands.iter().map(Into::into).collect(),
// positional names are stored lowercased so output is
// stable across the various places we extract them from
// (synopsis, usage, cli11 sections).
positionals: r
.positionals
.iter()
.map(|(k, v)| (k.to_ascii_lowercase(), v.clone()))
.collect(),
description: r.desc.to_string(),
}
}
}
/// parse a manpage from its classified lines.
/// auto-detects mdoc vs groff format. for groff, runs the multi-strategy
/// extraction pipeline.
pub fn parse_manpage_lines(lines: &[GroffLine]) -> ManpageResult {
if mdoc::is_mdoc(lines) {
mdoc::parse_mdoc_lines(lines)
} else {
let options_section = sections::extract_options_section(lines);
let mut entries = strategies::extract_entries(&options_section);
// merge SYNOPSIS-only flags (nix-env's `[{--profile | -p} path]`
// pattern, where the flag is declared in the synopsis but never
// listed as an entry in the OPTIONS body). body entries take
// precedence on duplicate names — they carry the descriptions.
let synopsis_flags = sections::extract_synopsis_flags(lines);
if !synopsis_flags.is_empty() {
let have_long: std::collections::HashSet<String> = entries
.iter()
.filter_map(|e| match &e.switch {
OwnedSwitch::Long(l) | OwnedSwitch::Both(_, l) => Some(l.to_ascii_lowercase()),
_ => None,
})
.collect();
let have_short: std::collections::HashSet<char> = entries
.iter()
.filter_map(|e| match &e.switch {
OwnedSwitch::Short(c) | OwnedSwitch::Both(c, _) => Some(*c),
_ => None,
})
.collect();
for e in synopsis_flags {
let dup = match &e.switch {
OwnedSwitch::Long(l) => have_long.contains(&l.to_ascii_lowercase()),
OwnedSwitch::Short(c) => have_short.contains(c),
OwnedSwitch::Both(c, l) => {
have_short.contains(c) || have_long.contains(&l.to_ascii_lowercase())
}
};
if !dup {
entries.push(e);
}
}
}
let positionals = sections::extract_synopsis_positionals(lines);
let commands_section = sections::extract_commands_section(lines);
let mut subcommands = commands::extract_subcommands_from_commands(&commands_section);
for positional in sections::extract_description_positionals(lines) {
if !subcommands
.iter()
.any(|sc| sc.name.eq_ignore_ascii_case(&positional.name))
{
subcommands.push(positional);
}
}
ManpageResult {
entries,
subcommands,
positionals,
description: String::new(),
}
}
}
/// parse a manpage from its raw string contents.
/// splits into lines, parses, then extracts the NAME section description.
pub fn parse_manpage_string(contents: &str) -> ManpageResult {
let lines: Vec<GroffLine> = contents.split('\n').map(classify_line).collect();
let mut result = parse_manpage_lines(&lines);
if let Some(desc) = sections::extract_name_description(&lines) {
result.description = desc;
}
result
}
/// parse a manpage and also pull out clap-style `.SH SUBCOMMAND` sections
/// as separate per-subcommand results. each subcommand section in a
/// clap-generated manpage is its own command with its own flags; the
/// parent's subcommand list is populated from their names.
///
/// returns (main_result, sub_results) where each sub_result has
/// name=full_command ("nh os"), desc, and its own ManpageResult.
pub fn parse_manpage_with_subs(contents: &str) -> (ManpageResult, Vec<(String, ManpageResult)>) {
    let classified: Vec<GroffLine> = contents.split('\n').map(classify_line).collect();

    let mut main = parse_manpage_lines(&classified);
    if let Some(description) = sections::extract_name_description(&classified) {
        main.description = description;
    }

    let sub_sections = sections::extract_subcommand_sections(&classified);
    if !sub_sections.is_empty() {
        // the SUBCOMMAND-section names are the authoritative list for
        // clap-generated manpages, so they replace whatever was found.
        main.subcommands = sub_sections
            .iter()
            .map(|(name, desc, _)| ManpageSubcommand {
                name: name.to_ascii_lowercase(),
                desc: desc.clone(),
            })
            .collect();
    }

    // each SUBCOMMAND section body goes through the same strategy-picker
    // as the top-level OPTIONS section — clap puts flag definitions
    // directly under the .SH SUBCOMMAND header with no inner .SH wrapping,
    // so parse_manpage_lines (which looks for a child OPTIONS section)
    // would come back empty.
    let mut subs: Vec<(String, ManpageResult)> = Vec::new();
    for (name, description, body) in sub_sections {
        let sub_result = ManpageResult {
            entries: strategies::extract_entries(&body),
            subcommands: Vec::new(),
            positionals: Vec::new(),
            description,
        };
        subs.push((name, sub_result));
    }

    (main, subs)
}
/// read a manpage file from disk. a path whose extension is exactly `gz`
/// is gunzipped (the common case — most installed manpages are gzipped);
/// any other file is read as-is.
///
/// # Errors
/// returns the underlying I/O or decompression error, or an
/// `InvalidData` error when an uncompressed file is not valid UTF-8.
pub fn read_manpage_file<P: AsRef<Path>>(path: P) -> io::Result<String> {
    let path = path.as_ref();
    let raw = std::fs::read(path)?;

    let is_gzip = path.extension().and_then(|ext| ext.to_str()) == Some("gz");
    if !is_gzip {
        return String::from_utf8(raw).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e));
    }

    let mut text = String::new();
    flate2::read::GzDecoder::new(raw.as_slice()).read_to_string(&mut text)?;
    Ok(text)
}
/// read + parse a manpage file in one step.
///
/// # Errors
/// propagates any error from `read_manpage_file`.
pub fn parse_manpage_file<P: AsRef<Path>>(path: P) -> io::Result<ManpageResult> {
    read_manpage_file(path).map(|contents| parse_manpage_string(&contents))
}
// unit tests for the top-level manpage entry points.
#[cfg(test)]
mod tests {
use super::*;
// synthetic man(7)-style page with three `.TP` entries in OPTIONS.
const TP_MANPAGE: &str = r#".TH FOO 1 "2024" "1.0" "User Commands"
.SH NAME
foo \- a synthetic test command
.SH SYNOPSIS
.B foo
[\fIOPTIONS\fR] <input> [output]
.SH OPTIONS
.TP
\fB\-v\fR, \fB\-\-verbose\fR
increase output verbosity
.TP
\fB\-o\fR \fIFILE\fR, \fB\-\-output\fR=\fIFILE\fR
write to FILE
.TP
\fB\-h\fR, \fB\-\-help\fR
show this help and exit
"#;
// all three flags are extracted in order, and the description comes
// from the NAME section.
#[test]
fn tp_strategy_extracts_flags() {
let r = parse_manpage_string(TP_MANPAGE);
assert_eq!(
r.entries.len(),
3,
"expected 3 entries, got {:?}",
r.entries
);
assert_eq!(r.description, "a synthetic test command");
assert!(matches!(
r.entries[0].switch,
OwnedSwitch::Both('v', ref l) if l == "verbose"
));
assert!(matches!(
r.entries[2].switch,
OwnedSwitch::Both('h', ref l) if l == "help"
));
assert!(r.entries[0].desc.contains("verbosity"));
}
// a page using `.Sh` macros is recognised as mdoc.
#[test]
fn mdoc_format_detected() {
let src = ".Sh NAME\n.Nm test\n.Nd a test\n.Sh DESCRIPTION\nstuff\n";
let lines: Vec<GroffLine> = src.split('\n').map(classify_line).collect();
assert!(mdoc::is_mdoc(&lines));
}
// font escapes are removed and `\-` becomes a plain hyphen.
#[test]
fn groff_escapes_stripped() {
let stripped = groff::strip_groff_escapes("\\fB\\-v\\fR \\fIfile\\fR");
assert_eq!(stripped.trim(), "-v file");
}
}

View file

@ -0,0 +1,157 @@
//! COMMANDS section subcommand extraction.
//!
//! some manpages (notably systemctl) have a dedicated COMMANDS section
//! listing subcommands with descriptions. these use .PP + bold name +
//! .RS/.RE blocks:
//! .PP
//! \fBstart\fR \fIUNIT\fR...
//! .RS 4
//! Start (activate) one or more units.
//! .RE
use crate::parsers::manpage::ManpageSubcommand;
use crate::parsers::manpage::groff::{GroffLine, strip_groff_escapes, strip_inline_macro_args};
/// check that an extracted name looks like a subcommand: at least two
/// bytes long, no leading dash, and made up only of ASCII lowercase
/// letters, ASCII digits, `-` and `_`.
fn is_valid_subcmd(name: &str) -> bool {
    if name.len() < 2 || name.starts_with('-') {
        return false;
    }
    name.chars()
        .all(|c| matches!(c, 'a'..='z' | '0'..='9' | '-' | '_'))
}
/// extract subcommand name from a bold groff text like
/// "\fBlist\-units\fR [\fIPATTERN\fR...]" -> "list-units"
///
/// when the text starts with `\fB`, the name is the segment up to the next
/// font escape (`\f…`); other escapes inside the segment, such as the
/// escaped hyphen `\-`, are kept and resolved by `strip_groff_escapes`.
/// otherwise the first whitespace-delimited word of the stripped text is
/// used. returns `None` when the candidate fails `is_valid_subcmd`.
fn extract_bold_command_name(text: &str) -> Option<String> {
    let trimmed = text.trim();
    if let Some(after) = trimmed
        .strip_prefix("\\fB")
        .filter(|rest| !rest.is_empty())
    {
        // cut at the next font change, not at the next backslash: stopping
        // at any backslash would truncate "list\-units" to "list".
        let segment_end = after.find("\\f").unwrap_or(after.len());
        let reconstructed = format!("\\fB{}\\fR", &after[..segment_end]);
        let name = normalize_command_token(strip_groff_escapes(&reconstructed).trim());
        return is_valid_subcmd(&name).then_some(name);
    }
    // fallback: take the first whitespace-delimited word of the stripped text
    let stripped = strip_groff_escapes(trimmed);
    let first_word = stripped.split_whitespace().next().unwrap_or("");
    let name = normalize_command_token(first_word);
    is_valid_subcmd(&name).then_some(name)
}
/// clean up a raw command token: trim surrounding whitespace, drop
/// everything from the first `(` onwards (e.g. a "(1)" section suffix),
/// then strip trailing commas.
fn normalize_command_token(token: &str) -> String {
    let trimmed = token.trim();
    let head = match trimmed.split_once('(') {
        Some((before_paren, _)) => before_paren,
        None => trimmed,
    };
    head.trim_end_matches(',').to_owned()
}
/// try to read a subcommand name from one classified line. text lines are
/// inspected directly; font macros (`.B`, `.BI`, `.BR`, `.I`, `.IR`,
/// `.IB`, `.RB`, `.RI`) have their arguments rendered to plain text first.
/// every other line yields `None`.
fn extract_command_name_from_line(line: &GroffLine) -> Option<String> {
    match line {
        GroffLine::Text(text) => extract_bold_command_name(text),
        GroffLine::Macro { name, args } => match name.as_str() {
            "B" | "BI" | "BR" | "I" | "IR" | "IB" | "RB" | "RI" => {
                let rendered = strip_groff_escapes(&strip_inline_macro_args(args));
                extract_bold_command_name(&rendered)
            }
            _ => None,
        },
        _ => None,
    }
}
/// walk through commands section lines, extracting subcommand name+description
/// pairs from .PP + Text + .RS/.RE blocks.
pub fn extract_subcommands_from_commands(lines: &[GroffLine]) -> Vec<ManpageSubcommand> {
let mut out = Vec::new();
let mut i = 0;
while i < lines.len() {
if let GroffLine::Macro { name, .. } = &lines[i]
&& name == "PP"
{
i += 1;
if i >= lines.len() {
continue;
}
if let Some(name) = extract_command_name_from_line(&lines[i]) {
let (desc, new_i) = collect_subcmd_desc(lines, i + 1);
let short_desc = first_sentence(&desc);
out.push(ManpageSubcommand {
name: name.to_ascii_lowercase(),
desc: short_desc,
});
i = new_i;
continue;
} else {
i += 1;
}
} else {
i += 1;
}
}
out
}
/// collect the description for a subcommand entry, starting at `start`.
/// returns the text lines joined with single spaces, plus the index at
/// which the caller should resume scanning.
///
/// text lines before any `.RS` are collected as they come. once an `.RS`
/// is seen, text is collected until the matching `.RE` (resume after it)
/// or until a `.PP`/`.SH`/`.SS` boundary (resume at it); other lines
/// inside the block are skipped. outside an `.RS` block, any non-text line
/// ends the description and scanning resumes at that line.
fn collect_subcmd_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
let mut acc: Vec<String> = Vec::new();
let mut i = start;
while i < lines.len() {
match &lines[i] {
GroffLine::Macro { name, .. } if name == "RS" => {
i += 1;
// inside .RS — collect until .RE or boundary
while i < lines.len() {
match &lines[i] {
GroffLine::Macro { name, .. } if name == "RE" => {
// consume the .RE itself
return (acc.join(" "), i + 1);
}
GroffLine::Text(t) => {
acc.push(t.clone());
i += 1;
}
GroffLine::Macro { name, .. }
if name == "PP" || name == "SH" || name == "SS" =>
{
// leave the boundary macro for the caller
return (acc.join(" "), i);
}
_ => i += 1,
}
}
// ran off the end without seeing .RE
return (acc.join(" "), i);
}
GroffLine::Text(t) => {
acc.push(t.clone());
i += 1;
}
_ => return (acc.join(" "), i),
}
}
(acc.join(" "), i)
}
/// take the first sentence of `s` as the description.
///
/// a sentence ends at the first `.` that is followed by whitespace or the
/// end of the text. dots inside a word ("foo.service", "1.2"), a dot at
/// the very start, and dots that are part of a `...` run do not count.
/// when no sentence end is found the whole trimmed text is returned.
fn first_sentence(s: &str) -> String {
    let s = s.trim();
    let bytes = s.as_bytes();
    let sentence_end = s.char_indices().find_map(|(idx, c)| {
        if c != '.' || idx == 0 || bytes[idx - 1] == b'.' {
            return None;
        }
        // '.' is a single byte, so `idx + 1` is always a char boundary.
        match s[idx + 1..].chars().next() {
            None => Some(idx),
            Some(next) if next.is_whitespace() => Some(idx),
            Some(_) => None,
        }
    });
    match sentence_end {
        Some(idx) => s[..idx].trim().to_string(),
        None => s.to_string(),
    }
}

View file

@ -0,0 +1,385 @@
//! groff escape/formatting stripping and line classification.
//!
//! groff escapes start with backslash and use various continuation syntaxes.
//! we strip them, replacing named characters (like \(aq for apostrophe) with
//! their text equivalents and discarding formatting directives.
//!
//! also exports `make_macro_walker!`, the manpage-side analogue of the
//! help parser's `make_parser!`. all of our strategy_* functions are
//! "scan lines, on each .MACRO_NAME run a handler, advance, accumulate"
//! — this macro factors out the loop scaffolding so each strategy reduces
//! to its specific extraction logic.
/// walk a `&[GroffLine]` slice, and on each macro whose name matches
/// `$mname`, invoke the body with `(lines, i, args)` where:
/// - `lines` is the full slice (for slicing further bodies)
/// - `i` is the current index of the matched macro
/// - `args` is the macro's argument string (by reference)
///
/// the body returns `Option<(T, usize)>`. `Some((value, new_i))` pushes
/// `value` and advances the cursor to `new_i` (typically computed as
/// `lines.len() - rest.len()` after `collect_text_lines`). `None`
/// advances by one line and keeps scanning.
///
/// matches the help-parser pattern `make_parser!(name -> T, parser => wrap)`:
/// the macro hides the loop scaffolding, the handler expresses the actual
/// extraction logic.
///
/// only the `pub` form is defined by this rule, so every generated walker
/// is a public function. the loop does not check that `new_i` is greater
/// than the current index; a handler returning an index that does not
/// advance would make the walker loop forever.
#[macro_export]
macro_rules! make_macro_walker {
(pub $name:ident -> Vec<$t:ty>, on macro $mname:expr =>
|$lines:ident, $i:ident, $args:ident| $body:expr) => {
pub fn $name(lines_input: &[$crate::parsers::manpage::GroffLine]) -> Vec<$t> {
let mut out = Vec::new();
let mut cursor = 0;
let $lines: &[$crate::parsers::manpage::GroffLine] = lines_input;
while cursor < $lines.len() {
if let $crate::parsers::manpage::GroffLine::Macro {
name: macro_name,
args: $args,
} = &$lines[cursor]
{
if macro_name == $mname {
let $i = cursor;
// wrap the handler body in an IIFE so an early
// `return None` inside the handler returns from the
// closure, not from the surrounding strategy function.
#[allow(clippy::redundant_closure_call)]
let result: Option<($t, usize)> = (|| $body)();
if let Some((value, new_i)) = result {
out.push(value);
cursor = new_i;
continue;
}
}
}
// non-matching line, or the handler declined: move on by one
cursor += 1;
}
out
}
};
}
/// every line in a manpage is classified as one of four types.
/// this classification drives all subsequent parsing — strategies
/// pattern-match on sequences of classified lines.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GroffLine {
/// macro name + args, e.g. ("SH", "OPTIONS") or ("TP", ""). the name is
/// stored without the leading dot; `args` is empty when none were given.
Macro { name: String, args: String },
/// plain text after groff stripping
Text(String),
/// empty line
Blank,
/// groff comment: .backslash-quote or backslash-quote
Comment,
}
/// map a groff named-character code to the plain character we emit for it.
/// "aq" becomes an apostrophe, the quote codes "lq"/"Lq"/"rq"/"Rq" all
/// become a straight double quote, and the dashes "em"/"en" become a
/// hyphen. any other name yields `None`.
fn named_char_of(name: &str) -> Option<char> {
    let replacement = match name {
        "aq" => '\'',
        "lq" | "Lq" | "rq" | "Rq" => '"',
        "em" | "en" => '-',
        _ => return None,
    };
    Some(replacement)
}
/// true when the byte is an ASCII letter or an ASCII digit.
fn is_alnum(c: u8) -> bool {
    matches!(c, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z')
}
/// strip groff escape sequences, replacing named characters with text
/// equivalents and discarding formatting directives.
pub fn strip_groff_escapes(source: &str) -> String {
let bytes = source.as_bytes();
let len = bytes.len();
let mut buffer = String::with_capacity(len);
let mut pos = 0;
let mut prev_char: u8 = 0;
while pos < len {
if bytes[pos] == b'\\' && pos + 1 < len {
let next = bytes[pos + 1];
match next {
b'f' => {
// font escape: \fB, \fI, \fP, \fR, \f(XX, \f[...]
if pos + 2 < len {
let font_char = bytes[pos + 2];
// insert space before italic font to preserve word boundaries
// e.g. \fB--max-results\fR\fIcount\fR -> "--max-results count"
if font_char == b'I' && is_alnum(prev_char) {
buffer.push(' ');
prev_char = b' ';
}
if font_char == b'(' {
pos += 5; // \f(XX — two-character font name
} else if font_char == b'[' {
pos += 3;
skip_to_byte(bytes, len, &mut pos, b']');
if pos < len {
pos += 1;
}
} else {
pos += 3; // \fX — single-character font selector
}
} else {
pos += 2;
}
}
b'-' => {
// escaped hyphen-minus — emit a plain hyphen
buffer.push('-');
prev_char = b'-';
pos += 2;
}
b'&' | b'/' | b',' => {
// zero-width characters — discard without output
pos += 2;
}
b'(' => {
// two-char named character: \(aq, \(lq, \(rq, etc.
if pos + 3 < len {
let name = &source[pos + 2..pos + 4];
if let Some(c) = named_char_of(name) {
buffer.push(c);
prev_char = c as u8;
}
pos += 4;
} else {
pos += 2;
}
}
b'[' => {
// bracketed named character: \[aq], \[lq], etc.
pos += 2;
let start = pos;
skip_to_byte(bytes, len, &mut pos, b']');
if pos < len {
let name = &source[start..pos];
if let Some(c) = named_char_of(name) {
buffer.push(c);
prev_char = c as u8;
}
pos += 1;
}
}
b's' => {
// size escape: \sN, \s+N, \s-N — skip the numeric argument
pos += 2;
if pos < len && (bytes[pos] == b'+' || bytes[pos] == b'-') {
pos += 1;
}
if pos < len && bytes[pos].is_ascii_digit() {
pos += 1;
}
if pos < len && bytes[pos].is_ascii_digit() {
pos += 1;
}
}
b'm' => {
// color escape: \m[...] — skip the bracketed color name
pos += 2;
if pos < len && bytes[pos] == b'[' {
pos += 1;
skip_to_byte(bytes, len, &mut pos, b']');
if pos < len {
pos += 1;
}
}
}
b'X' => {
// device control: \X'...' — skip the single-quoted payload
pos += 2;
if pos < len && bytes[pos] == b'\'' {
pos += 1;
skip_to_byte(bytes, len, &mut pos, b'\'');
if pos < len {
pos += 1;
}
}
}
b'*' => {
// string variable: \*X or \*(XX or \*[...] — skip the reference
pos += 2;
skip_groff_reference(bytes, len, &mut pos);
}
b'n' => {
// number register: \nX or \n(XX or \n[...] — skip the reference
pos += 2;
skip_groff_reference(bytes, len, &mut pos);
}
b'e' => {
// escaped backslash literal
buffer.push('\\');
prev_char = b'\\';
pos += 2;
}
b'\\' => {
// double backslash — emit one
buffer.push('\\');
prev_char = b'\\';
pos += 2;
}
b' ' | b'~' => {
// escaped/non-breaking space — emit a regular space
buffer.push(' ');
prev_char = b' ';
pos += 2;
}
_ => {
// unknown escape — skip the two-character sequence
pos += 2;
}
}
} else {
// copy a full utf-8 char from source to buffer
let c = source[pos..].chars().next().unwrap();
buffer.push(c);
prev_char = if c.is_ascii() { c as u8 } else { 0 };
pos += c.len_utf8();
}
}
buffer
}
fn skip_to_byte(bytes: &[u8], len: usize, pos: &mut usize, delim: u8) {
while *pos < len && bytes[*pos] != delim {
*pos += 1;
}
}
/// skip a groff reference that uses one of three sub-forms:
/// single char — e.g. \*X or \nX
/// ( + 2 chars — e.g. \*(XX or \n(XX
/// [ to ] — e.g. \*[name] or \n[name]
fn skip_groff_reference(bytes: &[u8], len: usize, pos: &mut usize) {
if *pos < len {
if bytes[*pos] == b'(' {
*pos += 3; // skip past '(' + two-character name
} else if bytes[*pos] == b'[' {
*pos += 1;
skip_to_byte(bytes, len, pos, b']');
if *pos < len {
*pos += 1;
}
} else {
*pos += 1;
}
}
}
/// flatten the arguments of an alternating-font macro (.BI, .BR, .IR, ...).
/// these macros switch fonts between arguments and join them without
/// separators, e.g.:
/// .BI "--output " "FILE"
/// becomes "--output FILE" (the space survives because it is quoted).
///
/// double quotes delimit an argument and are dropped; everything inside
/// them is kept verbatim. spaces and tabs outside quotes are removed. an
/// unterminated quote simply runs to the end of the input.
pub fn strip_inline_macro_args(text: &str) -> String {
    let mut joined = String::with_capacity(text.len());
    let mut inside_quotes = false;
    for ch in text.chars() {
        match ch {
            // a quote opens or closes an argument and is never emitted
            '"' => inside_quotes = !inside_quotes,
            // separators between arguments vanish
            ' ' | '\t' if !inside_quotes => {}
            other => joined.push(other),
        }
    }
    joined
}
/// render the arguments of a single-font macro (.B/.I), where arguments
/// stay separated by spaces. double quotes only group arguments in roff
/// source, so every quote character is dropped before escapes are
/// stripped; the result is trimmed.
pub fn strip_space_macro_args(text: &str) -> String {
    let unquoted: String = text.chars().filter(|&ch| ch != '"').collect();
    let rendered = strip_groff_escapes(&unquoted);
    rendered.trim().to_owned()
}
/// resolve groff escapes in `line`, then trim surrounding whitespace.
pub fn strip_groff(line: &str) -> String {
    let rendered = strip_groff_escapes(line);
    rendered.trim().to_owned()
}
/// comment detection: a line is a groff comment when it begins with
/// `.\"` (control character + comment escape) or with a bare `\"`.
fn is_comment_line(line: &str) -> bool {
    const DOT_COMMENT: &str = ".\\\"";
    const BARE_COMMENT: &str = "\\\"";
    line.starts_with(DOT_COMMENT) || line.starts_with(BARE_COMMENT)
}
/// classify a single line of manpage source.
///
/// - comment lines (`.\"`, `\"`, or a bare `.\`) become `Comment`
/// - an empty line becomes `Blank`
/// - lines starting with '.' or '\'' (groff alternate control char) become
///   `Macro`; the name is split from its arguments at the first space/tab
///   and a single pair of surrounding double quotes around the arguments
///   is removed (only when no other quote appears inside)
/// - anything else is stripped of escapes and becomes `Text`, or `Blank`
///   when nothing is left
pub fn classify_line(line: &str) -> GroffLine {
    // `.\` with nothing after the backslash is treated as a comment too
    if is_comment_line(line) || line == ".\\" {
        return GroffLine::Comment;
    }
    if line.is_empty() {
        return GroffLine::Blank;
    }
    let Some(body) = line.strip_prefix(['.', '\'']) else {
        // ordinary text line
        let text = strip_groff(line);
        return if text.is_empty() {
            GroffLine::Blank
        } else {
            GroffLine::Text(text)
        };
    };
    // macro line — extract macro name and arguments
    let body = body.trim();
    let (name, raw_args) = match body.split_once([' ', '\t']) {
        Some((name, tail)) => (name, tail.trim()),
        None => (body, ""),
    };
    // strip surrounding quotes from arguments, but only when they wrap one
    // single quoted string
    let args = raw_args
        .strip_prefix('"')
        .and_then(|inner| inner.strip_suffix('"'))
        .filter(|inner| !inner.contains('"'))
        .unwrap_or(raw_args);
    GroffLine::Macro {
        name: name.to_string(),
        args: args.to_string(),
    }
}

237
src/parsers/manpage/mdoc.rs Normal file
View file

@ -0,0 +1,237 @@
//! BSD mdoc format support.
//!
//! mdoc is the bsd manpage macro package. it uses semantic macros rather than
//! presentation macros:
//! .Fl v -> flag: -v
//! .Ar file -> argument: file
//! .Op ... -> optional: [...]
//! .Bl/.It/.El -> list begin/item/end
//! .Sh -> section header (note lowercase 'h', vs groff's .SH)
use crate::parsers::manpage::groff::{GroffLine, strip_groff_escapes};
use crate::parsers::manpage::{ManpageEntry, ManpageResult, OwnedParam, OwnedSwitch};
use crate::types::Positional;
/// detect mdoc format: true as soon as any line is a `.Sh` macro
/// (capital S, lowercase h — man(7) pages use `.SH` instead).
pub fn is_mdoc(lines: &[GroffLine]) -> bool {
    for line in lines {
        if matches!(line, GroffLine::Macro { name, .. } if name == "Sh") {
            return true;
        }
    }
    false
}
/// extract renderable text from an mdoc line.
/// text lines are returned with escapes stripped; structural macros
/// (paragraph/list/section/prologue/option-bracket macros) yield nothing;
/// any other macro contributes its escape-stripped, trimmed arguments when
/// those are non-empty.
fn mdoc_text_of(line: &GroffLine) -> Option<String> {
    match line {
        GroffLine::Text(text) => Some(strip_groff_escapes(text)),
        GroffLine::Macro { name, .. }
            if matches!(
                name.as_str(),
                "Pp" | "Bl" | "El" | "Sh" | "Ss" | "Os" | "Dd" | "Dt" | "Oo" | "Oc" | "Op"
            ) =>
        {
            None
        }
        GroffLine::Macro { args, .. } => {
            let rendered = strip_groff_escapes(args);
            let rendered = rendered.trim();
            if rendered.is_empty() {
                None
            } else {
                Some(rendered.to_string())
            }
        }
        GroffLine::Blank | GroffLine::Comment => None,
    }
}
/// parse the arguments of an mdoc `.It` (list item) line that defines a flag.
/// such lines look like ".It Fl v Ar file" where Fl = flag, Ar = argument.
///
/// `Ns` (no-space) tokens are ignored. the item must start with `Fl`:
/// - a one-character alphanumeric flag becomes a short switch
/// - a longer flag starting with '-' becomes a long switch (the extra '-'
///   is removed)
/// anything else yields `None`. when the third and fourth tokens are
/// `Ar <name>`, the entry carries a mandatory parameter. the description
/// is left empty for the caller to fill in.
fn parse_mdoc_it(args: &str) -> Option<ManpageEntry> {
    let tokens: Vec<&str> = args
        .split(' ')
        .filter(|token| !token.is_empty() && *token != "Ns")
        .collect();
    let head = *tokens.first()?;
    let flag = *tokens.get(1)?;
    if head != "Fl" {
        return None;
    }
    let switch = if flag.len() == 1 {
        // a one-byte str is always a single ascii character
        let ch = char::from(flag.as_bytes()[0]);
        if !ch.is_ascii_alphanumeric() {
            return None;
        }
        OwnedSwitch::Short(ch)
    } else {
        OwnedSwitch::Long(flag.strip_prefix('-')?.to_string())
    };
    let param = match (tokens.get(2), tokens.get(3)) {
        (Some(&"Ar"), Some(name)) => Some(OwnedParam::Mandatory((*name).to_string())),
        _ => None,
    };
    Some(ManpageEntry {
        switch,
        param,
        desc: String::new(),
    })
}
/// extract a positional argument from the arguments of an mdoc line
/// (.Ar or .Op Ar). the first space-separated word is the name, accepted
/// only when it is at least two bytes long and returned lowercased; the
/// flag is true when any word (the first included) is "...".
fn positional_of_mdoc_line(args: &str) -> Option<(String, bool)> {
    let mut tokens = args.split(' ').filter(|token| !token.is_empty());
    let head = tokens.next()?;
    if head.len() < 2 {
        return None;
    }
    let variadic = head == "..." || tokens.any(|token| token == "...");
    Some((head.to_ascii_lowercase(), variadic))
}
/// parse an entire mdoc-format manpage.
/// walks through all classified lines looking for:
/// 1. .Bl/.It/.El list blocks containing flag definitions
/// 2. .Sh SYNOPSIS sections containing positional arguments (.Ar, .Op Ar)
pub fn parse_mdoc_lines(lines: &[GroffLine]) -> ManpageResult {
// collect description for an entry — until next structural macro
fn desc_of(lines: &[GroffLine], start: usize) -> (String, usize) {
let mut acc: Vec<String> = Vec::new();
let mut i = start;
while i < lines.len() {
if let GroffLine::Macro { name, .. } = &lines[i]
&& matches!(name.as_str(), "It" | "El" | "Sh" | "Ss")
{
break;
}
if let Some(t) = mdoc_text_of(&lines[i]) {
acc.push(t);
}
i += 1;
}
(acc.join(" ").trim().to_string(), i)
}
fn skip_to_el(lines: &[GroffLine], start: usize) -> usize {
let mut i = start;
while i < lines.len() {
if let GroffLine::Macro { name, .. } = &lines[i]
&& name == "El"
{
return i + 1;
}
i += 1;
}
i
}
/// parse a single .It entry: extract flag, collect description.
fn parse_it(
args: &str,
lines: &[GroffLine],
start: usize,
entries: &mut Vec<ManpageEntry>,
) -> usize {
let (desc, new_start) = desc_of(lines, start);
if let Some(mut entry) = parse_mdoc_it(args) {
entry.desc = desc;
entries.push(entry);
}
new_start
}
/// parse all .It entries within a .Bl/.El option list.
fn parse_option_list(
entries: &mut Vec<ManpageEntry>,
lines: &[GroffLine],
start: usize,
) -> usize {
let mut i = start;
while i < lines.len() {
match &lines[i] {
GroffLine::Macro { name, .. } if name == "El" => return i + 1,
GroffLine::Macro { name, args } if name == "It" => {
i = parse_it(args, lines, i + 1, entries);
}
_ => i += 1,
}
}
i
}
fn parse_synopsis(
positionals: &mut Vec<(String, bool, bool)>,
lines: &[GroffLine],
start: usize,
) -> usize {
let mut i = start;
while i < lines.len() {
match &lines[i] {
GroffLine::Macro { name, .. } if name == "Sh" => return i,
GroffLine::Macro { name, args } if name == "Ar" => {
if let Some((n, v)) = positional_of_mdoc_line(args) {
positionals.push((n, false, v));
}
i += 1;
}
GroffLine::Macro { name, args } if name == "Op" => {
let words: Vec<&str> = args.split(' ').filter(|w| !w.is_empty()).collect();
if matches!(words.first(), Some(&"Ar")) {
let rest = if args.len() > 3 { &args[3..] } else { "" };
if let Some((n, v)) = positional_of_mdoc_line(rest) {
positionals.push((n, true, v));
}
}
i += 1;
}
_ => i += 1,
}
}
i
}
let mut entries: Vec<ManpageEntry> = Vec::new();
let mut positionals: Vec<(String, bool, bool)> = Vec::new();
let mut i = 0;
while i < lines.len() {
// .Bl + .It header sequence — peek at first .It to decide if this is a flag list
if let GroffLine::Macro { name: n1, .. } = &lines[i]
&& n1 == "Bl"
{
let j = i + 1;
if j < lines.len()
&& let GroffLine::Macro {
name: n2,
args: it_args,
} = &lines[j]
&& n2 == "It"
{
let words: Vec<&str> = it_args.split(' ').filter(|w| !w.is_empty()).collect();
if matches!(words.first(), Some(&"Fl")) {
let k = parse_it(it_args, lines, j + 1, &mut entries);
i = parse_option_list(&mut entries, lines, k);
continue;
} else {
i = skip_to_el(lines, j + 1);
continue;
}
}
i = skip_to_el(lines, j);
continue;
}
if let GroffLine::Macro { name, args } = &lines[i]
&& name == "Sh"
&& args.trim().eq_ignore_ascii_case("SYNOPSIS")
{
i = parse_synopsis(&mut positionals, lines, i + 1);
continue;
}
i += 1;
}
// deduplicate positionals by name, preserving first-seen order
let mut seen: Vec<String> = Vec::new();
let mut deduped: Vec<(String, Positional)> = Vec::new();
for (name, optional, variadic) in positionals {
if !seen.contains(&name) {
seen.push(name.clone());
deduped.push((name, Positional { optional, variadic }));
}
}
ManpageResult {
entries,
subcommands: Vec::new(),
positionals: deduped,
description: String::new(),
}
}

View file

@ -0,0 +1,851 @@
//! section extraction from manpages.
//!
//! manpages are divided into sections by .SH macros. we extract OPTIONS,
//! NAME, SYNOPSIS, and COMMANDS sections for their specific content.
use nom::{Parser, sequence::preceded};
use crate::parsers::help::{parse_usage_args, parse_usage_flags, skip_command_name};
use crate::parsers::manpage::groff::{
GroffLine, strip_groff_escapes, strip_inline_macro_args, strip_space_macro_args,
};
use crate::parsers::manpage::{ManpageEntry, ManpageSubcommand, OwnedParam, OwnedSwitch};
use crate::types::{Param, Positional, Switch};
/// true when a section heading looks option-related, i.e. contains
/// "OPTION" case-insensitively ("OPTIONS", "Common Options", ...).
fn is_options_section(name: &str) -> bool {
    name.trim().to_ascii_uppercase().contains("OPTION")
}
/// extract the lines from the OPTIONS section(s). collects from all
/// option-like .SH sections and concatenates them (handles the nix pattern
/// of "Options" and "Common Options" being separate sections). when more
/// than one section contributes, a synthetic empty `.SH` line is inserted
/// between them so description collectors that stop on SH/SS do not bleed
/// across sections.
/// falls back to the body of the first DESCRIPTION section if no option
/// section yields any lines.
pub fn extract_options_section(lines: &[GroffLine]) -> Vec<GroffLine> {
    /// arguments of `line` when it is a `.SH` heading.
    fn heading_title(line: &GroffLine) -> Option<&str> {
        match line {
            GroffLine::Macro { name, args } if name == "SH" => Some(args.as_str()),
            _ => None,
        }
    }

    let mut collected: Vec<GroffLine> = Vec::new();
    let mut inside_options = false;
    for line in lines {
        match heading_title(line) {
            Some(title) => {
                inside_options = is_options_section(title);
                if inside_options && !collected.is_empty() {
                    collected.push(GroffLine::Macro {
                        name: "SH".to_string(),
                        args: String::new(),
                    });
                }
            }
            None if inside_options => collected.push(line.clone()),
            None => {}
        }
    }
    if !collected.is_empty() {
        return collected;
    }

    // fallback: DESCRIPTION section
    let description_heading = lines.iter().position(|line| {
        matches!(heading_title(line), Some(title) if title.trim().eq_ignore_ascii_case("DESCRIPTION"))
    });
    match description_heading {
        Some(at) => lines[at + 1..]
            .iter()
            .take_while(|line| heading_title(line).is_none())
            .cloned()
            .collect(),
        None => Vec::new(),
    }
}
/// return the body of the first `.SH` section whose title equals
/// `section_name` (ascii case-insensitive, surrounding whitespace
/// ignored). the body runs up to, but not including, the next `.SH`.
/// an empty vector means the section is missing or has no lines.
fn extract_named_section(lines: &[GroffLine], section_name: &str) -> Vec<GroffLine> {
    fn is_heading(line: &GroffLine) -> bool {
        matches!(line, GroffLine::Macro { name, .. } if name == "SH")
    }

    let wanted = lines.iter().position(|line| {
        matches!(
            line,
            GroffLine::Macro { name, args }
                if name == "SH" && args.trim().eq_ignore_ascii_case(section_name)
        )
    });
    match wanted {
        Some(at) => lines[at + 1..]
            .iter()
            .take_while(|line| !is_heading(line))
            .cloned()
            .collect(),
        None => Vec::new(),
    }
}
/// the NAME section follows the convention "command \- short description".
/// gather the section's text (plain lines, font macros, and the mdoc
/// `.Nm`/`.Nd` pair, with `.Nd` contributing a "\- " prefix), join it
/// with spaces, and return the part after the separator.
/// handles both "\-" (groff) and " - " (plain text) separators.
/// `None` when there is no NAME section or no description after the
/// separator.
pub fn extract_name_description(lines: &[GroffLine]) -> Option<String> {
    fn is_heading(line: &GroffLine) -> bool {
        matches!(line, GroffLine::Macro { name, .. } if name == "SH")
    }

    let heading = lines.iter().position(|line| {
        matches!(
            line,
            GroffLine::Macro { name, args }
                if name == "SH" && args.trim().eq_ignore_ascii_case("NAME")
        )
    })?;
    let mut pieces: Vec<String> = Vec::new();
    for line in lines[heading + 1..].iter().take_while(|line| !is_heading(line)) {
        match line {
            GroffLine::Text(text) => pieces.push(text.clone()),
            GroffLine::Macro { name, args } => {
                let rendered = match name.as_str() {
                    "B" | "BI" | "BR" | "I" | "IR" => {
                        strip_groff_escapes(&strip_inline_macro_args(args))
                    }
                    "Nm" | "Nd" => strip_groff_escapes(args),
                    _ => continue,
                };
                let rendered = rendered.trim();
                if rendered.is_empty() {
                    continue;
                }
                if name == "Nd" {
                    // mdoc keeps the description in its own macro; prefix
                    // the separator the splitter looks for
                    pieces.push(format!("\\- {rendered}"));
                } else {
                    pieces.push(rendered.to_string());
                }
            }
            GroffLine::Blank | GroffLine::Comment => {}
        }
    }
    let full = pieces.join(" ").trim().to_string();
    split_name_separator(&full)
}
/// split a NAME line on either "\-" (groff) or " - " (plain), whichever
/// occurs first. returns the trimmed text after the separator, or `None`
/// when no separator exists or nothing follows it.
fn split_name_separator(full: &str) -> Option<String> {
    const GROFF_SEPARATOR: &str = "\\-";
    const PLAIN_SEPARATOR: &str = " - ";
    let groff_at = find_padded(full, GROFF_SEPARATOR);
    let plain_at = find_padded(full, PLAIN_SEPARATOR);
    // earliest separator wins
    let at = match (groff_at, plain_at) {
        (Some(a), Some(b)) => a.min(b),
        (a, b) => a.or(b)?,
    };
    let from_separator = &full[at..];
    let after = from_separator
        .strip_prefix(GROFF_SEPARATOR)
        .unwrap_or_else(|| &from_separator[PLAIN_SEPARATOR.len()..]);
    let desc = after.trim();
    if desc.is_empty() {
        None
    } else {
        Some(desc.to_string())
    }
}
/// byte offset of the first occurrence of `needle` in `s`.
/// this is a plain substring search: any padding the caller cares about
/// must be part of `needle` itself (e.g. " - ").
fn find_padded(s: &str, needle: &str) -> Option<usize> {
    s.match_indices(needle).next().map(|(offset, _)| offset)
}
/// extract the command name from the SYNOPSIS section.
///
/// the SYNOPSIS section shows how to invoke the command:
/// .SH SYNOPSIS
/// .B git add
/// [\fIOPTIONS\fR] [\fB\-\-\fR] [\fI<pathspec>\fR...]
///
/// we extract the command name by taking consecutive "word" tokens until
/// we hit something that looks like an argument (starts with [, <, -, etc.).
pub fn extract_synopsis_command(contents: &str) -> Option<String> {
// pre-replace italic text (\fI...\fR) with angle-bracketed placeholders
// before classification strips the font info. italic in groff indicates
// a parameter/placeholder (e.g. \fIoperation\fR), not a command word.
// the angle brackets cause extract_cmd to stop at these tokens since
// '<' is in its stop set.
let preprocessed: Vec<String> = contents
.split('\n')
.map(replace_italic_with_angles)
.collect();
let classified: Vec<GroffLine> = preprocessed
.iter()
.map(|line| crate::parsers::manpage::groff::classify_line(line))
.collect();
let mut i = 0;
while i < classified.len() {
if let Some((stop_on_ss, content_start)) = synopsis_heading_at(&classified, i) {
i = content_start;
while i < classified.len() {
match &classified[i] {
GroffLine::Macro { name, .. }
if name == "SH" || (stop_on_ss && name == "SS") =>
{
return None;
}
GroffLine::Text(text) => {
let trimmed = text.trim();
if let Some(cmd) = synopsis_command_candidate(trimmed, true) {
return Some(cmd);
}
i += 1;
}
GroffLine::Macro { name, args } if name == "SY" => {
let text = strip_groff_escapes(args);
if let Some(cmd) = synopsis_command_candidate(text.trim(), false) {
return Some(cmd);
}
i += 1;
}
GroffLine::Macro { name, args }
if matches!(name.as_str(), "B" | "BI" | "BR") =>
{
let text = render_synopsis_command_macro(name, args);
if let Some(cmd) = synopsis_command_candidate(text.trim(), false) {
return Some(cmd);
}
i += 1;
}
_ => i += 1,
}
}
return None;
}
i += 1;
}
None
}
/// check whether `lines[i]` opens a SYNOPSIS section.
/// accepts `.SH`/`.SS` with the title "SYNOPSIS" on the macro line, or a
/// title-less `.SH`/`.SS` whose next non-blank, non-comment line is the
/// text "SYNOPSIS". returns (heading is a `.SS`, index of the first
/// content line). panics if `i` is out of range.
fn synopsis_heading_at(lines: &[GroffLine], i: usize) -> Option<(bool, usize)> {
    let (name, args) = match &lines[i] {
        GroffLine::Macro { name, args } if name == "SH" || name == "SS" => (name, args),
        _ => return None,
    };
    let is_subsection = name == "SS";
    let title = args.trim();
    if title.eq_ignore_ascii_case("SYNOPSIS") {
        return Some((is_subsection, i + 1));
    }
    if !title.is_empty() {
        return None;
    }
    // title-less heading: the title may sit on the following text line
    for (offset, line) in lines[i + 1..].iter().enumerate() {
        match line {
            GroffLine::Blank | GroffLine::Comment => continue,
            GroffLine::Text(text) if text.trim().eq_ignore_ascii_case("SYNOPSIS") => {
                return Some((is_subsection, i + offset + 2));
            }
            _ => return None,
        }
    }
    None
}
/// render the arguments of a font macro found in a synopsis.
/// single-font macros (`B`, `I`) keep spaces between arguments; every
/// other macro name is treated as alternating-font, so its arguments are
/// concatenated before escapes are stripped and the result trimmed.
fn render_synopsis_command_macro(name: &str, args: &str) -> String {
    if name == "B" || name == "I" {
        return strip_space_macro_args(args);
    }
    let joined = strip_inline_macro_args(args);
    strip_groff_escapes(&joined).trim().to_owned()
}
/// turn one rendered synopsis line into a command name, if it has one.
/// rejects empty lines, lines ending in ':', commands starting with '.',
/// and lines that read like prose rather than an invocation.
fn synopsis_command_candidate(line: &str, reject_long_unmarked: bool) -> Option<String> {
    let candidate_line = line.trim();
    if candidate_line.is_empty() || candidate_line.ends_with(':') {
        return None;
    }
    extract_cmd(candidate_line).filter(|cmd| {
        !cmd.starts_with('.')
            && !looks_like_synopsis_prose(candidate_line, cmd, reject_long_unmarked)
    })
}
/// heuristic: does a synopsis line read like a sentence rather than a
/// command invocation?
///
/// - an empty `cmd`, or one starting with an article/conjunction, is prose
/// - a line carrying an invocation marker (a word starting with `[`, `<`,
///   `-` or `{`, or any `|`) is otherwise never prose
/// - without a marker, prose is: a line ending in '.', a command of more
///   than three words (only when `reject_long_unmarked`), or a multi-word
///   line whose first word is capitalised like a sentence start
fn looks_like_synopsis_prose(line: &str, cmd: &str, reject_long_unmarked: bool) -> bool {
    let first = match cmd.split_whitespace().next() {
        Some(word) => word,
        None => return true,
    };
    let lowered = first.to_ascii_lowercase();
    if matches!(
        lowered.as_str(),
        "a" | "an" | "and" | "or" | "the" | "this" | "these"
    ) {
        return true;
    }
    let has_invocation_marker = line.contains('|')
        || line
            .split_whitespace()
            .any(|word| word.starts_with(['[', '<', '-', '{']));
    if has_invocation_marker {
        return false;
    }
    if line.ends_with('.') {
        return true;
    }
    if reject_long_unmarked && cmd.split_whitespace().count() > 3 {
        return true;
    }
    // "Word" followed by more words: capital first letter, lowercase rest
    let mut letters = first.chars();
    let capitalised = matches!(letters.next(), Some(c) if c.is_ascii_uppercase())
        && letters.all(|c| c.is_ascii_lowercase());
    capitalised && line.split_whitespace().count() > 1
}
/// replace \fI...\f[RP] sequences with <...> so italic params are seen as
/// non-word tokens by extract_cmd.
///
/// an italic run is recognised only when the first backslash after `\fI`
/// starts `\fR` or `\fP`; otherwise the text is copied through unchanged.
///
/// exception: some manpages put the command name itself in italics (e.g.
/// git-am.1's synopsis reads `\fIgit am\fR ...`). when an italic run is
/// preceded only by whitespace, no earlier run on the line was already
/// kept as the command, and its content looks like a command word, the
/// font markers are dropped but the content is left bare so extract_cmd
/// treats it as the command name rather than a placeholder.
fn replace_italic_with_angles(line: &str) -> String {
    let mut rendered = String::with_capacity(line.len());
    // `rest` is always a suffix of `line`, so it starts on a char boundary
    let mut rest = line;
    let mut command_consumed = false;
    while let Some(ch) = rest.chars().next() {
        if let Some(after_open) = rest.strip_prefix("\\fI") {
            // the italic content runs up to the next backslash
            let inner_len = after_open.find('\\').unwrap_or(after_open.len());
            let (inner, closing) = after_open.split_at(inner_len);
            if closing.starts_with("\\fR") || closing.starts_with("\\fP") {
                let consumed = line.len() - rest.len();
                let at_line_start =
                    !command_consumed && line[..consumed].chars().all(char::is_whitespace);
                if at_line_start && italic_looks_like_command(inner) {
                    rendered.push_str(inner);
                    command_consumed = true;
                } else {
                    rendered.push('<');
                    rendered.push_str(inner);
                    rendered.push('>');
                }
                rest = &closing[3..];
                continue;
            }
        }
        rendered.push(ch);
        rest = &rest[ch.len_utf8()..];
    }
    rendered
}
/// is the italic content something that looks like a command name (rather
/// than a placeholder)? after groff escapes are stripped and the result
/// trimmed, it must be non-empty and consist solely of ascii lowercase
/// letters, digits, hyphens, underscores, dots, and spaces.
fn italic_looks_like_command(inner: &str) -> bool {
    let resolved = strip_groff_escapes(inner);
    let candidate = resolved.trim();
    if candidate.is_empty() {
        return false;
    }
    candidate
        .bytes()
        .all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b' '))
}
/// extract the command name from a synopsis line by taking leading word
/// tokens. a word qualifies when it does not start with one of
/// `[ - < ( {` and consists only of ascii alphanumerics, '-', '_' and
/// '.'. the first non-qualifying word ends the command; `None` when no
/// word qualifies. the words are re-joined with single spaces.
fn extract_cmd(line: &str) -> Option<String> {
    let leading: Vec<&str> = line
        .split(' ')
        .filter(|word| !word.is_empty())
        .take_while(|word| {
            !word.starts_with(['[', '-', '<', '(', '{'])
                && word
                    .chars()
                    .all(|c| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.'))
        })
        .collect();
    if leading.is_empty() {
        None
    } else {
        Some(leading.join(" "))
    }
}
/// extract the lines that form the body of the SYNOPSIS section
/// (empty when the manpage has none).
fn extract_synopsis_section(lines: &[GroffLine]) -> Vec<GroffLine> {
    const SYNOPSIS_HEADING: &str = "SYNOPSIS";
    extract_named_section(lines, SYNOPSIS_HEADING)
}
/// extract positional arguments from the SYNOPSIS section.
/// the section is flattened to one line via `join_synopsis_text`; the
/// command name prefix is skipped and `parse_usage_args` runs on the
/// rest. names are lowercased. an empty synopsis or a parse failure
/// yields an empty list.
pub fn extract_synopsis_positionals(lines: &[GroffLine]) -> Vec<(String, Positional)> {
    let synopsis = join_synopsis_text(lines);
    if synopsis.is_empty() {
        return Vec::new();
    }
    let parsed: nom::IResult<&str, Vec<(&str, Positional)>> =
        preceded(skip_command_name, parse_usage_args).parse(&synopsis);
    let Ok((_, found)) = parsed else {
        return Vec::new();
    };
    let mut positionals = Vec::with_capacity(found.len());
    for (name, positional) in found {
        positionals.push((name.to_ascii_lowercase(), positional));
    }
    positionals
}
/// join the SYNOPSIS section into a single line of plain text, stripping
/// groff escapes and inline font macros. shared by both the positional
/// and flag extractors so they see identical input.
///
/// only the first synopsis form is used: scanning stops at the first
/// `.SS` or `.br`. `.I` arguments are wrapped in <...> (italic marks a
/// placeholder), `.IR` wraps only its first argument, `.B` keeps spaces
/// between arguments, and the alternating-font macros are concatenated.
/// lines that render to nothing are dropped.
fn join_synopsis_text(lines: &[GroffLine]) -> String {
    let mut pieces: Vec<String> = Vec::new();
    for line in extract_synopsis_section(lines) {
        let rendered = match &line {
            GroffLine::Text(text) => strip_groff_escapes(text).trim().to_string(),
            GroffLine::Macro { name, args } => match name.as_str() {
                "SS" | "br" => break,
                "SY" => strip_groff_escapes(args).trim().to_string(),
                "I" => {
                    let inner = strip_groff_escapes(args).trim().to_string();
                    if inner.is_empty() {
                        inner
                    } else {
                        format!("<{inner}>")
                    }
                }
                "IR" => render_leading_italic_arg(args),
                "B" => strip_space_macro_args(args),
                "BI" | "BR" | "IB" | "RB" | "RI" => {
                    strip_groff_escapes(&strip_inline_macro_args(args))
                        .trim()
                        .to_string()
                }
                _ => String::new(),
            },
            GroffLine::Blank | GroffLine::Comment => String::new(),
        };
        if !rendered.is_empty() {
            pieces.push(rendered);
        }
    }
    pieces.join(" ").trim().to_string()
}
/// render `.IR` arguments for a synopsis: the first whitespace-delimited
/// argument is italic and therefore wrapped in <...>; the remaining
/// arguments are concatenated like any alternating-font macro and
/// appended after a space. returns an empty string when the first
/// argument renders to nothing.
fn render_leading_italic_arg(args: &str) -> String {
    let args = args.trim();
    let (head, tail) = args.split_once(char::is_whitespace).unwrap_or((args, ""));
    let head = strip_groff_escapes(head);
    let head = head.trim();
    if head.is_empty() {
        return String::new();
    }
    let tail = strip_groff_escapes(&strip_inline_macro_args(tail.trim()));
    match tail.trim() {
        "" => format!("<{head}>"),
        rest => format!("<{head}> {rest}"),
    }
}
/// convert a borrowed `Switch` into its owned counterpart, variant by
/// variant; the long name is copied with `to_string`.
fn to_owned_switch(s: Switch<'_>) -> OwnedSwitch {
match s {
Switch::Short(c) => OwnedSwitch::Short(c),
Switch::Long(l) => OwnedSwitch::Long(l.to_string()),
Switch::Both(c, l) => OwnedSwitch::Both(c, l.to_string()),
}
}
/// convert a borrowed `Param` into its owned counterpart, keeping the
/// mandatory/optional distinction and copying the name with `to_string`.
fn to_owned_param(p: Param<'_>) -> OwnedParam {
match p {
Param::Mandatory(s) => OwnedParam::Mandatory(s.to_string()),
Param::Optional(s) => OwnedParam::Optional(s.to_string()),
}
}
/// extract flag-tagged entries from the SYNOPSIS line. some manpages
/// (notably nix-env, sed) declare flags only in the synopsis and never
/// repeat them as entries in the OPTIONS body, so the body-only pass
/// misses them. the synopsis text is joined the same way the positional
/// extractor does it, the command name is skipped, and
/// `parse_usage_flags` runs over the rest. every entry is returned with
/// an empty description; callers merge with body entries, where the body
/// wins on duplicate flag names since body descriptions are richer.
/// an empty synopsis or a parse failure yields an empty list.
pub fn extract_synopsis_flags(lines: &[GroffLine]) -> Vec<ManpageEntry> {
    let synopsis = join_synopsis_text(lines);
    if synopsis.is_empty() {
        return Vec::new();
    }
    let parsed: nom::IResult<&str, Vec<(Switch<'_>, Option<Param<'_>>)>> =
        preceded(skip_command_name, parse_usage_flags).parse(&synopsis);
    let Ok((_, pairs)) = parsed else {
        return Vec::new();
    };
    let mut entries = Vec::with_capacity(pairs.len());
    for (switch, param) in pairs {
        entries.push(ManpageEntry {
            switch: to_owned_switch(switch),
            param: param.map(to_owned_param),
            desc: String::new(),
        });
    }
    entries
}
/// extract first-positional choices from prose lists in DESCRIPTION.
///
/// getent(1) is the motivating shape: the synopsis has a `database`
/// positional, while the actual database names are documented as a tagged
/// list under DESCRIPTION rather than as subcommands or options. the
/// completion model currently has no separate "positional choices" channel,
/// so these are represented as subcommand-like candidates for completion.
///
/// nothing is extracted unless the section mentions a "database" and a
/// list introduction ("listed below" / "may be any of"). `.TP` items are
/// only read after a text line containing such an introduction; each
/// item's tag must be a plausible choice name, and duplicates are
/// dropped (first occurrence wins).
pub fn extract_description_positionals(lines: &[GroffLine]) -> Vec<ManpageSubcommand> {
    let description = extract_named_section(lines, "DESCRIPTION");
    if description.is_empty() || !description_mentions_listed_database(&description) {
        return Vec::new();
    }
    let mut choices = Vec::new();
    let mut known = std::collections::HashSet::new();
    let mut list_open = false;
    let mut cursor = 0;
    while cursor < description.len() {
        let next = match &description[cursor] {
            GroffLine::Text(text) => {
                let lowered = text.to_ascii_lowercase();
                if lowered.contains("listed below") || lowered.contains("may be any of") {
                    list_open = true;
                }
                cursor + 1
            }
            GroffLine::Macro { name, .. } if list_open && name == "TP" => {
                // the tag is the line right after `.TP`
                let Some(tag_line) = description.get(cursor + 1) else {
                    break;
                };
                match description_tag_name(tag_line) {
                    Some(tag) if is_description_choice_name(&tag) => {
                        let (desc, after) =
                            collect_description_choice_desc(&description, cursor + 2);
                        if known.insert(tag.clone()) {
                            choices.push(ManpageSubcommand { name: tag, desc });
                        }
                        after
                    }
                    _ => cursor + 1,
                }
            }
            _ => cursor + 1,
        };
        cursor = next;
    }
    choices
}
/// gate for the description-choice extractor: true when the section's
/// text and font-macro lines mention "database" somewhere and also
/// contain a list introduction ("listed below" or "may be any of"),
/// compared ascii case-insensitively. the two may be on different lines.
fn description_mentions_listed_database(lines: &[GroffLine]) -> bool {
    let mut saw_database = false;
    let mut saw_list = false;
    for line in lines {
        let lowered = match line {
            GroffLine::Text(text) => text.to_ascii_lowercase(),
            GroffLine::Macro { name, args }
                if matches!(name.as_str(), "B" | "BI" | "BR" | "I" | "IR" | "RI") =>
            {
                strip_groff_escapes(&strip_inline_macro_args(args)).to_ascii_lowercase()
            }
            _ => continue,
        };
        if lowered.contains("database") {
            saw_database = true;
        }
        if lowered.contains("listed below") || lowered.contains("may be any of") {
            saw_list = true;
        }
    }
    saw_database && saw_list
}
/// text of a `.TP` tag line: a text line as-is, or the concatenated,
/// escape-stripped arguments of a B/BI/BR/I/IR macro — trimmed in both
/// cases. other line kinds have no tag.
fn description_tag_name(line: &GroffLine) -> Option<String> {
    let rendered = match line {
        GroffLine::Text(text) => return Some(text.trim().to_string()),
        GroffLine::Macro { name, args }
            if matches!(name.as_str(), "B" | "BI" | "BR" | "I" | "IR") =>
        {
            strip_groff_escapes(&strip_inline_macro_args(args))
        }
        _ => return None,
    };
    Some(rendered.trim().to_string())
}
/// is `name` a plausible choice keyword? it must be 1..=32 bytes long,
/// must not start with '-', and may contain only ascii lowercase letters,
/// digits, '-' and '_'.
fn is_description_choice_name(name: &str) -> bool {
    if name.is_empty() || name.len() > 32 || name.starts_with('-') {
        return false;
    }
    // every byte of a non-ascii char is >= 0x80, so a byte scan rejects
    // exactly the same inputs as a char scan
    name.bytes()
        .all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_'))
}
/// collect the description of one tagged-list item starting at `start`.
/// text lines and font macros (B/BI/BR/I/IR/RI) contribute; blank lines,
/// comments and other macros are skipped; the next `.TP`, `.SH` or `.SS`
/// ends the item. returns the first sentence of the joined text and the
/// index of the line that stopped the scan (end of input if none did).
fn collect_description_choice_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
    let mut pieces: Vec<String> = Vec::new();
    let mut end = start;
    for line in lines.iter().skip(start) {
        match line {
            GroffLine::Macro { name, .. } if matches!(name.as_str(), "TP" | "SH" | "SS") => {
                break;
            }
            GroffLine::Text(text) => pieces.push(text.clone()),
            GroffLine::Macro { name, args }
                if matches!(name.as_str(), "B" | "BI" | "BR" | "I" | "IR" | "RI") =>
            {
                let rendered = strip_groff_escapes(&strip_inline_macro_args(args));
                let rendered = rendered.trim();
                if !rendered.is_empty() {
                    pieces.push(rendered.to_string());
                }
            }
            GroffLine::Blank | GroffLine::Comment | GroffLine::Macro { .. } => {}
        }
        end += 1;
    }
    (first_sentence(&pieces.join(" ")), end)
}
/// reduce `text` to its first sentence.
/// runs of whitespace are collapsed to single spaces first. the sentence
/// ends at the first ". " if there is one, otherwise at the first ".) ";
/// the terminator is kept in full — including the closing parenthesis of
/// ".)" — and only the trailing space is dropped. text without either
/// marker is returned whole (trimmed).
fn first_sentence(text: &str) -> String {
    let text = text.split_whitespace().collect::<Vec<_>>().join(" ");
    for marker in [". ", ".) "] {
        if let Some(idx) = text.find(marker) {
            // cut after the terminator itself, i.e. everything in the
            // marker except its final space; cutting at `idx + 1` would
            // drop the ')' of ".)" and leave an unbalanced parenthesis
            let end = idx + marker.len() - 1;
            return text[..end].trim().to_string();
        }
    }
    text.trim().to_string()
}
/// does a section heading introduce a list of commands?
/// a trailing parenthetical group is ignored first, so
/// "HIGH-LEVEL COMMANDS (PORCELAIN)" (git.1's pattern) is treated as
/// "HIGH-LEVEL COMMANDS". the remainder matches when it is exactly
/// "COMMAND"/"COMMANDS" or ends in " COMMANDS" ("GIT COMMANDS",
/// "MAIN COMMANDS", ...), compared ascii case-insensitively. the required
/// leading space keeps headings like "COMMAND LINE OPTIONS" out.
fn is_commands_section(name: &str) -> bool {
    let heading = name.trim();
    let core = if heading.ends_with(')') {
        heading
            .rfind('(')
            .map_or(heading, |open| heading[..open].trim())
    } else {
        heading
    };
    let upper = core.to_ascii_uppercase();
    matches!(upper.as_str(), "COMMAND" | "COMMANDS") || upper.ends_with(" COMMANDS")
}
/// collect the body lines of every commands section.
///
/// a section starts at a .SH macro whose heading satisfies
/// `is_commands_section` and runs up to (not including) the next .SH macro.
/// the bodies of all matching sections are concatenated in document order;
/// the .SH lines themselves are never included.
pub fn extract_commands_section(lines: &[GroffLine]) -> Vec<GroffLine> {
    let mut collected: Vec<GroffLine> = Vec::new();
    let mut inside = false;
    for line in lines {
        if let GroffLine::Macro { name, args } = line {
            if name == "SH" {
                // every .SH both ends the current section and decides
                // whether the next one is collected
                inside = is_commands_section(args);
                continue;
            }
        }
        if inside {
            collected.push(line.clone());
        }
    }
    collected
}
/// extract SUBCOMMAND-style sections (clap-generated manpages put each
/// subcommand under its own .SH SUBCOMMAND header with a Usage: line).
///
/// returns one `(name, description, lines)` triple per section in which a
/// "Usage: NAME" marker was found, so the caller can re-parse each section
/// as its own help_result. the description is built from the text lines
/// seen before the usage line, with groff escapes and backtick quoting
/// removed. sections without a usage line are dropped.
pub fn extract_subcommand_sections(lines: &[GroffLine]) -> Vec<(String, String, Vec<GroffLine>)> {
    // pass 1: cut the input at .SH boundaries, keeping only the bodies of
    // sections headed SUBCOMMAND or SUBCOMMANDS
    let mut sections: Vec<Vec<GroffLine>> = Vec::new();
    let mut pending: Vec<GroffLine> = Vec::new();
    let mut in_section = false;
    for line in lines {
        match line {
            GroffLine::Macro { name, args } if name == "SH" => {
                if in_section {
                    sections.push(std::mem::take(&mut pending));
                }
                let heading = args.trim().to_ascii_uppercase();
                in_section = matches!(heading.as_str(), "SUBCOMMAND" | "SUBCOMMANDS");
            }
            other if in_section => pending.push(other.clone()),
            _ => {}
        }
    }
    if in_section {
        sections.push(pending);
    }

    // pass 2: find the usage line of each section to learn the subcommand
    // name; text seen before it becomes the description
    let mut out = Vec::new();
    for section in sections {
        let mut subcmd_name: Option<String> = None;
        let mut desc_lines: Vec<String> = Vec::new();
        for line in &section {
            let found = match line {
                GroffLine::Text(t) => {
                    let found = find_usage_name(t);
                    if found.is_none() {
                        desc_lines.push(t.clone());
                    }
                    found
                }
                GroffLine::Macro { name, args }
                    if matches!(name.as_str(), "TP" | "B" | "BI" | "BR") =>
                {
                    let stripped = strip_groff_escapes(&strip_inline_macro_args(args));
                    find_usage_name(stripped.trim())
                }
                _ => None,
            };
            if found.is_some() {
                subcmd_name = found;
                break;
            }
        }
        let Some(name) = subcmd_name else {
            continue;
        };
        let joined = desc_lines.join(" ");
        let cleaned = strip_groff_escapes(&joined).trim().to_string();
        out.push((name, strip_backtick_words(&cleaned), section));
    }
    out
}
/// locate the first "Usage: " marker in `text` and return the name that
/// follows it: the longest run of ascii alphanumerics, '_' and '-'.
/// returns None when the marker is absent or no name character follows it.
fn find_usage_name(text: &str) -> Option<String> {
    const MARKER: &str = "Usage: ";
    let (_, tail) = text.split_once(MARKER)?;
    let name: String = tail
        .chars()
        .take_while(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-'))
        .collect();
    if name.is_empty() { None } else { Some(name) }
}
/// remove backtick quoting: `word` -> word.
/// backticks are consumed in pairs from left to right; a backtick with no
/// closing partner is kept as-is.
fn strip_backtick_words(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(open) = rest.find('`') {
        result.push_str(&rest[..open]);
        let after = &rest[open + 1..];
        match after.find('`') {
            Some(close) => {
                result.push_str(&after[..close]);
                rest = &after[close + 1..];
            }
            None => {
                // unmatched backtick: emit it literally and carry on
                result.push('`');
                rest = after;
            }
        }
    }
    result.push_str(rest);
    result
}

View file

@ -0,0 +1,456 @@
//! strategy-based entry extraction.
//!
//! rather than a single monolithic parser, we use multiple "strategies" that
//! each target a specific groff formatting pattern. this is necessary because
//! manpage authors use very different macro combinations for the same purpose.
use nom::{Parser, combinator::opt};
use crate::make_macro_walker;
use crate::parsers::help::{help_parser, param_parser, switch_parser};
use crate::parsers::manpage::groff::{
GroffLine, strip_groff_escapes, strip_inline_macro_args, strip_space_macro_args,
};
use crate::parsers::manpage::{ManpageEntry, OwnedParam, OwnedSwitch};
use crate::types::{Param, Switch};
/// take the leading run of text lines and join them with single spaces.
/// returns (joined text, the remaining lines starting at the first
/// non-text line).
fn collect_text_lines(lines: &[GroffLine]) -> (String, &[GroffLine]) {
    let texts: Vec<&str> = lines
        .iter()
        .map_while(|line| match line {
            GroffLine::Text(t) => Some(t.as_str()),
            _ => None,
        })
        .collect();
    let consumed = texts.len();
    (texts.join(" "), &lines[consumed..])
}
/// gather the description of a tagged entry, starting at `lines[start]`.
///
/// the scan stops at the next .TP/.TQ/.IP/.PP/.SH/.SS macro or at the end of
/// input. text lines are taken verbatim, font macros contribute their tag
/// text when non-empty, and blanks, comments and other macros are skipped.
///
/// returns the pieces joined with spaces and the index where the scan
/// stopped.
fn collect_description_lines(lines: &[GroffLine], start: usize) -> (String, usize) {
    let mut pieces: Vec<String> = Vec::new();
    let mut pos = start;
    while let Some(line) = lines.get(pos) {
        match line {
            GroffLine::Macro { name, .. }
                if matches!(name.as_str(), "TP" | "TQ" | "IP" | "PP" | "SH" | "SS") =>
            {
                break;
            }
            GroffLine::Text(t) => pieces.push(t.clone()),
            GroffLine::Macro { name, args }
                if matches!(
                    name.as_str(),
                    "B" | "BI" | "BR" | "I" | "IR" | "IB" | "RB" | "RI"
                ) =>
            {
                let text = tag_of_macro(name, args);
                if !text.is_empty() {
                    pieces.push(text);
                }
            }
            GroffLine::Blank | GroffLine::Comment | GroffLine::Macro { .. } => {}
        }
        pos += 1;
    }
    (pieces.join(" "), pos)
}
/// convert a borrowed `Switch` into an `OwnedSwitch`, variant for variant.
/// the short character is passed through; the long name is turned into a
/// `String` with `to_string()`.
fn to_owned_switch(s: Switch<'_>) -> OwnedSwitch {
match s {
Switch::Short(c) => OwnedSwitch::Short(c),
Switch::Long(l) => OwnedSwitch::Long(l.to_string()),
Switch::Both(c, l) => OwnedSwitch::Both(c, l.to_string()),
}
}
/// convert a borrowed `Param` into an `OwnedParam`, keeping the
/// mandatory/optional distinction and turning the name into a `String`.
fn to_owned_param(p: Param<'_>) -> OwnedParam {
match p {
Param::Mandatory(s) => OwnedParam::Mandatory(s.to_string()),
Param::Optional(s) => OwnedParam::Optional(s.to_string()),
}
}
/// try to turn a tag string (e.g. "-v, --verbose FILE") into an entry.
///
/// groff escapes are stripped and the tag trimmed, then the nom
/// `switch_parser` followed by an optional `param_parser` (both from the
/// help module) is run over it. any input left over after those parsers is
/// ignored. returns None when the parse fails; `desc` is stored unchanged
/// on success.
pub fn parse_tag_to_entry(tag: &str, desc: String) -> Option<ManpageEntry> {
    let cleaned = strip_groff_escapes(tag);
    let parsed: nom::IResult<&str, (Switch<'_>, Option<Param<'_>>)> =
        (switch_parser, opt(param_parser)).parse(cleaned.trim());
    let (_, (switch, param)) = parsed.ok()?;
    Some(ManpageEntry {
        switch: to_owned_switch(switch),
        param: param.map(to_owned_param),
        desc,
    })
}
/// extract the tag text carried by a font macro line.
/// .B and .I take their arguments as one space-preserving string; every
/// other macro name goes through the inline-argument stripper, then has
/// groff escapes removed and surrounding whitespace trimmed.
pub fn tag_of_macro(name: &str, args: &str) -> String {
    if matches!(name, "B" | "I") {
        return strip_space_macro_args(args);
    }
    let joined = strip_inline_macro_args(args);
    strip_groff_escapes(&joined).trim().to_string()
}
// strategy a: .TP style (most common — gnu coreutils, help2man).
// .TP introduces a tagged paragraph: the line after it is the "tag" (flag
// name), either plain text or wrapped in a font macro (.B, .BI, ...), and
// the following lines are the description. alternate tags chained with .TQ
// share one description.
pub fn strategy_tp(lines: &[GroffLine]) -> Vec<ManpageEntry> {
    let mut entries = Vec::new();
    let mut pos = 0;
    while pos < lines.len() {
        let is_tp = matches!(&lines[pos], GroffLine::Macro { name, .. } if name == "TP");
        pos += 1;
        if !is_tp {
            continue;
        }
        // `pos` now points at the line right after the .TP
        let (tags, body_start) = collect_tp_tags(lines, pos);
        if tags.is_empty() {
            continue;
        }
        let (desc, resume) = collect_description_lines(lines, body_start);
        entries.extend(entries_from_tag_alternates(&tags, desc));
        pos = resume;
    }
    entries
}
fn collect_tp_tags(lines: &[GroffLine], start: usize) -> (Vec<String>, usize) {
let mut tags = Vec::new();
let mut i = start;
loop {
if i >= lines.len() {
break;
}
let Some(tag) = tag_from_line(&lines[i]) else {
break;
};
tags.push(tag);
i += 1;
if i < lines.len() && matches!(&lines[i], GroffLine::Macro { name, .. } if name == "TQ") {
i += 1;
continue;
}
break;
}
(tags, i)
}
/// interpret a single line as a tag: a text line is the tag itself, a font
/// macro yields its tag text, anything else is not a tag.
fn tag_from_line(line: &GroffLine) -> Option<String> {
    const FONT_MACROS: [&str; 8] = ["B", "I", "BI", "BR", "IR", "IB", "RB", "RI"];
    match line {
        GroffLine::Text(tag) => Some(tag.clone()),
        GroffLine::Macro { name, args } if FONT_MACROS.contains(&name.as_str()) => {
            Some(tag_of_macro(name, args))
        }
        _ => None,
    }
}
/// parse every alternate tag into an entry sharing the same description.
/// tags that do not parse are dropped. when exactly two entries result and
/// they form a short/long pair, they are merged into a single entry.
fn entries_from_tag_alternates(tags: &[String], desc: String) -> Vec<ManpageEntry> {
    let mut parsed: Vec<ManpageEntry> = Vec::new();
    for tag in tags {
        if let Some(entry) = parse_tag_to_entry(tag, desc.clone()) {
            parsed.push(entry);
        }
    }
    if let [first, second] = parsed.as_slice() {
        if let Some(merged) = combine_short_long_alternates(first, second) {
            return vec![merged];
        }
    }
    parsed
}
/// merge a short-only and a long-only entry (in either order) into one
/// `Both` entry. the parameter of the long side is preferred, falling back
/// to the short side's; the description is always taken from `left`.
/// returns None for any other combination of switches.
fn combine_short_long_alternates(
    left: &ManpageEntry,
    right: &ManpageEntry,
) -> Option<ManpageEntry> {
    let (short, long, param) = match (&left.switch, &right.switch) {
        (OwnedSwitch::Long(l), OwnedSwitch::Short(c)) => {
            (*c, l, left.param.clone().or_else(|| right.param.clone()))
        }
        (OwnedSwitch::Short(c), OwnedSwitch::Long(l)) => {
            (*c, l, right.param.clone().or_else(|| left.param.clone()))
        }
        _ => return None,
    };
    Some(ManpageEntry {
        switch: OwnedSwitch::Both(short, long.clone()),
        param,
        desc: left.desc.clone(),
    })
}
// strategy b: .IP style (curl, hand-written manpages).
// .IP takes an inline tag argument: .IP "-v, --verbose"
// the description follows as text lines.
//
// for each .IP macro the closure strips groff escapes from the macro
// arguments to get the tag, joins the text lines directly after the macro
// into the description, and yields the parsed entry together with the index
// of the first line after that text. a tag that does not parse yields None.
// NOTE(review): how the walker advances on Some/None is decided by
// make_macro_walker!, which is defined elsewhere — confirm there.
make_macro_walker!(pub strategy_ip -> Vec<ManpageEntry>, on macro "IP" =>
|lines, i, args| {
let tag = strip_groff_escapes(args);
let (desc, rest) = collect_text_lines(&lines[i + 1..]);
// `rest` is a suffix of `lines`, so this recovers its absolute start index
let new_i = lines.len() - rest.len();
parse_tag_to_entry(&tag, desc).map(|e| (e, new_i))
}
);
// strategy c: .PP + .RS/.RE style (git, docbook-generated manpages).
// flag entries are introduced by .PP (paragraph), with the flag name as
// plain text, followed by a .RS (indent) block containing the description,
// closed by .RE (de-indent).
//
// the closure only considers a .PP whose next line is a text line; that
// line is the tag and collect_pp_rs_desc gathers the description from the
// line after it. a .PP at the very end of input, a .PP followed by a
// non-text line, or a tag that does not parse all yield None.
make_macro_walker!(pub strategy_pp_rs -> Vec<ManpageEntry>, on macro "PP" =>
|lines, i, _args| {
// nothing follows the .PP, so there is no tag line to read
if i + 1 >= lines.len() { return None; }
if let GroffLine::Text(tag) = &lines[i + 1] {
let (desc, new_i) = collect_pp_rs_desc(lines, i + 2);
parse_tag_to_entry(tag, desc).map(|e| (e, new_i))
} else {
None
}
}
);
/// gather the description for a .PP-style entry, starting at `lines[start]`.
///
/// text lines seen before the .RS are collected too. once a .RS is found,
/// text inside it is collected until the matching .RE (consumed) or until a
/// .PP/.SH boundary (left in place for the caller). any other line type
/// before the .RS ends the scan immediately.
///
/// returns the collected text joined with spaces and the index at which the
/// caller should resume.
fn collect_pp_rs_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
let mut acc: Vec<String> = Vec::new();
let mut i = start;
// outer: look for .RS marker or text
while i < lines.len() {
match &lines[i] {
GroffLine::Macro { name, .. } if name == "RS" => {
i += 1;
// inside .RS — collect until .RE or boundary macro
while i < lines.len() {
match &lines[i] {
GroffLine::Macro { name, .. } if name == "RE" => {
// resume after the .RE so it is not seen again
return (acc.join(" "), i + 1);
}
GroffLine::Text(t) => {
acc.push(t.clone());
i += 1;
}
GroffLine::Macro { name, .. } if name == "PP" || name == "SH" => {
// unterminated .RS: stop at the boundary without consuming it
return (acc.join(" "), i);
}
// other macros, blanks and comments inside the block are skipped
_ => i += 1,
}
}
// input ended inside the .RS block
return (acc.join(" "), i);
}
GroffLine::Text(t) => {
acc.push(t.clone());
i += 1;
}
// anything else before a .RS ends the description
_ => return (acc.join(" "), i),
}
}
(acc.join(" "), i)
}
/// strategy d: deroff fallback.
///
/// flattens the section to plain text — text lines as-is, .B/.I/.BI/.BR/.IR
/// macros reduced to their stripped arguments, blank lines kept as empty
/// lines, everything else dropped — and feeds that text to the help parser.
/// a parse failure yields no entries.
pub fn strategy_deroff(lines: &[GroffLine]) -> Vec<ManpageEntry> {
    let mut plain = String::with_capacity(256);
    for line in lines {
        match line {
            GroffLine::Text(text) => plain.push_str(text),
            GroffLine::Macro { name, args }
                if matches!(name.as_str(), "BI" | "BR" | "IR" | "B" | "I") =>
            {
                plain.push_str(&strip_groff_escapes(&strip_inline_macro_args(args)));
            }
            GroffLine::Blank => {}
            // dropped lines contribute nothing, not even a newline
            _ => continue,
        }
        plain.push('\n');
    }
    let Ok((_, parsed)) = help_parser(&plain) else {
        return Vec::new();
    };
    parsed
        .entries
        .into_iter()
        .map(|e| ManpageEntry {
            switch: to_owned_switch(e.switch),
            param: e.param.map(to_owned_param),
            desc: e.desc.join(" "),
        })
        .collect()
}
/// an .IP counts as a "bullet" when its argument string holds anything other
/// than whitespace. the marker itself is not inspected.
fn is_bullet_ip(args: &str) -> bool {
    args.chars().any(|c| !c.is_whitespace())
}
// strategy e: nix3-style bullet .IP with .UR/.UE hyperlinks.
// nix's manpages use .IP with bullet markers for flag entries, interleaved
// with .UR/.UE hyperlink macros. the flag tag is in text lines after the
// bullet .IP, and the description follows a non-bullet .IP marker.
//
// the closure ignores .IP macros without arguments, joins the text lines
// that follow a bullet .IP (skipping .UR/.UE) into the tag, and hands the
// rest to collect_nix_desc. a tag that does not parse yields None.
make_macro_walker!(pub strategy_nix -> Vec<ManpageEntry>, on macro "IP" =>
|lines, i, args| {
if !is_bullet_ip(args) { return None; }
// collect tag: skip .UR/.UE macros, gather Text lines
let mut tag_idx = i + 1;
let mut tag_parts: Vec<String> = Vec::new();
while tag_idx < lines.len() {
match &lines[tag_idx] {
GroffLine::Macro { name, .. } if name == "UR" || name == "UE" => {
tag_idx += 1;
}
GroffLine::Text(t) => {
tag_parts.push(t.clone());
tag_idx += 1;
}
// first line that is neither text nor .UR/.UE ends the tag
_ => break,
}
}
let tag = tag_parts.join(" ");
let (desc, new_i) = collect_nix_desc(lines, tag_idx);
parse_tag_to_entry(&tag, desc).map(|e| (e, new_i))
}
);
/// gather the description of a nix-style entry, starting at `lines[start]`.
///
/// the description must be introduced by an .IP macro without arguments;
/// otherwise an empty description is returned and nothing is consumed.
/// after that marker, text lines are collected until the next .IP with
/// arguments (the next entry) or a .SS/.SH heading, neither of which is
/// consumed. argument-less .IP macros are treated as paragraph breaks,
/// .RS blocks are skipped wholesale, and other macros, blanks and comments
/// are ignored.
///
/// returns the text joined with spaces and the index to resume from.
fn collect_nix_desc(lines: &[GroffLine], start: usize) -> (String, usize) {
if start >= lines.len() {
return (String::new(), start);
}
let mut i = start;
// require non-bullet .IP marker for description
if let GroffLine::Macro { name, args } = &lines[i]
&& name == "IP"
&& args.trim().is_empty()
{
i += 1;
} else {
// no description marker: report an empty description, consume nothing
return (String::new(), start);
}
let mut parts: Vec<String> = Vec::new();
while i < lines.len() {
match &lines[i] {
GroffLine::Text(t) => {
parts.push(t.clone());
i += 1;
}
GroffLine::Macro { name, args } if name == "IP" => {
if !args.trim().is_empty() {
// next bullet entry — stop
return (parts.join(" "), i);
}
// non-bullet .IP = continuation paragraph
i += 1;
}
GroffLine::Macro { name, .. } if name == "SS" || name == "SH" => {
// section boundary ends the description without being consumed
return (parts.join(" "), i);
}
GroffLine::Macro { name, .. } if name == "RS" => {
// skip the whole indented block, including nested .RS/.RE pairs
i = skip_rs(lines, i + 1, 1);
}
GroffLine::Macro { .. } => {
i += 1;
}
GroffLine::Blank | GroffLine::Comment => {
i += 1;
}
}
}
(parts.join(" "), i)
}
/// skip past an indented block. `start` is the first line inside the block
/// and `depth` the number of .RS levels already open (callers pass 1).
/// nested .RS/.RE pairs are tracked. returns the index just after the .RE
/// that brings the depth to zero, or the end of input when the block is
/// never closed.
fn skip_rs(lines: &[GroffLine], start: usize, mut depth: usize) -> usize {
    for (idx, line) in lines.iter().enumerate().skip(start) {
        let GroffLine::Macro { name, .. } = line else {
            continue;
        };
        match name.as_str() {
            "RE" => {
                depth -= 1;
                if depth == 0 {
                    return idx + 1;
                }
            }
            "RS" => depth += 1,
            _ => {}
        }
    }
    // nothing closed the block; a `start` past the end is returned unchanged
    lines.len().max(start)
}
/// count how many lines of the section are the macro called `name`.
fn count_macro(name: &str, lines: &[GroffLine]) -> usize {
    let mut total = 0;
    for line in lines {
        if let GroffLine::Macro { name: found, .. } = line {
            if found == name {
                total += 1;
            }
        }
    }
    total
}
/// auto-detect which strategies apply, run them, and return the result with
/// the most entries.
///
/// a strategy is applicable when the macros it relies on occur in the
/// section: .TP for strategy a, .IP for b, .PP together with .RS for c, and
/// .UR together with .IP for the nix strategy. on a tie the strategy tried
/// later wins. when no applicable strategy yields any entry, the deroff
/// fallback is used instead.
pub fn extract_entries(lines: &[GroffLine]) -> Vec<ManpageEntry> {
    let has = |macro_name: &str| count_macro(macro_name, lines) > 0;
    let (tp, ip, pp, rs, ur) = (has("TP"), has("IP"), has("PP"), has("RS"), has("UR"));

    let mut results: Vec<Vec<ManpageEntry>> = Vec::new();
    if tp {
        results.push(strategy_tp(lines));
    }
    if ip {
        results.push(strategy_ip(lines));
    }
    if pp && rs {
        results.push(strategy_pp_rs(lines));
    }
    if ur && ip {
        results.push(strategy_nix(lines));
    }
    results.retain(|entries| !entries.is_empty());
    if results.is_empty() {
        return strategy_deroff(lines);
    }
    // max_by_key returns the last of several equal maxima, so later
    // strategies win ties
    results
        .into_iter()
        .max_by_key(|entries| entries.len())
        .unwrap_or_default()
}

3
src/parsers/mod.rs Normal file
View file

@ -0,0 +1,3 @@
pub mod help;
pub mod manpage;
pub mod nushell;

475
src/parsers/nushell.rs Normal file
View file

@ -0,0 +1,475 @@
//! generate nushell `extern` definitions from parsed help data.
//!
//! this module is the code generation backend. it takes a [`ManpageResult`]
//! (from the help or manpage parsers) and produces nushell source that defines
//! `extern` declarations — nushell's mechanism for teaching the shell about
//! external commands' flags and subcommands so it can offer completions.
//!
//! key responsibilities:
//! - deduplicating flag entries (same flag from multiple help sources)
//! - mapping parameter names to nushell types (path, int, string)
//! - formatting flags in nushell syntax: --flag(-f): type # description
//! - handling positional arguments with nushell's ordering constraints
//! - escaping special characters for nushell string literals
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::sync::OnceLock;
use crate::parsers::manpage::{
ManpageEntry, ManpageResult, ManpageSubcommand, OwnedParam, OwnedSwitch,
};
use crate::types::Positional;
/// nushell built-in commands and keywords — we must never generate `extern`
/// definitions for these because it would shadow nushell's own implementations.
/// maintained manually and should be updated with new nushell releases.
/// entries are kept in alphabetical order; lookups go through a hash set
/// built from this list, so the order only matters for maintenance.
pub const NUSHELL_BUILTINS: &[&str] = &[
"alias",
"all",
"ansi",
"any",
"append",
"ast",
"attr",
"bits",
"break",
"bytes",
"cal",
"cd",
"char",
"chunk-by",
"chunks",
"clear",
"collect",
"columns",
"commandline",
"compact",
"complete",
"config",
"const",
"continue",
"cp",
"date",
"debug",
"decode",
"def",
"default",
"describe",
"detect",
"do",
"drop",
"du",
"each",
"echo",
"encode",
"enumerate",
"error",
"every",
"exec",
"exit",
"explain",
"explore",
"export",
"export-env",
"extern",
"fill",
"filter",
"find",
"first",
"flatten",
"for",
"format",
"from",
"generate",
"get",
"glob",
"grid",
"group-by",
"hash",
"headers",
"help",
"hide",
"hide-env",
"histogram",
"history",
"http",
"if",
"ignore",
"input",
"insert",
"inspect",
"interleave",
"into",
"is-admin",
"is-empty",
"is-not-empty",
"is-terminal",
"items",
"job",
"join",
"keybindings",
"kill",
"last",
"length",
"let",
"let-env",
"lines",
"load-env",
"loop",
"ls",
"match",
"math",
"merge",
"metadata",
"mkdir",
"mktemp",
"module",
"move",
"mut",
"mv",
"nu-check",
"nu-highlight",
"open",
"overlay",
"panic",
"par-each",
"parse",
"path",
"plugin",
"port",
"prepend",
"print",
"ps",
"query",
"random",
"reduce",
"reject",
"rename",
"return",
"reverse",
"rm",
"roll",
"rotate",
"run-external",
"save",
"schema",
"scope",
"select",
"seq",
"shuffle",
"skip",
"sleep",
"slice",
"sort",
"sort-by",
"source",
"source-env",
"split",
"start",
"stor",
"str",
"sys",
"table",
"take",
"tee",
"term",
"timeit",
"to",
"touch",
"transpose",
"try",
"tutor",
"ulimit",
"umask",
"uname",
"uniq",
"uniq-by",
"unlet",
"update",
"upsert",
"url",
"use",
"values",
"version",
"view",
"watch",
"where",
"which",
"while",
"whoami",
"window",
"with-env",
"wrap",
"zip",
];
/// hash set view of `NUSHELL_BUILTINS`, built on first use and shared for
/// the rest of the process.
fn builtin_set() -> &'static HashSet<&'static str> {
    static SET: OnceLock<HashSet<&'static str>> = OnceLock::new();
    SET.get_or_init(|| {
        let mut names = HashSet::new();
        for &name in NUSHELL_BUILTINS {
            names.insert(name);
        }
        names
    })
}
/// returns true if the given command name collides with a nushell built-in.
/// the comparison is an exact, case-sensitive match against
/// `NUSHELL_BUILTINS`.
pub fn is_nushell_builtin(cmd: &str) -> bool {
builtin_set().contains(cmd)
}
/// map parameter names to nushell types.
///
/// nushell's `extern` declarations use typed parameters, so we infer the type
/// from the parameter name. file/path-related names become "path" (enables
/// path completion), numeric names become "int", everything else is "string".
///
/// names are compared ascii case-insensitively, so "FILE", "file" and "File"
/// all map to "path". help texts spell placeholders in either case, and
/// matching only some names in lower case gave inconsistent results.
pub fn nushell_type_of_param(name: &str) -> &'static str {
    const PATH_NAMES: &[&str] = &[
        "FILE",
        "PATH",
        "DIR",
        "DIRECTORY",
        "FILENAME",
        "PATTERNFILE",
    ];
    const INT_NAMES: &[&str] = &[
        "NUM", "N", "COUNT", "NUMBER", "INT", "COLS", "WIDTH", "LINES", "DEPTH",
    ];
    let is_one_of = |known: &[&str]| known.iter().any(|k| name.eq_ignore_ascii_case(k));
    if is_one_of(PATH_NAMES) {
        "path"
    } else if is_one_of(INT_NAMES) {
        "int"
    } else {
        "string"
    }
}
/// escape a string for use inside a nushell double-quoted string literal by
/// putting a backslash in front of every double quote and backslash.
/// input that needs no escaping is returned borrowed, without allocating.
pub fn escape_nu(s: &str) -> Cow<'_, str> {
    fn needs_escape(c: char) -> bool {
        c == '"' || c == '\\'
    }
    if !s.contains(needs_escape) {
        return Cow::Borrowed(s);
    }
    let mut escaped = String::with_capacity(s.len() + 4);
    for c in s.chars() {
        if needs_escape(c) {
            escaped.push('\\');
        }
        escaped.push(c);
    }
    Cow::Owned(escaped)
}
/// deduplication key of an entry: "-c" for a short-only switch, "--long"
/// for a long switch. `Both` entries are keyed by their long name alone, so
/// a `Both` and a `Long` entry with the same long name share a key.
fn entry_key(e: &ManpageEntry) -> String {
match &e.switch {
OwnedSwitch::Short(c) => format!("-{c}"),
OwnedSwitch::Long(l) | OwnedSwitch::Both(_, l) => format!("--{l}"),
}
}
/// rank how informative an entry is, for choosing between duplicates:
/// one point per 10 bytes of description (capped at 5), +10 when both the
/// short and long form are known, +5 when a parameter is present.
fn entry_score(e: &ManpageEntry) -> i32 {
    let mut score = (e.desc.len() / 10).min(5) as i32;
    if matches!(e.switch, OwnedSwitch::Both(_, _)) {
        score += 10;
    }
    if e.param.is_some() {
        score += 5;
    }
    score
}
/// deduplicate flag entries that refer to the same flag.
///
/// entries are grouped by `entry_key`. within a group the highest-scoring
/// entry wins (see `entry_score`: +10 for short+long, +5 for a parameter, up
/// to +5 for description length); on equal scores the earliest one is kept.
///
/// standalone short flags whose letter is already covered by a winning
/// Both(short, long) entry are dropped as well. this prevents emitting both
/// "-v" and "--verbose(-v)", which nushell would reject as a duplicate.
///
/// the output follows the order in which each key first appears in `entries`.
pub fn dedup_entries(entries: &[ManpageEntry]) -> Vec<ManpageEntry> {
    // best candidate per key; only a strictly better score replaces it
    let mut best: HashMap<String, &ManpageEntry> = HashMap::new();
    for entry in entries {
        let slot = best.entry(entry_key(entry)).or_insert(entry);
        if entry_score(entry) > entry_score(*slot) {
            *slot = entry;
        }
    }

    // short letters already provided by a winning Both entry
    let covered: HashSet<char> = best
        .values()
        .filter_map(|e| match &e.switch {
            OwnedSwitch::Both(c, _) => Some(*c),
            _ => None,
        })
        .collect();

    let mut seen: HashSet<String> = HashSet::new();
    let mut out: Vec<ManpageEntry> = Vec::new();
    for entry in entries {
        let key = entry_key(entry);
        if seen.contains(&key) {
            continue;
        }
        if matches!(&entry.switch, OwnedSwitch::Short(c) if covered.contains(c)) {
            continue;
        }
        let chosen: &ManpageEntry = best[&key];
        out.push(chosen.clone());
        seen.insert(key);
    }
    out
}
/// render one flag entry as a parameter line of a nushell `extern`.
///
/// the line is the flag name ("--long(-s)", "--long" or "-s"), followed by
/// ": type" when the flag takes a parameter, followed — when a description
/// exists — by padding up to column 40 (at least one space) and
/// "# description". e.g. "--output(-o): path   # write output to file".
pub fn format_flag(entry: &ManpageEntry) -> String {
    let name = match &entry.switch {
        OwnedSwitch::Short(c) => format!("-{c}"),
        OwnedSwitch::Long(l) => format!("--{l}"),
        OwnedSwitch::Both(c, l) => format!("--{l}(-{c})"),
    };
    let typed = match entry.param.as_ref() {
        None => String::new(),
        Some(OwnedParam::Mandatory(p)) | Some(OwnedParam::Optional(p)) => {
            format!(": {}", nushell_type_of_param(p))
        }
    };
    let flag = format!(" {name}{typed}");
    if entry.desc.is_empty() {
        return flag;
    }
    let pad_len = 40usize.saturating_sub(flag.len()).max(1);
    format!("{flag}{}# {}", " ".repeat(pad_len), entry.desc)
}
/// render a positional argument as a parameter line of a nushell `extern`.
/// variadic positionals are written "...name: type", optional non-variadic
/// ones "name?: type", all others "name: type". hyphens in the name become
/// underscores, and the type is inferred from the upper-cased name.
pub fn format_positional(name: &str, p: &Positional) -> String {
    let name_underscored = name.replace('-', "_");
    let (prefix, suffix) = match (p.variadic, p.optional) {
        (true, _) => ("...", ""),
        (false, true) => ("", "?"),
        (false, false) => ("", ""),
    };
    let typ = nushell_type_of_param(&name.to_ascii_uppercase());
    format!(" {prefix}{name_underscored}{suffix}: {typ}")
}
/// enforce nushell's positional argument ordering rules:
/// 1. no required positional may follow an optional one
/// 2. at most one variadic ("rest") parameter is allowed
///
/// a non-variadic positional that comes after an optional or variadic one is
/// silently made optional. every variadic positional after the first is
/// dropped. relative order is otherwise preserved.
pub fn fixup_positionals(positionals: Vec<(String, Positional)>) -> Vec<(String, Positional)> {
    let mut seen_optional = false;
    let mut seen_variadic = false;
    let mut fixed = Vec::with_capacity(positionals.len());
    for (name, mut p) in positionals {
        if p.variadic {
            if seen_variadic {
                continue;
            }
            seen_variadic = true;
            // anything after the rest parameter is treated as optional
            seen_optional = true;
        } else {
            p.optional = p.optional || seen_optional;
            seen_optional = p.optional;
        }
        fixed.push((name, p));
    }
    fixed
}
/// derive a nushell `module` name from a command name.
/// ascii letters, ascii digits, '-' and '_' are kept; every other character
/// is replaced by a hyphen. "-completions" is appended to the result.
pub fn module_name_of(cmd_name: &str) -> String {
    let mut module = String::new();
    for c in cmd_name.chars() {
        let keep = c.is_ascii_alphanumeric() || matches!(c, '-' | '_');
        module.push(if keep { c } else { '-' });
    }
    module.push_str("-completions");
    module
}
/// generate the full nushell `extern` block for a command.
///
/// produces output like:
/// export extern "git add" [
/// ...pathspec: path
/// --verbose(-v) # be verbose
/// --dry-run(-n) # dry run
/// ]
///
/// positionals come first (after `fixup_positionals`), then the
/// deduplicated flags. every subcommand listed in the result additionally
/// gets a stub `extern` block holding only its description as a comment:
/// export extern "git stash" [ # stash changes
/// ]
pub fn generate_extern(cmd_name: &str, result: &ManpageResult) -> String {
    let escaped_name = escape_nu(cmd_name);
    let mut out = format!("export extern \"{escaped_name}\" [\n");
    for (name, positional) in fixup_positionals(result.positionals.clone()) {
        out.push_str(&format_positional(&name, &positional));
        out.push('\n');
    }
    for entry in dedup_entries(&result.entries) {
        out.push_str(&format_flag(&entry));
        out.push('\n');
    }
    out.push_str("]\n");
    let stubs = result.subcommands.iter().map(|sc| {
        format!(
            "\nexport extern \"{} {}\" [ # {}\n]\n",
            escaped_name,
            escape_nu(&sc.name),
            escape_nu(&sc.desc)
        )
    });
    out.extend(stubs);
    out
}
/// wrap the command's `extern` block in a nushell `module`.
/// output: "module git-completions { ... }\n\nuse git-completions *\n"
/// the trailing `use` brings the `extern` into scope right away.
pub fn generate_module(cmd_name: &str, result: &ManpageResult) -> String {
    let mod_name = module_name_of(cmd_name);
    let body = generate_extern(cmd_name, result);
    format!("module {mod_name} {{\n{}}}\n\nuse {mod_name} *\n", body)
}
/// convenience wrapper: generate an `extern` from a bare list of flag
/// entries, with no subcommands, positionals or description.
pub fn generate_extern_from_entries(cmd_name: &str, entries: Vec<ManpageEntry>) -> String {
    let result = ManpageResult {
        entries,
        subcommands: Vec::new(),
        positionals: Vec::new(),
        description: String::new(),
    };
    generate_extern(cmd_name, &result)
}
/// build a stub `ManpageSubcommand` from a name and description, used when
/// extracting subcommands from a parsed help result for nushell output.
pub fn manpage_subcommand_from(name: &str, desc: &str) -> ManpageSubcommand {
    let (name, desc) = (name.to_string(), desc.to_string());
    ManpageSubcommand { name, desc }
}

233
src/pool.rs Normal file
View file

@ -0,0 +1,233 @@
//! BFS-queue worker pool for parallel subprocess scraping.
//!
//! workers pull jobs from a shared queue and call a user-supplied
//! handler; the handler gets a `Submitter` to push newly-discovered
//! child jobs back onto the same queue. once `wait` has closed the pool
//! and the in-flight count reaches zero, the workers exit and `wait` returns.
//!
//! the queue-back design is deliberate: command-help trees are uneven
//! (one binary has 30 subs, another has 1). queue-back keeps every
//! worker fed; spawn-in-place would leave cores idle on lopsided trees.
//!
//! synchronization: `parking_lot::Condvar` parks workers when the queue is
//! empty. the queue, in-flight count, and close state live under one mutex so
//! the condvar predicate cannot miss a wakeup.
//! parking_lot gives no-poison locks (no `Result` noise on every
//! `lock()`) and a single-syscall fast path in the uncontended case.
use std::collections::VecDeque;
use std::sync::Arc;
use std::thread::{self, JoinHandle};
use parking_lot::{Condvar, Mutex};
/// mutable pool state; every field is read and written under the single
/// mutex in `Inner`.
struct State<J> {
/// jobs waiting for a worker, in submission order.
queue: VecDeque<J>,
/// jobs created but not yet completed. counts both queued and
/// in-progress jobs. workers can exit once wait() has closed the pool
/// and this reaches 0.
in_flight: usize,
/// set by wait(), which is also the point where top-level submission is
/// done. workers must not exit on transient empty periods before this.
closed: bool,
}
/// shared state held behind an `Arc` by every worker and by the
/// submitter handles handed to the per-job handler.
struct Inner<J> {
/// queue, in-flight counter and close flag, guarded together.
state: Mutex<State<J>>,
/// signalled when a job is queued, when the pool is closed, and when the
/// last in-flight job of a closed pool completes.
notify: Condvar,
}
impl<J> Inner<J> {
/// enqueue a job, count it as in flight, and wake one parked worker.
fn submit(&self, job: J) {
let mut state = self.state.lock();
state.in_flight += 1;
state.queue.push_back(job);
self.notify.notify_one();
}
/// block until a job is available and return it, or return None once the
/// pool is closed and no job is queued or running.
fn next(&self) -> Option<J> {
let mut state = self.state.lock();
loop {
if let Some(job) = state.queue.pop_front() {
return Some(job);
}
if state.closed && state.in_flight == 0 {
return None;
}
// queue is empty but more work may still arrive: park until notified,
// then re-check both conditions
self.notify.wait(&mut state);
}
}
/// record that one job has finished. must be called exactly once per job
/// returned by `next`.
fn complete(&self) {
let mut state = self.state.lock();
state.in_flight -= 1;
if state.closed && state.in_flight == 0 {
// we were the last in-flight job after wait() closed top-level
// submission, so parked workers can wake and exit.
self.notify.notify_all();
}
}
}
/// cheap-to-clone handle that lets a job handler enqueue further jobs.
/// passed by reference to the handler closure.
pub struct Submitter<J> {
/// the pool state this handle submits into.
inner: Arc<Inner<J>>,
}
impl<J> Clone for Submitter<J> {
fn clone(&self) -> Self {
Submitter {
inner: self.inner.clone(),
}
}
}
impl<J> Submitter<J> {
/// enqueue `job` on the pool this handle belongs to. the job counts as in
/// flight from this call until its handler run has finished.
pub fn submit(&self, job: J) {
self.inner.submit(job);
}
}
/// BFS-queue worker pool. each worker pulls a job, calls the handler
/// (which may submit further jobs via the passed `Submitter`), then marks
/// the job complete. workers keep running through transient empty periods;
/// they exit only after `wait` has closed the pool and the in-flight count
/// has reached zero, at which point `wait` returns.
pub struct ScrapePool<J> {
/// state shared with the workers and with every `Submitter`.
inner: Arc<Inner<J>>,
/// join handles of the spawned worker threads, consumed by `wait`.
workers: Vec<JoinHandle<()>>,
}
impl<J: Send + 'static> ScrapePool<J> {
/// spawn `num_workers` threads that run `handler` on each job pulled
/// from the queue. the handler receives the job by value and a
/// `&Submitter` for enqueuing children.
pub fn new<F>(num_workers: usize, handler: F) -> Self
where
F: Fn(J, &Submitter<J>) + Send + Sync + 'static,
{
let inner = Arc::new(Inner {
state: Mutex::new(State {
queue: VecDeque::new(),
in_flight: 0,
closed: false,
}),
notify: Condvar::new(),
});
let handler = Arc::new(handler);
let workers = (0..num_workers.max(1))
.map(|_| {
let inner = inner.clone();
let handler = handler.clone();
thread::spawn(move || {
let submitter = Submitter {
inner: inner.clone(),
};
while let Some(job) = inner.next() {
handler(job, &submitter);
inner.complete();
}
})
})
.collect();
ScrapePool { inner, workers }
}
/// submit a top-level job. typically called by the orchestrating
/// thread before `wait`; handlers should use `Submitter::submit`.
pub fn submit(&self, job: J) {
self.inner.submit(job);
}
/// block until all jobs (initial + transitively discovered) have
/// completed, then join every worker thread.
pub fn wait(self) {
{
let mut state = self.inner.state.lock();
state.closed = true;
// Wake workers so they can either drain queued work or exit if
// the pool was empty. The close flag is guarded by this same lock,
// so this cannot race with a worker entering the condvar wait.
self.inner.notify.notify_all();
}
for w in self.workers {
let _ = w.join();
}
}
}
// unit tests for the pool: flat workloads, handler-submitted children,
// idle periods before wait(), and the empty-pool case.
#[cfg(test)]
mod tests {
use super::*;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::Duration;
// 100 independent jobs on 4 workers: each must be handled exactly once.
#[test]
fn flat_jobs_processed_once_each() {
let collected: Arc<Mutex<Vec<u32>>> = Arc::new(Mutex::new(Vec::new()));
let pool = ScrapePool::new(4, {
let collected = collected.clone();
move |n: u32, _: &Submitter<u32>| {
collected.lock().push(n);
}
});
for i in 0..100u32 {
pool.submit(i);
}
pool.wait();
let mut got = collected.lock().clone();
got.sort();
assert_eq!(got, (0..100).collect::<Vec<_>>());
}
// jobs submitted from inside the handler must also run before wait() returns.
#[test]
fn discovered_children_processed_to_completion() {
// BFS expansion: every odd number under 10 spawns its successor.
let collected: Arc<Mutex<Vec<u32>>> = Arc::new(Mutex::new(Vec::new()));
let pool = ScrapePool::new(2, {
let collected = collected.clone();
move |n: u32, sub: &Submitter<u32>| {
collected.lock().push(n);
if n < 10 && n % 2 == 1 {
sub.submit(n + 1);
}
}
});
for i in [1u32, 3, 5, 7, 9] {
pool.submit(i);
}
pool.wait();
let mut got = collected.lock().clone();
got.sort();
assert_eq!(got, vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
}
// the queue running dry before wait() is called must not make the worker
// exit: a job submitted afterwards still has to be processed.
#[test]
fn transient_empty_queue_before_wait_does_not_stop_workers() {
let processed = Arc::new(AtomicUsize::new(0));
let pool = ScrapePool::new(1, {
let processed = processed.clone();
move |_: u32, _: &Submitter<u32>| {
processed.fetch_add(1, Ordering::SeqCst);
}
});
pool.submit(1);
// spin until the first job has been handled, so the queue is empty
while processed.load(Ordering::SeqCst) == 0 {
thread::yield_now();
}
// give the worker time to go back to waiting on the empty queue
thread::sleep(Duration::from_millis(10));
pool.submit(2);
pool.wait();
assert_eq!(processed.load(Ordering::SeqCst), 2);
}
// closing a pool that never received a job must not hang.
#[test]
fn wait_with_no_jobs_returns_immediately() {
let pool: ScrapePool<()> = ScrapePool::new(2, |_, _| {});
pool.wait();
}
}

657
src/store.rs Normal file
View file

@ -0,0 +1,657 @@
//! filesystem store for parsed completion data.
//!
//! write side: serialize ManpageResult to JSON, derive sanitised
//! filenames from command names ("git add" → git_add.json).
//!
//! read side: look up a command by name across the user cache + system
//! dirs, deserialize JSON or parse a .nu extern blob back into a result.
use std::collections::HashMap;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use serde_json::Value;
use crate::parsers::manpage::{
ManpageEntry, ManpageResult, ManpageSubcommand, OwnedParam, OwnedSwitch,
};
use crate::types::Positional;
/// default cache directory: $XDG_CACHE_HOME/inshellah, falling back to
/// $HOME/.cache/inshellah, and finally to the relative path
/// `.cache/inshellah` when neither variable is usable.
///
/// the XDG base directory spec requires $XDG_CACHE_HOME to be an absolute
/// path and says relative values must be ignored, so an empty or relative
/// value is skipped here instead of silently creating a cache relative to
/// the current working directory. values are read with `var_os` so paths
/// that are not valid UTF-8 are still honoured.
pub fn default_store_path() -> PathBuf {
    let xdg_cache = std::env::var_os("XDG_CACHE_HOME")
        .map(PathBuf::from)
        // an empty path is not absolute, so this also rejects "".
        .filter(|dir| dir.is_absolute());
    if let Some(dir) = xdg_cache {
        return dir.join("inshellah");
    }
    match std::env::var_os("HOME") {
        Some(home) if !home.is_empty() => PathBuf::from(home).join(".cache/inshellah"),
        _ => PathBuf::from(".cache/inshellah"),
    }
}
/// create directory and all parents.
///
/// thin wrapper over `fs::create_dir_all`; any error it reports is
/// returned unchanged.
pub fn ensure_dir(dir: &Path) -> io::Result<()> {
fs::create_dir_all(dir)
}
/// turn a command name into a string that is safe to use as a file stem.
///
/// ascii letters, digits, `-`, `_` and `.` are kept as they are; every
/// other character — including the space that separates a subcommand, so
/// "git add" becomes "git_add" — is replaced by a single `_`.
pub fn filename_of_command(cmd: &str) -> String {
    let mut stem = String::with_capacity(cmd.len());
    for ch in cmd.chars() {
        let safe = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.');
        stem.push(if safe { ch } else { '_' });
    }
    stem
}
/// inverse of the filename mapping: "git_add" yields the command name
/// "git add".
///
/// every underscore becomes a space, with no attempt to tell a separator
/// from an underscore that was part of the original name. that loss is
/// accepted because the read side only uses the result for display.
pub fn command_of_filename(base: &str) -> String {
    base.chars()
        .map(|ch| if ch == '_' { ' ' } else { ch })
        .collect()
}
/// escape `s` for use inside a JSON string literal (without the
/// surrounding quotes).
///
/// quote, backslash and the common whitespace controls get their
/// two-character escapes; any other character below U+0020 is written as
/// `\u00XX`; everything else is copied through unchanged.
fn escape_json(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len() + 2);
    for ch in s.chars() {
        let short_form = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            '\x08' => Some("\\b"),
            '\x0c' => Some("\\f"),
            _ => None,
        };
        if let Some(seq) = short_form {
            escaped.push_str(seq);
        } else if (ch as u32) < 0x20 {
            // remaining control characters have no short escape
            escaped.push_str(&format!("\\u{:04x}", ch as u32));
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
/// render `s` as a complete JSON string literal: the escaped text wrapped
/// in double quotes.
fn json_string(s: &str) -> String {
    let body = escape_json(s);
    let mut quoted = String::with_capacity(body.len() + 2);
    quoted.push('"');
    quoted.push_str(&body);
    quoted.push('"');
    quoted
}
/// render a switch as a tagged JSON object.
///
/// the "type" field is "short", "long" or "both"; "char" is present when
/// the switch has a short form and "name" when it has a long form, always
/// in that order.
fn json_switch(s: &OwnedSwitch) -> String {
    // normalise each variant to (tag, short char if any, long name if any)
    let (tag, short, long) = match s {
        OwnedSwitch::Short(c) => ("short", Some(c), None),
        OwnedSwitch::Long(l) => ("long", None, Some(l)),
        OwnedSwitch::Both(c, l) => ("both", Some(c), Some(l)),
    };
    let mut fields = vec![format!(r#""type":"{tag}""#)];
    if let Some(c) = short {
        fields.push(format!(r#""char":{}"#, json_string(&c.to_string())));
    }
    if let Some(name) = long {
        fields.push(format!(r#""name":{}"#, json_string(name)));
    }
    format!("{{{}}}", fields.join(","))
}
/// render an optional parameter: `null` when absent, otherwise an object
/// with "kind" ("mandatory" or "optional") and the parameter "name".
fn json_param(p: &Option<OwnedParam>) -> String {
    let Some(param) = p else {
        return "null".to_string();
    };
    let (kind, name) = match param {
        OwnedParam::Mandatory(n) => ("mandatory", n),
        OwnedParam::Optional(n) => ("optional", n),
    };
    format!(r#"{{"kind":"{kind}","name":{}}}"#, json_string(name))
}
/// render one option entry as `{"switch":…,"param":…,"desc":…}`.
fn json_entry(e: &ManpageEntry) -> String {
    let switch = json_switch(&e.switch);
    let param = json_param(&e.param);
    let desc = json_string(&e.desc);
    format!(r#"{{"switch":{switch},"param":{param},"desc":{desc}}}"#)
}
/// render one subcommand as `{"name":…,"desc":…}`.
fn json_subcommand(sc: &ManpageSubcommand) -> String {
    let name = json_string(&sc.name);
    let desc = json_string(&sc.desc);
    format!(r#"{{"name":{name},"desc":{desc}}}"#)
}
fn json_positional(name: &str, p: &Positional) -> String {
format!(
r#"{{"name":{},"optional":{},"variadic":{}}}"#,
json_string(name),
p.optional,
p.variadic
)
}
/// render `items` as a JSON array, using `f` to turn each element into
/// its (already valid) JSON text. elements are separated by a bare comma
/// and an empty slice gives `[]`.
fn json_list<T, F: Fn(&T) -> String>(items: &[T], f: F) -> String {
    let mut out = String::from("[");
    for (index, item) in items.iter().enumerate() {
        if index > 0 {
            out.push(',');
        }
        out.push_str(&f(item));
    }
    out.push(']');
    out
}
/// serialize a ManpageResult to JSON:
/// {"source":..., "description":..., "entries":[...],
/// "subcommands":[...], "positionals":[...]}
pub fn json_of_result(source: &str, result: &ManpageResult) -> String {
let entries = json_list(&result.entries, json_entry);
let subcommands = json_list(&result.subcommands, json_subcommand);
let positionals_parts: Vec<String> = result
.positionals
.iter()
.map(|(name, p)| json_positional(name, p))
.collect();
let positionals = format!("[{}]", positionals_parts.join(","));
format!(
r#"{{"source":{},"description":{},"entries":{},"subcommands":{},"positionals":{}}}"#,
json_string(source),
json_string(&result.description),
entries,
subcommands,
positionals,
)
}
/// write `contents` to `path`, first creating the parent directory (and
/// any missing ancestors) when the path has one. an existing file is
/// replaced.
pub fn write_file(path: &Path, contents: &str) -> io::Result<()> {
    path.parent()
        .map_or(Ok(()), |dir| fs::create_dir_all(dir))?;
    fs::write(path, contents)
}
/// store the parsed result for `command` in `dir` as
/// `<sanitised command>.json`, tagging it with `source`.
pub fn write_result(
    dir: &Path,
    command: &str,
    source: &str,
    result: &ManpageResult,
) -> io::Result<()> {
    let mut file_name = filename_of_command(command);
    file_name.push_str(".json");
    let payload = json_of_result(source, result);
    write_file(&dir.join(file_name), &payload)
}
/// store a native-nushell completion blob (the binary supplied its own)
/// in `dir` as `<sanitised command>.nu`, writing `data` unmodified.
pub fn write_native(dir: &Path, command: &str, data: &str) -> io::Result<()> {
    let mut file_name = filename_of_command(command);
    file_name.push_str(".nu");
    write_file(&dir.join(file_name), data)
}
// --- read side ---
/// read a whole file as UTF-8 text. any failure (missing file, permission
/// problem, invalid UTF-8, …) is reported as `None`.
fn read_file(path: &Path) -> Option<String> {
    match fs::read_to_string(path) {
        Ok(text) => Some(text),
        Err(_) => None,
    }
}
fn read_json_result(path: &Path) -> Option<(String, ManpageResult)> {
let data = read_file(path)?;
let v = serde_json::from_str::<Value>(&data).ok()?;
let source = v
.get("source")
.and_then(|x| x.as_str())
.unwrap_or("json")
.to_string();
Some((source, result_from_json(&v)))
}
/// decode a switch object written by the write side. the "type" field
/// selects the variant; a missing or mistyped field, an empty "char"
/// string, or an unknown type gives `None`. only the first character of
/// "char" is used.
fn switch_from_json(v: &Value) -> Option<OwnedSwitch> {
    let short = || -> Option<char> { v.get("char")?.as_str()?.chars().next() };
    let long = || -> Option<String> { Some(v.get("name")?.as_str()?.to_string()) };
    match v.get("type")?.as_str()? {
        "short" => short().map(OwnedSwitch::Short),
        "long" => long().map(OwnedSwitch::Long),
        "both" => Some(OwnedSwitch::Both(short()?, long()?)),
        _ => None,
    }
}
/// decode a parameter object. JSON `null`, a missing or non-string "kind"
/// or "name", and an unrecognised kind all give `None`.
fn param_from_json(v: &Value) -> Option<OwnedParam> {
    if v.is_null() {
        return None;
    }
    // pick the variant constructor first, then feed it the name
    let make: fn(String) -> OwnedParam = match v.get("kind")?.as_str()? {
        "mandatory" => OwnedParam::Mandatory,
        "optional" => OwnedParam::Optional,
        _ => return None,
    };
    let name = v.get("name")?.as_str()?;
    Some(make(name.to_string()))
}
fn entry_from_json(v: &Value) -> Option<ManpageEntry> {
let switch = switch_from_json(v.get("switch")?)?;
let param = v.get("param").and_then(param_from_json);
let desc = v
.get("desc")
.and_then(|d| d.as_str())
.unwrap_or("")
.to_string();
Some(ManpageEntry {
switch,
param,
desc,
})
}
fn subcommand_from_json(v: &Value) -> Option<ManpageSubcommand> {
let name = v.get("name")?.as_str()?.to_string();
let desc = v
.get("desc")
.and_then(|d| d.as_str())
.unwrap_or("")
.to_string();
Some(ManpageSubcommand { name, desc })
}
fn positional_from_json(v: &Value) -> Option<(String, Positional)> {
let name = v.get("name")?.as_str()?.to_string();
let optional = v.get("optional").and_then(|x| x.as_bool()).unwrap_or(false);
let variadic = v.get("variadic").and_then(|x| x.as_bool()).unwrap_or(false);
Some((name, Positional { optional, variadic }))
}
/// deserialize a JSON cache entry into ManpageResult.
pub fn result_from_json(v: &Value) -> ManpageResult {
let description = v
.get("description")
.and_then(|d| d.as_str())
.unwrap_or("")
.to_string();
let entries = v
.get("entries")
.and_then(|x| x.as_array())
.map(|arr| arr.iter().filter_map(entry_from_json).collect())
.unwrap_or_default();
let subcommands = v
.get("subcommands")
.and_then(|x| x.as_array())
.map(|arr| arr.iter().filter_map(subcommand_from_json).collect())
.unwrap_or_default();
let positionals = v
.get("positionals")
.and_then(|x| x.as_array())
.map(|arr| arr.iter().filter_map(positional_from_json).collect())
.unwrap_or_default();
ManpageResult {
entries,
subcommands,
positionals,
description,
}
}
/// parse nushell `export extern` blocks out of a .nu source file.
///
/// returns the result for the block whose name equals `target_cmd`: its
/// entries, positionals and description. every other block named
/// `<target_cmd> <word>` (exactly one extra word) is listed as a
/// subcommand. if no block matches, the default (empty) result is
/// returned.
///
/// scanning is line based: a `# ` comment directly above an extern line
/// becomes that block's description, a line starting with `]` closes the
/// block, and inside a block everything after the first `#` on a line is
/// that parameter's description.
pub fn parse_nu_completions(target_cmd: &str, contents: &str) -> ManpageResult {
    let mut blocks: Vec<NuBlock> = Vec::new();
    let mut pending_desc = String::new();
    // the block currently being filled, if we are inside one
    let mut open: Option<NuBlock> = None;

    for raw in contents.split('\n') {
        let line = raw.trim();

        if open.is_some() {
            if line.starts_with(']') {
                blocks.extend(open.take());
            } else if let Some(current) = open.as_mut() {
                let (param_part, desc) = match line.split_once('#') {
                    Some((before, after)) => (before.trim(), after.trim()),
                    None => (line, ""),
                };
                parse_nu_param_line_into(param_part, desc, current);
            }
            continue;
        }

        if let Some(comment) = line.strip_prefix("# ") {
            pending_desc = comment.trim().to_string();
        } else if let Some(cmd) = extract_extern_name(line) {
            open = Some(NuBlock {
                cmd,
                description: std::mem::take(&mut pending_desc),
                ..Default::default()
            });
        } else {
            // any other line breaks the link between a comment and the
            // extern that might follow it
            pending_desc.clear();
        }
    }
    // a block left unterminated at end of input is still kept
    blocks.extend(open);

    let Some(matched) = blocks.iter().find(|b| b.cmd == target_cmd) else {
        return ManpageResult::default();
    };

    // immediate subcommands come from sibling blocks ("target sub" pattern)
    let prefix = format!("{target_cmd} ");
    let subcommands: Vec<ManpageSubcommand> = blocks
        .iter()
        .filter_map(|b| {
            let suffix = b.cmd.strip_prefix(prefix.as_str())?;
            if suffix.is_empty() || suffix.contains(' ') {
                return None;
            }
            Some(ManpageSubcommand {
                name: suffix.to_string(),
                desc: b.description.clone(),
            })
        })
        .collect();

    ManpageResult {
        entries: matched.entries.clone(),
        subcommands,
        positionals: matched.positionals.clone(),
        description: matched.description.clone(),
    }
}
/// pull the command name out of an `export extern` line.
///
/// a quoted name (`export extern "git add" [`) is returned without the
/// quotes; an unquoted name runs up to the first character that is not an
/// ascii letter, digit, `_` or `-`. returns `None` when the keyword is
/// absent, a quote is left unterminated, or the unquoted name is empty.
fn extract_extern_name(line: &str) -> Option<String> {
    const KEYWORD: &str = "export extern";
    let (_, tail) = line.split_once(KEYWORD)?;
    let tail = tail.trim_start();
    match tail.strip_prefix('"') {
        Some(quoted) => quoted.split_once('"').map(|(name, _)| name.to_string()),
        None => {
            let name = tail
                .split(|c: char| !(c.is_ascii_alphanumeric() || c == '_' || c == '-'))
                .next()
                .unwrap_or("");
            if name.is_empty() {
                None
            } else {
                Some(name.to_string())
            }
        }
    }
}
/// interpret one parameter line from inside an extern block and append
/// what it describes to `block`.
///
/// `param_part` is the line with its trailing comment removed and `desc`
/// is that comment. three shapes are recognised:
/// - `--name`, `--name(-c)`, optionally followed by `: type` → long flag
/// - `-c` → short flag (never given a parameter)
/// - `name`, `name?`, `...name`, optionally followed by `: type` →
///   positional; `-` in the name becomes `_`, and a name that matches an
///   existing positional case-insensitively is skipped
/// lines shorter than two bytes are ignored.
fn parse_nu_param_line_into(param_part: &str, desc: &str, block: &mut NuBlock) {
    if param_part.len() < 2 {
        return;
    }

    // long flag: --name(-c): type or --name: type or --name
    if let Some(body) = param_part.strip_prefix("--") {
        let (name, mut tail) = split_at_non_name_char(body);
        if name.is_empty() {
            return;
        }
        // optional "(-c)" short alias directly after the name
        let mut short: Option<char> = None;
        if let Some(inner) = tail.strip_prefix("(-") {
            let mut chars = inner.chars();
            if let Some(c) = chars.next() {
                if let Some(after_paren) = chars.as_str().strip_prefix(')') {
                    short = Some(c);
                    tail = after_paren;
                }
            }
        }
        let param = parse_type_suffix(tail);
        let switch = match short {
            Some(c) => OwnedSwitch::Both(c, name.to_string()),
            None => OwnedSwitch::Long(name.to_string()),
        };
        block.entries.push(ManpageEntry {
            switch,
            param,
            desc: desc.to_string(),
        });
        return;
    }

    // short flag: -c
    if let Some(flag) = param_part.strip_prefix('-') {
        match flag.chars().next() {
            Some(c) if c.is_ascii_alphanumeric() => block.entries.push(ManpageEntry {
                switch: OwnedSwitch::Short(c),
                param: None,
                desc: desc.to_string(),
            }),
            _ => {}
        }
        return;
    }

    // positional: name: type or name?: type or ...name: type
    let (variadic, spec) = match param_part.strip_prefix("...") {
        Some(rest) => (true, rest),
        None => (false, param_part),
    };
    let optional = spec.contains('?');
    let raw_name = spec.split([':', '?']).next().unwrap_or("").trim();
    let name = raw_name.replace('-', "_");
    // NOTE(review): the `starts_with('-')` test can never fail here because
    // every '-' was just replaced — kept to preserve existing behaviour.
    if name.is_empty() || name.starts_with('-') {
        return;
    }
    let already_present = block
        .positionals
        .iter()
        .any(|(existing, _)| existing.eq_ignore_ascii_case(&name));
    if already_present {
        return;
    }
    block.positionals.push((
        name,
        Positional {
            // a variadic positional is always treated as optional
            optional: optional || variadic,
            variadic,
        },
    ));
}
/// split `s` into the leading flag name and whatever follows it.
///
/// the name is the longest prefix made of ascii letters, digits, `-` and
/// `_`. underscores are included because nushell accepts them in flag
/// names (`--no_color`) and the extern-name scanner in this file already
/// treats `_` as a name character; without it such a flag would be cut
/// short at the underscore and its type suffix lost. the second element
/// is the untouched remainder (for example `(-a): int`), and either part
/// may be empty.
fn split_at_non_name_char(s: &str) -> (&str, &str) {
    let end = s
        .find(|c: char| !(c.is_ascii_alphanumeric() || c == '-' || c == '_'))
        .unwrap_or(s.len());
    s.split_at(end)
}
/// parse a `: type` suffix into an OwnedParam (always Mandatory since the
/// nushell extern syntax doesn't distinguish optional-with-default).
///
/// leading whitespace is skipped on both sides of the colon and the type
/// is the run of ascii letters that follows. returns `None` when there is
/// no colon or no letters after it.
fn parse_type_suffix(s: &str) -> Option<OwnedParam> {
    let after_colon = s.trim_start().strip_prefix(':')?.trim_start();
    let ty: String = after_colon
        .chars()
        .take_while(|c| c.is_ascii_alphabetic())
        .collect();
    if ty.is_empty() {
        return None;
    }
    Some(OwnedParam::Mandatory(ty))
}
/// one `export extern` block collected while scanning a .nu file.
#[derive(Default)]
struct NuBlock {
// command name taken from the extern line (e.g. "git add")
cmd: String,
// flags declared inside the block, in source order
entries: Vec<ManpageEntry>,
// positional arguments declared inside the block, in source order
positionals: Vec<(String, Positional)>,
// text of the `# ` comment line directly above the extern, if any
description: String,
}
/// look up a command's parsed result. source priority is native nushell,
/// then manpage JSON, then help JSON. parent .nu files are searched for
/// subcommand lookups because clap-generated .nu files contain all extern
/// blocks in a single file.
///
/// within each priority level `dirs` is searched in order. a parent .nu
/// file only counts when it yields at least one entry, subcommand or
/// positional for `command`; the command's own .nu file is used whenever
/// it is readable. returns `None` when nothing is stored for `command`.
pub fn lookup(dirs: &[PathBuf], command: &str) -> Option<ManpageResult> {
    let base_name = filename_of_command(command);
    let own_nu = format!("{base_name}.nu");
    let own_json = format!("{base_name}.json");
    // for "git add" this is "git.nu"; absent for a top-level command
    let parent_nu = command
        .split_once(' ')
        .map(|(parent, _)| format!("{}.nu", filename_of_command(parent)));

    // level 1: native nushell definitions
    let native = dirs.iter().find_map(|dir| {
        if let Some(data) = read_file(&dir.join(&own_nu)) {
            return Some(parse_nu_completions(command, &data));
        }
        let data = read_file(&dir.join(parent_nu.as_ref()?))?;
        let found = parse_nu_completions(command, &data);
        let is_empty = found.entries.is_empty()
            && found.subcommands.is_empty()
            && found.positionals.is_empty();
        (!is_empty).then_some(found)
    });
    if native.is_some() {
        return native;
    }

    // level 2: JSON from any source except "help"; level 3: any JSON
    let stored = |dir: &PathBuf| read_json_result(&dir.join(&own_json));
    dirs.iter()
        .find_map(|dir| stored(dir).filter(|(source, _)| source.as_str() != "help"))
        .or_else(|| dirs.iter().find_map(&stored))
        .map(|(_, result)| result)
}
/// look up a command's raw stored data (JSON or .nu source).
///
/// every directory is tried for the `.nu` file first; only if none has
/// one are the directories tried again for the `.json` file. the first
/// readable file wins and its text is returned unparsed.
pub fn lookup_raw(dirs: &[PathBuf], command: &str) -> Option<String> {
    let base_name = filename_of_command(command);
    [".nu", ".json"].iter().find_map(|ext| {
        let file_name = format!("{base_name}{ext}");
        dirs.iter()
            .find_map(|dir| read_file(&dir.join(&file_name)))
    })
}
/// strip a store extension from `filename`: `.json` is tried first, then
/// `.nu`. returns `None` for any other file name.
fn chop_extension(filename: &str) -> Option<&str> {
    for ext in [".json", ".nu"].iter() {
        if let Some(stem) = filename.strip_suffix(*ext) {
            return Some(stem);
        }
    }
    None
}
/// list all indexed commands across all store directories.
/// returns a sorted, deduplicated list of command names.
pub fn all_commands(dirs: &[PathBuf]) -> Vec<String> {
let mut out: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
for directory in dirs {
let Ok(entries) = fs::read_dir(directory) else {
continue;
};
for entry in entries.flatten() {
if let Some(name) = entry.file_name().to_str()
&& let Some(base) = chop_extension(name)
{
out.insert(command_of_filename(base));
}
}
}
out.into_iter().collect()
}
/// discover subcommands of a command by scanning filenames in the store
/// (e.g. for "git", finds "git_add.json", "git_log.json").
pub fn subcommands_of(dirs: &[PathBuf], command: &str) -> Vec<ManpageSubcommand> {
let prefix = format!("{}_", filename_of_command(command));
let mut seen: HashMap<String, ManpageSubcommand> = HashMap::new();
for directory in dirs {
let Ok(entries) = fs::read_dir(directory) else {
continue;
};
for entry in entries.flatten() {
let Some(filename) = entry.file_name().to_str().map(|s| s.to_string()) else {
continue;
};
if !filename.starts_with(&prefix) {
continue;
}
let is_json = filename.ends_with(".json");
let Some(base) = chop_extension(&filename) else {
continue;
};
let rest = &base[prefix.len()..];
if rest.is_empty() || rest.contains('_') {
continue;
}
if seen.contains_key(rest) {
continue;
}
let desc = if is_json {
read_file(&entry.path())
.and_then(|d| serde_json::from_str::<Value>(&d).ok())
.and_then(|v| {
v.get("description")
.and_then(|x| x.as_str())
.map(|s| s.to_string())
})
.unwrap_or_default()
} else {
String::new()
};
seen.insert(
rest.to_string(),
ManpageSubcommand {
name: rest.to_string(),
desc,
},
);
}
}
let mut out: Vec<ManpageSubcommand> = seen.into_values().collect();
out.sort_by(|a, b| a.name.cmp(&b.name));
out
}
/// determine how a command was indexed: "help", "manpage", "native", etc.
/// for JSON files, returns the "source" field. for .nu files, returns "native".
pub fn file_type_of(dirs: &[PathBuf], command: &str) -> Option<String> {
let base = filename_of_command(command);
for directory in dirs {
let nu_path = directory.join(format!("{base}.nu"));
if nu_path.exists() {
return Some("native".to_string());
}
}
for directory in dirs {
let json_path = directory.join(format!("{base}.json"));
if json_path.exists() {
return Some(
read_file(&json_path)
.and_then(|d| serde_json::from_str::<Value>(&d).ok())
.and_then(|v| v.get("source").and_then(|x| x.as_str()).map(String::from))
.unwrap_or_else(|| "json".to_string()),
);
}
}
None
}

34
src/types.rs Normal file
View file

@ -0,0 +1,34 @@
/// the spelling(s) of a flag; the long name is borrowed for `'a`.
pub enum Switch<'a> {
/// single-character form only.
Short(char),
/// long name only.
Long(&'a str),
/// both a single-character form and a long name.
Both(char, &'a str),
}
/// a named parameter, tagged as mandatory or optional; the name is
/// borrowed for `'a`. (presumably the value a flag takes — see
/// `OptionEntry::param`.)
pub enum Param<'a> {
/// the parameter must be supplied.
Mandatory(&'a str),
/// the parameter may be omitted.
Optional(&'a str),
}
/// one option: its switch, an optional parameter, and its description.
pub struct OptionEntry<'a> {
/// how the option is spelled.
pub switch: Switch<'a>,
/// the parameter attached to the option, if any.
pub param: Option<Param<'a>>,
/// description text as a list of borrowed pieces (presumably one per
/// source line — confirm against the parser that fills it).
pub desc: Vec<&'a str>,
}
/// a subcommand name with its description, both borrowed for `'a`.
pub struct Subcommand<'a> {
/// subcommand name.
pub name: &'a str,
/// description text for the subcommand.
pub desc: &'a str,
}
/// properties of a positional argument; the argument's name is stored
/// alongside this struct by its users, not inside it.
#[derive(Debug, Clone)]
pub struct Positional {
/// whether the argument is marked optional.
pub optional: bool,
/// whether the argument is marked variadic.
pub variadic: bool,
}
/// a borrowed bundle of options, subcommands, positionals and a
/// description, all tied to lifetime `'a` (presumably the output of the
/// help-text parser — confirm against its caller).
pub struct HelpResult<'a> {
/// option entries.
pub entries: Vec<OptionEntry<'a>>,
/// subcommands.
pub subcommands: Vec<Subcommand<'a>>,
/// positional arguments as (name, properties) pairs.
pub positionals: Vec<(&'a str, Positional)>,
/// description text.
pub desc: &'a str,
}

View file

@ -1,3 +0,0 @@
; test stanza for the test_inshellah executable, built against the
; inshellah and str libraries.
(test
(name test_inshellah)
(libraries inshellah str))

View file

@ -1,610 +0,0 @@
open Inshellah.Parser
open Inshellah.Manpage
open Inshellah.Nushell
(* running totals of failed and passed checks, updated by [check] *)
let failures = ref 0
let passes = ref 0
(* record the outcome of one assertion: bump the matching counter and
   print a PASS or FAIL line labelled with [name]. *)
let check name condition =
  match condition with
  | true ->
    incr passes;
    Printf.printf " PASS: %s\n" name
  | false ->
    incr failures;
    Printf.printf " FAIL: %s\n" name
(* run [parse_help] on [txt] and return its result, turning a parse
   error into a [Failure] exception that carries the error message. *)
let parse txt =
  let fail msg = failwith (Printf.sprintf "parse_help failed: %s" msg) in
  match parse_help txt with
  | Error msg -> fail msg
  | Ok r -> r
(* --- Help parser tests --- *)
(* a line with both "-a" and "--all" gives one entry: a Both switch,
   no parameter and a non-empty description. *)
let test_gnu_basic () =
Printf.printf "\n== GNU basic flags ==\n";
let r = parse " -a, --all do not ignore entries starting with .\n" in
check "one entry" (List.length r.entries = 1);
let e = List.hd r.entries in
check "both switch" (e.switch = Both ('a', "all"));
check "no param" (e.param = None);
check "desc" (String.length e.desc > 0)
(* "--flag=VALUE" gives a long switch with a mandatory parameter. *)
let test_gnu_eq_param () =
Printf.printf "\n== GNU = param ==\n";
let r = parse " --block-size=SIZE scale sizes by SIZE\n" in
check "one entry" (List.length r.entries = 1);
let e = List.hd r.entries in
check "long switch" (e.switch = Long "block-size");
check "mandatory param" (e.param = Some (Mandatory "SIZE"))
(* "--flag[=VALUE]" gives a long switch with an optional parameter. *)
let test_gnu_opt_param () =
Printf.printf "\n== GNU optional param ==\n";
let r = parse " --color[=WHEN] color the output WHEN\n" in
check "one entry" (List.length r.entries = 1);
let e = List.hd r.entries in
check "long switch" (e.switch = Long "color");
check "optional param" (e.param = Some (Optional "WHEN"))
(* a parameter name containing an underscore is kept whole. *)
let test_underscore_param () =
Printf.printf "\n== Underscore in param (TIME_STYLE) ==\n";
let r = parse " --time-style=TIME_STYLE time/date format\n" in
check "one entry" (List.length r.entries = 1);
let e = List.hd r.entries in
check "param with underscore" (e.param = Some (Mandatory "TIME_STYLE"))
(* a lone "-v" gives one entry with a Short switch. *)
let test_short_only () =
Printf.printf "\n== Short-only flag ==\n";
let r = parse " -v verbose output\n" in
check "one entry" (List.length r.entries = 1);
check "short switch" ((List.hd r.entries).switch = Short 'v')
(* a lone "--help" gives one entry with a Long switch. *)
let test_long_only () =
Printf.printf "\n== Long-only flag ==\n";
let r = parse " --help display help\n" in
check "one entry" (List.length r.entries = 1);
check "long switch" ((List.hd r.entries).switch = Long "help")
(* a description continued on the next line is still one entry, and the
   entry's description is longer than 50 characters. *)
let test_multiline_desc () =
Printf.printf "\n== Multi-line description ==\n";
let r = parse {| --block-size=SIZE with -l, scale sizes by SIZE when printing them;
e.g., '--block-size=M'; see SIZE format below
|} in
check "one entry" (List.length r.entries = 1);
let e = List.hd r.entries in
check "desc includes continuation" (String.length e.desc > 50)
(* three consecutive option lines give three entries. *)
let test_multiple_entries () =
Printf.printf "\n== Multiple entries ==\n";
let r = parse {| -a, --all do not ignore entries starting with .
-A, --almost-all do not list implied . and ..
--author with -l, print the author of each file
|} in
check "three entries" (List.length r.entries = 3)
(* section header lines ("INPUT OPTIONS:") between options do not become
   entries; the first option keeps its switch and parameter. *)
let test_clap_short_sections () =
Printf.printf "\n== Clap short with section headers ==\n";
let r = parse {|INPUT OPTIONS:
-e, --regexp=PATTERN A pattern to search for.
-f, --file=PATTERNFILE Search for patterns from the given file.
SEARCH OPTIONS:
-s, --case-sensitive Search case sensitively.
|} in
check "three entries" (List.length r.entries = 3);
let e = List.hd r.entries in
check "first is regexp" (e.switch = Both ('e', "regexp"));
check "first has param" (e.param = Some (Mandatory "PATTERN"))
(* layout where the description sits on the line below the flag: two
   entries, and the first one has a non-empty description. *)
let test_clap_long_style () =
Printf.printf "\n== Clap long style (desc below flag) ==\n";
let r = parse {| -H, --hidden
Include hidden directories and files.
--no-ignore
Do not respect ignore files.
|} in
check "two entries" (List.length r.entries = 2);
let e = List.hd r.entries in
check "hidden switch" (e.switch = Both ('H', "hidden"));
check "desc below" (String.length e.desc > 0)
(* "--flag <value>" gives a mandatory parameter named without the angle
   brackets. *)
let test_clap_long_angle_param () =
Printf.printf "\n== Clap long angle bracket param ==\n";
let r = parse {| --nonprintable-notation <notation>
Set notation for non-printable characters.
|} in
check "one entry" (List.length r.entries = 1);
let e = List.hd r.entries in
check "long switch" (e.switch = Long "nonprintable-notation");
check "angle param" (e.param = Some (Mandatory "notation"))
(* an upper-case word separated from the flag by a space is taken as a
   mandatory parameter. *)
let test_space_upper_param () =
Printf.printf "\n== Space-separated ALL_CAPS param ==\n";
let r = parse " -f, --foo FOO foo help\n" in
check "one entry" (List.length r.entries = 1);
let e = List.hd r.entries in
check "switch" (e.switch = Both ('f', "foo"));
check "space param" (e.param = Some (Mandatory "FOO"))
(* a "Flags:" section where a lower-case type word ("string") follows
   the flag: three entries, and the type word becomes the parameter. *)
let test_go_cobra_flags () =
Printf.printf "\n== Go/Cobra flags ==\n";
let r = parse {|Flags:
-D, --debug Enable debug mode
-H, --host string Daemon socket to connect to
-v, --version Print version information
|} in
check "three flag entries" (List.length r.entries = 3);
(* Check the host flag has a type param *)
let host = List.nth r.entries 1 in
check "host switch" (host.switch = Both ('H', "host"));
check "host type param" (host.param = Some (Mandatory "string"))
(* a "Common Commands:" section yields at least one subcommand. *)
let test_go_cobra_subcommands () =
Printf.printf "\n== Go/Cobra subcommands ==\n";
let r = parse {|Common Commands:
run Create and run a new container from an image
exec Execute a command in a running container
build Build an image from a Dockerfile
|} in
check "has subcommands" (List.length r.subcommands > 0)
(* tab-indented, tab-separated option lines are parsed, including a
   flag whose character is a digit. *)
let test_busybox_tab () =
Printf.printf "\n== Busybox tab-indented ==\n";
let r = parse "\t-1\tOne column output\n\t-a\tInclude names starting with .\n" in
check "two entries" (List.length r.entries = 2);
check "first is -1" ((List.hd r.entries).switch = Short '1')
(* placeholder check that always passes; reaching it at all is the
   signal described in the comment below. *)
let test_no_debug_prints () =
Printf.printf "\n== No debug side effects ==\n";
(* The old parser had print_endline at module load time.
If we got here without "opt param is running" on stdout, we're good. *)
check "no debug prints" true
(* --- Manpage parser tests --- *)
(* groff OPTIONS section written with .TP macros: three entries, with
   the switch, description and "=SIZE" parameter recovered. the detail
   checks are guarded so a short list cannot raise. *)
let test_manpage_tp_style () =
Printf.printf "\n== Manpage .TP style ==\n";
let groff = {|.SH OPTIONS
.TP
\fB\-a\fR, \fB\-\-all\fR
do not ignore entries starting with .
.TP
\fB\-A\fR, \fB\-\-almost\-all\fR
do not list implied . and ..
.TP
\fB\-\-block\-size\fR=\fISIZE\fR
with \fB\-l\fR, scale sizes by SIZE
.SH AUTHOR
Written by someone.
|} in
let result = parse_manpage_string groff in
check "three entries" (List.length result.entries = 3);
if List.length result.entries >= 1 then begin
let e = List.hd result.entries in
check "first is -a/--all" (e.switch = Both ('a', "all"));
check "first desc" (String.length e.desc > 0)
end;
if List.length result.entries >= 3 then begin
let e = List.nth result.entries 2 in
check "block-size switch" (e.switch = Long "block-size");
check "block-size param" (e.param = Some (Mandatory "SIZE"))
end
(* groff OPTIONS section written with .IP macros: two entries, the
   first being -k/--insecure. *)
let test_manpage_ip_style () =
Printf.printf "\n== Manpage .IP style ==\n";
let groff = {|.SH OPTIONS
.IP "\fB\-k\fR, \fB\-\-insecure\fR"
Allow insecure connections.
.IP "\fB\-o\fR, \fB\-\-output\fR \fIfile\fR"
Write output to file.
.SH SEE ALSO
|} in
let result = parse_manpage_string groff in
check "two entries" (List.length result.entries = 2);
if List.length result.entries >= 1 then begin
let e = List.hd result.entries in
check "first is -k/--insecure" (e.switch = Both ('k', "insecure"))
end
(* strip_groff_escapes: the output must not contain both 'f' and 'B',
   must contain a '-', and must turn "\(aq" into a single quote. *)
let test_manpage_groff_stripping () =
Printf.printf "\n== Groff escape stripping ==\n";
let s = strip_groff_escapes {|\fB\-\-color\fR[=\fIWHEN\fR]|} in
check "font escapes removed" (not (String.contains s 'f' && String.contains s 'B'));
check "dashes converted" (String.contains s '-');
let s2 = strip_groff_escapes {|\(aqhello\(aq|} in
check "aq -> quote" (String.contains s2 '\'')
(* a manpage without an OPTIONS section yields no entries. *)
let test_manpage_empty_options () =
Printf.printf "\n== Manpage with no OPTIONS section ==\n";
let groff = {|.SH NAME
foo \- does stuff
.SH DESCRIPTION
Does stuff.
|} in
let result = parse_manpage_string groff in
check "no entries" (List.length result.entries = 0)
(* "--long / -s" (slash between the two forms) gives one Both entry with
   no parameter and the exact description text. *)
let test_slash_switch_separator () =
Printf.printf "\n== Slash switch separator (--long / -s) ==\n";
let r = parse " --verbose / -v Increase verbosity\n" in
check "one entry" (List.length r.entries = 1);
let e = List.hd r.entries in
check "both switch" (e.switch = Both ('v', "verbose"));
check "no param" (e.param = None);
check "desc" (e.desc = "Increase verbosity")
(* bulleted .IP / .UR / .UE option layout: two entries, the first a Both
   switch (long form, then "/ -v"), the second a plain Long switch, each
   with a non-empty description. *)
let test_manpage_nix3_style () =
Printf.printf "\n== Manpage nix3 style ==\n";
let groff = {|.SH Options
.SS Logging-related options
.IP "\(bu" 3
.UR #opt-verbose
\f(CR--verbose\fR
.UE
/ \f(CR-v\fR
.IP
Increase the logging verbosity level.
.IP "\(bu" 3
.UR #opt-quiet
\f(CR--quiet\fR
.UE
.IP
Decrease the logging verbosity level.
.SH SEE ALSO
|} in
let result = parse_manpage_string groff in
check "two entries" (List.length result.entries = 2);
if List.length result.entries >= 1 then begin
let e = List.hd result.entries in
check "verbose is Both" (e.switch = Both ('v', "verbose"));
check "verbose desc" (String.length e.desc > 0)
end;
if List.length result.entries >= 2 then begin
let e = List.nth result.entries 1 in
check "quiet is Long" (e.switch = Long "quiet");
check "quiet desc" (String.length e.desc > 0)
end
(* same bulleted layout, with parameters after the .UE line: --arg gets
   some parameter, and --include / -I gets the mandatory parameter
   "path". *)
let test_manpage_nix3_with_params () =
Printf.printf "\n== Manpage nix3 with params ==\n";
let groff = {|.SH Options
.IP "\(bu" 3
.UR #opt-arg
\f(CR--arg\fR
.UE
\fIname\fR \fIexpr\fR
.IP
Pass the value as the argument name to Nix functions.
.IP "\(bu" 3
.UR #opt-include
\f(CR--include\fR
.UE
/ \f(CR-I\fR \fIpath\fR
.IP
Add path to search path entries.
.IP
This option may be given multiple times.
.SH SEE ALSO
|} in
let result = parse_manpage_string groff in
check "two entries" (List.length result.entries = 2);
if List.length result.entries >= 1 then begin
let e = List.hd result.entries in
check "arg is Long" (e.switch = Long "arg");
check "arg has param" (e.param <> None)
end;
if List.length result.entries >= 2 then begin
let e = List.nth result.entries 1 in
check "include is Both" (e.switch = Both ('I', "include"));
check "include has path param" (e.param = Some (Mandatory "path"))
end
(* two bold words at the start of the SYNOPSIS line are reported as the
   command "git commit". *)
let test_synopsis_subcommand () =
Printf.printf "\n== SYNOPSIS subcommand detection ==\n";
let groff = {|.SH "SYNOPSIS"
.sp
.nf
\fBgit\fR \fBcommit\fR [\fB\-a\fR | \fB\-\-interactive\fR]
.fi
.SH "DESCRIPTION"
|} in
let cmd = extract_synopsis_command groff in
check "detected git commit" (cmd = Some "git commit")
(* a single hyphenated word in the synopsis is reported as one command
   name. *)
let test_synopsis_standalone () =
Printf.printf "\n== SYNOPSIS standalone command ==\n";
let groff = {|.SH Synopsis
.LP
\f(CRnix-build\fR [\fIpaths\fR]
.SH Description
|} in
let cmd = extract_synopsis_command groff in
check "detected nix-build" (cmd = Some "nix-build")
(* two words inside one font span ("nix run") are reported together as
   the command. *)
let test_synopsis_nix3 () =
Printf.printf "\n== SYNOPSIS nix3 subcommand ==\n";
let groff = {|.SH Synopsis
.LP
\f(CRnix run\fR [\fIoption\fR] \fIinstallable\fR
.SH Description
|} in
let cmd = extract_synopsis_command groff in
check "detected nix run" (cmd = Some "nix run")
(* --- Nushell generation tests --- *)
(* [contains s sub] is true when [sub] occurs somewhere in [s]; [sub] is
   matched literally, not as a regular expression. *)
let contains s sub =
  match Str.search_forward (Str.regexp_string sub) s 0 with
  | _ -> true
  | exception Not_found -> false
(* generate_extern output for one flag contains the extern header, the
   "--all(-a)" form and the description as a comment. *)
let test_nushell_basic () =
Printf.printf "\n== Nushell basic extern ==\n";
let r = parse " -a, --all do not ignore entries starting with .\n" in
let nu = generate_extern "ls" r in
check "has extern" (contains nu "export extern \"ls\"");
check "has --all(-a)" (contains nu "--all(-a)");
check "has comment" (contains nu "# do not ignore")
(* parameter names are mapped to nushell types in the generated text:
   COLS to int, SIZE to string, FILE to path. *)
let test_nushell_param_types () =
Printf.printf "\n== Nushell param type mapping ==\n";
let r = parse {| -w, --width=COLS set output width
--block-size=SIZE scale sizes
-o, --output FILE output file
|} in
let nu = generate_extern "ls" r in
check "COLS -> int" (contains nu "--width(-w): int");
check "SIZE -> string" (contains nu "--block-size: string");
check "FILE -> path" (contains nu "--output(-o): path")
(* each parsed subcommand gets its own "export extern" next to the main
   command's extern. *)
let test_nushell_subcommands () =
Printf.printf "\n== Nushell subcommands ==\n";
let r = parse {|Common Commands:
run Create and run a new container
exec Execute a command
Flags:
-D, --debug Enable debug mode
|} in
let nu = generate_extern "docker" r in
check "has main extern" (contains nu "export extern \"docker\"");
check "has --debug" (contains nu "--debug(-D)");
check "has run subcommand" (contains nu "export extern \"docker run\"");
check "has exec subcommand" (contains nu "export extern \"docker exec\"")
(* a result produced by the manpage parser can be passed to
   generate_extern and gives the same flag forms. *)
let test_nushell_from_manpage () =
Printf.printf "\n== Nushell from manpage ==\n";
let groff = {|.SH OPTIONS
.TP
\fB\-a\fR, \fB\-\-all\fR
do not ignore entries starting with .
.TP
\fB\-\-block\-size\fR=\fISIZE\fR
scale sizes by SIZE
.SH AUTHOR
|} in
let result = parse_manpage_string groff in
let nu = generate_extern "ls" result in
check "has extern" (contains nu "export extern \"ls\"");
check "has --all(-a)" (contains nu "--all(-a)");
check "has --block-size" (contains nu "--block-size: string")
(* generate_module output contains a "module <cmd>-completions" header
   together with the extern and its flag. *)
let test_nushell_module () =
Printf.printf "\n== Nushell module wrapper ==\n";
let r = parse " -v, --verbose verbose output\n" in
let nu = generate_module "myapp" r in
check "has module" (contains nu "module myapp-completions");
check "has extern inside" (contains nu "export extern \"myapp\"");
check "has flag" (contains nu "--verbose(-v)")
(* the same flag listed three ways (both forms, long only, short only)
   appears once in the generated text, in its "--verbose(-v)" form. *)
let test_dedup_entries () =
Printf.printf "\n== Deduplication ==\n";
let r = parse {| -v, --verbose verbose output
--verbose verbose mode
-v be verbose
|} in
let nu = generate_extern "test" r in
(* Count occurrences of --verbose *)
let count =
let re = Str.regexp_string "--verbose" in
let n = ref 0 in
let i = ref 0 in
(try while true do
let _ = Str.search_forward re nu !i in
incr n; i := Str.match_end ()
done with Not_found -> ());
!n
in
check "verbose appears once" (count = 1);
check "best version kept (Both)" (contains nu "--verbose(-v)")
(* flags mentioned again in the DESCRIPTION prose must not add lines:
   exactly one generated line mentions "verbose". *)
let test_dedup_manpage () =
Printf.printf "\n== Dedup from manpage ==\n";
let groff = {|.SH OPTIONS
.TP
\fB\-v\fR, \fB\-\-verbose\fR
Be verbose.
.SH DESCRIPTION
Use \fB\-v\fR for verbose output.
Use \fB\-\-verbose\fR to see more.
|} in
let result = parse_manpage_string groff in
let nu = generate_extern "test" result in
check "has --verbose(-v)" (contains nu "--verbose(-v)");
(* Should not have standalone -v or duplicate --verbose *)
let lines = String.split_on_char '\n' nu in
let verbose_lines = List.filter (fun l -> contains l "verbose") lines in
check "only one verbose line" (List.length verbose_lines = 1)
(* COMMANDS section extraction: parses a systemctl-like manpage and checks
   that the two OPTIONS entries and the three bold command names under
   COMMANDS are reported, and that "start" carries a description. *)
let test_commands_section_subcommands () =
  Printf.printf "\n== COMMANDS section subcommand extraction ==\n";
  (* manpages like systemctl have a COMMANDS section with bold command names
   * inside .PP + .RS/.RE blocks. these should be extracted as subcommands
   * and treated as leaf nodes (no entries of their own). *)
  let groff = {|.SH OPTIONS
.TP
\fB\-\-user\fR
Talk to the service manager of the calling user.
.TP
\fB\-\-system\fR
Talk to the service manager of the system.
.SH COMMANDS
.PP
\fBstart\fR \fIUNIT\fR\&...
.RS 4
Start (activate) one or more units.
.RE
.PP
\fBstop\fR \fIUNIT\fR\&...
.RS 4
Stop (deactivate) one or more units.
.RE
.PP
\fBreload\fR \fIUNIT\fR\&...
.RS 4
Asks all units to reload their configuration.
.RE
.SH SEE ALSO
|} in
  let result = parse_manpage_string groff in
  check "has options entries" (List.length result.entries = 2);
  check "has subcommands" (List.length result.subcommands = 3);
  let sc_names = List.map (fun (sc : subcommand) -> sc.name) result.subcommands in
  check "has start" (List.mem "start" sc_names);
  check "has stop" (List.mem "stop" sc_names);
  check "has reload" (List.mem "reload" sc_names);
  (* verify subcommand descriptions are extracted. find_opt rather than find:
   * a missing "start" should show up as a failed check, not as an uncaught
   * Not_found that stops the remaining tests from running. *)
  let start_has_desc =
    match List.find_opt (fun (sc : subcommand) -> sc.name = "start") result.subcommands with
    | Some (start_sc : subcommand) -> String.length start_sc.desc > 0
    | None -> false
  in
  check "start has desc" start_has_desc
(* Self-listing detection: the help parser must report "start" as a
   subcommand and still pick up the option entries of the same text. *)
let test_self_listing_detection () =
  Printf.printf "\n== Self-listing subcommand detection ==\n";
  (* when a subcommand's --help shows the parent's help text,
   * the subcommand name appears in its own subcommand list.
   * the parser should detect this; it is tested here via parse_help. *)
  let parent_help = {|systemctl [OPTIONS...] COMMAND ...
Unit Commands:
start UNIT... Start (activate) one or more units
stop UNIT... Stop (deactivate) one or more units
status [PATTERN...] Show runtime status
Options:
--user Talk to the user service manager
--system Talk to the system service manager
|} in
  let parsed = parse parent_help in
  let names_start (sc : subcommand) = sc.name = "start" in
  check "detected start as subcommand" (List.exists names_start parsed.subcommands);
  (* the self-listing logic (in main.ml) would check: is "start" in r.subcommands?
   * here we just verify the parser extracts it correctly. *)
  let entry_count = List.length parsed.entries in
  check "has entries too" (entry_count >= 2)
(* .nu file parsing: reads an existing nushell completions module back into
   a result, first for the top-level command and then for a subcommand. *)
let test_nu_file_parsing () =
  Printf.printf "\n== .nu file parsing ==\n";
  let module_src = {|module completions {
# Unofficial CLI tool
export extern mytool [
--help(-h) # Print help
--version(-V) # Print version
]
# List all items
export extern "mytool list" [
--raw # Output as JSON
--format(-f): string # Output format
--help(-h) # Print help
name?: string # Filter by name
]
}
use completions *
|} in
  let top = Inshellah.Store.parse_nu_completions "mytool" module_src in
  check "has entries" (List.length top.entries = 2);
  check "has subcommands" (List.length top.subcommands >= 1);
  let is_list (sc : subcommand) = sc.name = "list" in
  check "has list subcommand" (List.exists is_list top.subcommands);
  check "description" (top.description = "Unofficial CLI tool");
  (* test subcommand lookup *)
  let sub = Inshellah.Store.parse_nu_completions "mytool list" module_src in
  check "list has entries" (List.length sub.entries = 3);
  let is_format (e : entry) = e.switch = Both ('f', "format") in
  check "list has --format(-f)" (List.exists is_format sub.entries);
  check "list has positional" (List.length sub.positionals >= 1)
(* Italic in SYNOPSIS: an italic placeholder following the command name
   must not be folded into the extracted command. *)
let test_italic_synopsis () =
  Printf.printf "\n== Italic in SYNOPSIS ==\n";
  let manpage_src = {|.SH Synopsis
.LP
\f(CRnix-env\fR \fIoperation\fR [\fIoptions\fR] [\fIarguments\fR]
.SH Description
|} in
  let extracted = extract_synopsis_command manpage_src in
  check "no phantom operation" (extracted = Some "nix-env")
(* Font boundary spacing: checks where strip_groff_escapes inserts a space
   when one font run is immediately followed by another. *)
let test_font_boundary_spacing () =
  Printf.printf "\n== Font boundary spacing ==\n";
  (* \fB--max-results\fR\fIcount\fR should become "--max-results count" *)
  let adjacent_param = strip_groff_escapes {|\fB\-\-max\-results\fR\fIcount\fR|} in
  check "has space before param" (contains adjacent_param "--max-results count");
  (* \fB--color\fR[=\fIWHEN\fR] should NOT insert space before = *)
  let optional_param = strip_groff_escapes {|\fB\-\-color\fR[=\fIWHEN\fR]|} in
  check "no space before =" (contains optional_param "--color[=WHEN]")
(* Test runner entry point: prints a banner per group, runs the group's
   tests in order, then prints the pass/fail totals and exits with status 1
   if any check failed. *)
let () =
  let run_group (banner, tests) =
    Printf.printf "%s" banner;
    List.iter (fun run_test -> run_test ()) tests
  in
  List.iter run_group
    [ ( "Running help parser tests...\n",
        [ test_gnu_basic;
          test_gnu_eq_param;
          test_gnu_opt_param;
          test_underscore_param;
          test_short_only;
          test_long_only;
          test_multiline_desc;
          test_multiple_entries;
          test_clap_short_sections;
          test_clap_long_style;
          test_clap_long_angle_param;
          test_space_upper_param;
          test_go_cobra_flags;
          test_go_cobra_subcommands;
          test_busybox_tab;
          test_no_debug_prints ] );
      ( "\nRunning manpage parser tests...\n",
        [ test_manpage_tp_style;
          test_manpage_ip_style;
          test_manpage_groff_stripping;
          test_manpage_empty_options;
          test_slash_switch_separator;
          test_manpage_nix3_style;
          test_manpage_nix3_with_params;
          test_synopsis_subcommand;
          test_synopsis_standalone;
          test_synopsis_nix3 ] );
      ( "\nRunning nushell generation tests...\n",
        [ test_nushell_basic;
          test_nushell_param_types;
          test_nushell_subcommands;
          test_nushell_from_manpage;
          test_nushell_module ] );
      ( "\nRunning dedup and font tests...\n",
        [ test_dedup_entries;
          test_dedup_manpage;
          test_font_boundary_spacing ] );
      ( "\nRunning COMMANDS section tests...\n",
        [ test_commands_section_subcommands;
          test_self_listing_detection ] );
      ( "\nRunning .nu and synopsis tests...\n",
        [ test_nu_file_parsing;
          test_italic_synopsis ] ) ];
  Printf.printf "\n=== Results: %d passed, %d failed ===\n" !passes !failures;
  if !failures > 0 then exit 1

78
tests/git_clone_fix.rs Normal file
View file

@ -0,0 +1,78 @@
use inshellah::parsers::help::help_parser;
/// Regression test: `--[no-]flag` lines as printed by `git clone -h` must not
/// stop the help parser from reaching the entries that follow them.
#[test]
fn parser_recovers_past_no_bracket_long_form() {
    use inshellah::types::Switch;

    // git clone -h produces lines like `--[no-]progress` that switch_parser
    // can't parse. previously the help parser got stuck on these because
    // skip_non_option_line refused to skip option-looking lines. now it falls
    // through to skip, letting the parser continue to the next real entry.
    let text = r#"usage: git clone [<options>] [--] <repo> [<dir>]
-v, --[no-]verbose be more verbose
-q, --[no-]quiet be more quiet
--[no-]progress force progress reporting
--[no-]reject-shallow don't clone shallow repository
-n, --no-checkout don't create a checkout
--checkout opposite of --no-checkout
-s, --[no-]shared setup as shared repository
"#;
    let (_, r) = help_parser(text).expect("parse");

    // before the fix: only 2 entries (-v, -q) before the parser got stuck.
    // after: -v, -q, -n/--no-checkout, --checkout, -s, plus any others.
    let entry_count = r.entries.len();
    assert!(entry_count >= 4, "expected ≥4 entries, got {}", entry_count);

    let has_verbose = r
        .entries
        .iter()
        .any(|e| matches!(&e.switch, Switch::Both('v', l) if *l == "verbose"));
    assert!(
        has_verbose,
        "expected -v/--verbose from --[no-]verbose, got {:?}",
        r.entries.len()
    );
}
/// Negatable `--[no-]flag` options keep their parameters: mandatory `<n>`,
/// optional `[=<pathspec>]`, and none when only prose follows the flag.
#[test]
fn parser_keeps_negatable_params() {
    use inshellah::types::{Param, Switch};

    let text = r#"usage: git clone [<options>] [--] <repo> [<dir>]
-j, --[no-]jobs <n> number of submodules cloned in parallel
--[no-]recurse-submodules[=<pathspec>]
initialize submodules in the clone
--[no-]reject-shallow don't clone shallow repository
"#;
    let (_, r) = help_parser(text).expect("parse");

    let jobs = r
        .entries
        .iter()
        .find(|e| matches!(&e.switch, Switch::Both('j', l) if *l == "jobs"))
        .expect("jobs entry");
    assert!(matches!(&jobs.param, Some(Param::Mandatory("n"))));

    let recurse = r
        .entries
        .iter()
        .find(|e| matches!(&e.switch, Switch::Long(l) if *l == "recurse-submodules"))
        .expect("recurse-submodules entry");
    assert!(matches!(&recurse.param, Some(Param::Optional("pathspec"))));

    let reject = r
        .entries
        .iter()
        .find(|e| matches!(&e.switch, Switch::Long(l) if *l == "reject-shallow"))
        .expect("reject-shallow entry");
    let reject_has_no_param = reject.param.is_none();
    assert!(
        reject_has_no_param,
        "reject-shallow should not parse prose as a param"
    );
}

150
tests/manpage_cli.rs Normal file
View file

@ -0,0 +1,150 @@
use std::fs;
use std::process::Command;
use std::time::{SystemTime, UNIX_EPOCH};
/// Builds a path under the system temp directory that is unique for every
/// call made by this process.
///
/// The final component has the form `{name}-{pid}-{nanos}-{seq}`. The
/// directory itself is not created; callers do that.
///
/// # Panics
///
/// Panics if the system clock reports a time before the Unix epoch.
fn unique_temp_dir(name: &str) -> std::path::PathBuf {
    // Several tests pass the same `name` and may run concurrently, so the
    // clock alone cannot rule out a collision on coarse timers. The
    // per-process sequence number makes every call distinct.
    static NEXT_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
    let seq = NEXT_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("system time")
        .as_nanos();
    std::env::temp_dir().join(format!("{name}-{}-{nanos}-{seq}", std::process::id()))
}
/// `inshellah manpage` names the extern after the SYNOPSIS invocation
/// (`btrfs check`) rather than after the manpage file name (`btrfs-check`).
#[test]
fn manpage_command_uses_synopsis_name() {
    /// Removes the scratch directory on drop, so a failed assertion does not
    /// leave it behind.
    struct TempDirGuard(std::path::PathBuf);
    impl Drop for TempDirGuard {
        fn drop(&mut self) {
            // Best-effort cleanup; an error here must not mask the test result.
            let _ = fs::remove_dir_all(&self.0);
        }
    }

    let root = TempDirGuard(unique_temp_dir("inshellah-manpage-cli"));
    fs::create_dir_all(&root.0).expect("temp dir");
    let manpage = root.0.join("btrfs-check.8");
    fs::write(
        &manpage,
        r#".SH SYNOPSIS
btrfs check [options] <device>
.SH OPTIONS
.TP
\fB\-\-repair\fR
try to repair the filesystem
"#,
    )
    .expect("write manpage");

    let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
        .arg("manpage")
        .arg(&manpage)
        .output()
        .expect("run inshellah manpage");
    assert!(
        output.status.success(),
        "stderr = {}",
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout).expect("stdout");
    assert!(
        stdout.contains("export extern \"btrfs check\""),
        "stdout = {stdout}"
    );
    assert!(
        !stdout.contains("export extern \"btrfs-check\""),
        "stdout = {stdout}"
    );
}
/// A `git-add (1)` reference under "GIT COMMANDS" is emitted as the
/// subcommand `git add`, not as `git git-add`.
#[test]
fn manpage_command_strips_git_style_subcommand_prefixes() {
    /// Removes the scratch directory on drop, so a failed assertion does not
    /// leave it behind.
    struct TempDirGuard(std::path::PathBuf);
    impl Drop for TempDirGuard {
        fn drop(&mut self) {
            // Best-effort cleanup; an error here must not mask the test result.
            let _ = fs::remove_dir_all(&self.0);
        }
    }

    let root = TempDirGuard(unique_temp_dir("inshellah-manpage-cli"));
    fs::create_dir_all(&root.0).expect("temp dir");
    let manpage = root.0.join("git.1");
    fs::write(
        &manpage,
        r#".SH SYNOPSIS
git [--version] [--help] <command> [<args>]
.SH OPTIONS
.TP
\fB\-\-version\fR
show version
.SH "GIT COMMANDS"
.SS "Main porcelain commands"
.PP
.BR git-add (1)
.RS 4
Add file contents to the index.
.RE
"#,
    )
    .expect("write manpage");

    let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
        .arg("manpage")
        .arg(&manpage)
        .output()
        .expect("run inshellah manpage");
    assert!(
        output.status.success(),
        "stderr = {}",
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout).expect("stdout");
    assert!(
        stdout.contains("export extern \"git add\""),
        "stdout = {stdout}"
    );
    assert!(
        !stdout.contains("export extern \"git git-add\""),
        "stdout = {stdout}"
    );
}
/// When SYNOPSIS opens with prose, the extern is named `ld.so` (matching the
/// manpage file `ld.so.8`) instead of the first prose word `The`.
#[test]
fn manpage_command_falls_back_when_synopsis_starts_with_prose() {
    /// Removes the scratch directory on drop, so a failed assertion does not
    /// leave it behind.
    struct TempDirGuard(std::path::PathBuf);
    impl Drop for TempDirGuard {
        fn drop(&mut self) {
            // Best-effort cleanup; an error here must not mask the test result.
            let _ = fs::remove_dir_all(&self.0);
        }
    }

    let root = TempDirGuard(unique_temp_dir("inshellah-manpage-cli"));
    fs::create_dir_all(&root.0).expect("temp dir");
    let manpage = root.0.join("ld.so.8");
    fs::write(
        &manpage,
        r#".SH SYNOPSIS
The dynamic linker can be run either indirectly by running some
dynamically linked program or shared object
(in which case no command-line options
to the dynamic linker can be passed and, in the ELF case, the dynamic linker
which is stored in the
.B .interp
section of the program is executed) or directly by running:
.P
.I /lib/ld\-linux.so.*
[OPTIONS] [PROGRAM [ARGUMENTS]]
.SH OPTIONS
.TP
.BI \-\-argv0\~ string
Set argv[0] to the value string.
"#,
    )
    .expect("write manpage");

    let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
        .arg("manpage")
        .arg(&manpage)
        .output()
        .expect("run inshellah manpage");
    assert!(
        output.status.success(),
        "stderr = {}",
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout).expect("stdout");
    assert!(
        stdout.contains("export extern \"ld.so\""),
        "stdout = {stdout}"
    );
    assert!(
        !stdout.contains("export extern \"The\""),
        "stdout = {stdout}"
    );
}

128
tests/nushell-completer.nu Normal file
View file

@ -0,0 +1,128 @@
# Raise an error whose message is `msg`.
def fail [msg: string] {
error make {msg: $msg}
}
# Fail with `msg` when `actual` differs from `expected`.
# Both values are rendered as NUON in the failure message.
def assert-eq [actual expected msg: string] {
if $actual != $expected {
fail $"($msg): expected ($expected | to nuon), got ($actual | to nuon)"
}
}
# Fail with `msg` when `needle` is not a member of `items`.
# Both values are rendered as NUON in the failure message.
def assert-contains [items needle msg: string] {
if not ($needle in $items) {
fail $"($msg): expected ($items | to nuon) to contain ($needle | to nuon)"
}
}
# Extract the `value` field of each completion in `items`.
# A null `items` is replaced by an empty list before the lookup.
def values [items] {
$items | default [] | get value
}
# The external completer under test, taken from the shell config.
# Each check below invokes it with `do $completer [<command words>]`.
let completer = $env.config.completions.external.completer
# Not called anywhere in the visible script. Presumably it exists so that
# loading this file verifies that the `sudo`/`doas` wrappers accept a full
# command tail with flags and a path argument. TODO confirm.
def _assert_elevation_wrappers_accept_command_tails [p: path] {
sudo nix-env --set -p /nix/var/nix/profiles/system $p
doas nix-env --set -p /nix/var/nix/profiles/system $p
}
# --- static cache checks ---
# Each step rewrites $env.INSHELLAH_STATIC_FILE and then queries the completer.
# NOTE(review): how the completer reads that file is not shown here;
# presumably a stub set up by the test harness. Confirm.

# a single static entry is returned as-is
'[{"value":"--static","description":"from static cache"}]' | save --force $env.INSHELLAH_STATIC_FILE
let static_result = do $completer [demo ""]
assert-eq ($static_result | get 0.value) "--static" "static completion pass-through"
# both static entries must survive for the typed word `ser`
'[{"value":"--server","description":"from static cache"},{"value":"--preserve","description":"from static cache"}]' | save --force $env.INSHELLAH_STATIC_FILE
let static_fuzzy_result = do $completer [demo ser]
assert-eq (values $static_fuzzy_result) ['--server' '--preserve'] "static fuzzy completions are not refiltered by shim"
# malformed JSON must yield null
"{" | save --force $env.INSHELLAH_STATIC_FILE
let bad_static_result = do $completer [demo ""]
assert-eq $bad_static_result null "bad static JSON falls back cleanly"
# empty static file for the checks that follow
"" | save --force $env.INSHELLAH_STATIC_FILE
# --- nix / systemctl / kubectl / cargo checks ---
# NOTE(review): the expected values (build, demo.service, pod-a, app-lib, ...)
# come from fixtures outside this file; presumably stubbed tools. Confirm.

# nix
assert-eq (do $completer [nix]) null "nix completion ignores too-short spans"
let nix_commands = do $completer [nix ""]
assert-eq ($nix_commands | get 0.value) "build" "nix command completion uses NIX_GET_COMPLETIONS"
let nix_pkg = do $completer [nix "flake#pkg"]
assert-eq ($nix_pkg | get 0.description) "raw package description" "nix descriptions are raw strings"
# systemctl
let systemctl_empty = do $completer [systemctl daemon-reload ""]
assert-eq $systemctl_empty null "systemctl does not offer units for non-unit verbs"
let systemctl_units = do $completer [systemctl status ""]
assert-eq ($systemctl_units | get 0.value) "demo.service" "systemctl offers units for unit verbs"
let systemctl_prefixed_units = do $completer [systemctl start g]
assert-eq ($systemctl_prefixed_units | get 0.value) "greetd.service" "systemctl unit completions accept typed prefixes"
# kubectl; $env.KUBECTL_ARGS_FILE is read back to check the forwarded arguments
let kubectl_pods = do $completer [kubectl get pods -n prod ""]
assert-eq ($kubectl_pods | get 0.value) "pod-a" "kubectl resource names complete"
assert-eq (open $env.KUBECTL_ARGS_FILE | str contains "-n prod") true "kubectl preserves namespace flags"
let kubectl_rollout = do $completer [kubectl rollout status deployment ""]
assert-eq ($kubectl_rollout | get 0.description) "deployment" "kubectl rollout uses resource kind, not action"
# cargo
let cargo_packages = do $completer [cargo test -p ""]
assert-eq (values $cargo_packages) [app-lib helper-lib] "cargo -p completes packages"
let cargo_bins = do $completer [cargo run --bin ""]
assert-eq (values $cargo_bins) [app-cli] "cargo --bin completes only bin targets"
# static file now holds an empty JSON list for the checks that follow
"[]" | save --force $env.INSHELLAH_STATIC_FILE
# --- git checks ---
# NOTE(review): remotes, branches, tags, stashes and paths expected below come
# from fixtures outside this file; presumably a stubbed git. Confirm.
let git_top = do $completer [git ""]
assert-contains (values $git_top) "remote" "git top-level completes common commands"
assert-contains (values $git_top) "stash" "git top-level includes stash"
let git_push = do $completer [git push ""]
assert-eq (values $git_push) [origin upstream] "empty static completions fall through to git remotes"
# git remote: subcommand listing, prefix filter, fuzzy filter, exact match
let git_remote_verbs = do $completer [git remote ""]
assert-eq (values $git_remote_verbs) [add rename remove rm set-head set-branches get-url set-url show prune update] "git remote completes subcommands"
let git_remote_filtered = do $completer [git remote sho]
assert-eq (values $git_remote_filtered) [show] "git remote subcommands filter by typed prefix"
let git_remote_fuzzy = do $completer [git remote shw]
assert-eq (values $git_remote_fuzzy) [show] "git remote subcommands use fuzzy filtering"
let git_remote_exact = do $completer [git remote show]
assert-eq $git_remote_exact null "exact dynamic completion disappears"
let git_remote_show = do $completer [git remote show ""]
assert-eq (values $git_remote_show) [origin upstream] "git remote show completes remote names"
# remotes and refs
let git_fetch = do $completer [git fetch ""]
assert-eq (values $git_fetch) [origin upstream] "git fetch completes remotes"
let git_fetch_ref = do $completer [git fetch origin ""]
assert-contains (values $git_fetch_ref) "main" "git fetch after remote completes refs"
# branches, tags, stashes, submodules
let git_branch_delete = do $completer [git branch -d ""]
assert-eq (values $git_branch_delete) [main feature] "git branch delete completes local branches"
let git_tag_delete = do $completer [git tag -d ""]
assert-eq (values $git_tag_delete) [v1.0 v2.0] "git tag delete completes tags"
let git_stash_apply = do $completer [git stash apply ""]
assert-eq (values $git_stash_apply) ['stash@{0}'] "git stash apply completes stashes"
let git_submodule_update = do $completer [git submodule update ""]
assert-eq (values $git_submodule_update) [deps/demo] "git submodule update completes submodule paths"
# bisect
let git_bisect = do $completer [git bisect ""]
assert-contains (values $git_bisect) "good" "git bisect completes subcommands"
let git_bisect_good = do $completer [git bisect good ""]
assert-contains (values $git_bisect_good) "main" "git bisect good completes refs"
# path arguments
let git_add_paths = do $completer [git add ""]
assert-eq (values $git_add_paths) [src/main.rs new-file.txt renamed.txt] "git add completes changed paths"
let git_rm_paths = do $completer [git rm ""]
assert-eq (values $git_rm_paths) [src/main.rs README.md] "git rm completes tracked paths"
# worktree checks run with an empty static file
"" | save --force $env.INSHELLAH_STATIC_FILE
let git_worktree_add = do $completer [git worktree add ""]
assert-eq $git_worktree_add null "git worktree add first argument falls back to files"
let git_worktree_remove = do $completer [git worktree remove ""]
assert-eq ($git_worktree_remove | get 0.value) "/repo/linked" "git worktree remove completes existing worktrees"
# static file holds an empty JSON list again for the checks that follow
"[]" | save --force $env.INSHELLAH_STATIC_FILE
# --- jj checks ---
# NOTE(review): bookmarks, tags, remotes, revisions, operations, files and
# workspaces expected below come from fixtures outside this file; presumably
# a stubbed jj. Confirm.
let jj_top = do $completer [jj ""]
assert-contains (values $jj_top) "bookmark" "jj top-level completes common commands"
assert-contains (values $jj_top) "git" "jj top-level includes git command"
let jj_bookmarks = do $completer [jj bookmark delete ""]
assert-eq (values $jj_bookmarks) [main feature origin/main] "jj bookmark delete completes bookmarks"
let jj_tags = do $completer [jj tag delete ""]
assert-eq (values $jj_tags) [v1.0 v2.0] "jj tag delete completes tags"
# jj git ...
let jj_git_fetch = do $completer [jj git fetch ""]
assert-eq (values $jj_git_fetch) [origin upstream] "jj git fetch completes remotes"
let jj_git_remote_verbs = do $completer [jj git remote ""]
assert-eq (values $jj_git_remote_verbs) [add list remove rename set-url] "jj git remote completes subcommands"
let jj_git_remote_remove = do $completer [jj git remote remove ""]
assert-eq (values $jj_git_remote_remove) [origin upstream] "jj git remote remove completes remotes"
# revisions, operations, files, workspaces
let jj_revs = do $completer [jj rebase -d ""]
assert-eq (values $jj_revs) [k m] "jj revision flags complete revisions"
let jj_ops = do $completer [jj op restore ""]
assert-eq (values $jj_ops) [abc123] "jj op restore completes operations"
let jj_files = do $completer [jj file show ""]
assert-eq (values $jj_files) [src/main.rs README.md] "jj file show completes repo files"
let jj_workspaces = do $completer [jj workspace forget ""]
assert-eq (values $jj_workspaces) [default linked] "jj workspace forget completes workspaces"
# leave the static file empty at the end of the script
"" | save --force $env.INSHELLAH_STATIC_FILE

915
tests/ports.rs Normal file
View file

@ -0,0 +1,915 @@
//! Tests ported from ../inshellah/test/test_inshellah.ml.
//!
//! Covers the help parser, manpage parser, groff stripping, and nushell
//! generation. The single .nu store parser test (`test_nu_file_parsing`) is
//! not included — it requires porting store.ml first.
use inshellah::parsers::help::help_parser;
use inshellah::parsers::manpage::{
ManpageResult, OwnedParam, OwnedSwitch, extract_synopsis_command, parse_manpage_string,
strip_groff_escapes,
};
use inshellah::parsers::nushell::{generate_extern, generate_module};
use inshellah::store::{json_of_result, parse_nu_completions, result_from_json};
use inshellah::types::{HelpResult, Param, Switch};
/// Runs `help_parser` over `txt` and returns the parsed result, discarding
/// the parser's other tuple element.
///
/// # Panics
///
/// Panics with the parser error when `help_parser` returns `Err`.
fn parse(txt: &str) -> HelpResult<'_> {
    help_parser(txt)
        .map(|(_, parsed)| parsed)
        .unwrap_or_else(|e| panic!("parse_help failed: {e:?}"))
}
// --- Help parser tests ---
/// GNU `-a, --all` line: one entry with a short+long switch, no parameter,
/// and a non-empty description.
#[test]
fn gnu_basic() {
    let parsed = parse(" -a, --all do not ignore entries starting with .\n");
    assert_eq!(parsed.entries.len(), 1);
    let entry = &parsed.entries[0];
    assert!(matches!(&entry.switch, Switch::Both('a', long) if *long == "all"));
    assert!(entry.param.is_none());
    assert!(!entry.desc.is_empty());
}
/// `--flag=VALUE` yields a long switch with a mandatory parameter.
#[test]
fn gnu_eq_param() {
    let parsed = parse(" --block-size=SIZE scale sizes by SIZE\n");
    assert_eq!(parsed.entries.len(), 1);
    let entry = &parsed.entries[0];
    assert!(matches!(&entry.switch, Switch::Long(long) if *long == "block-size"));
    assert!(matches!(&entry.param, Some(Param::Mandatory(value)) if *value == "SIZE"));
}
/// `--flag[=VALUE]` yields a long switch with an optional parameter.
#[test]
fn gnu_opt_param() {
    let parsed = parse(" --color[=WHEN] color the output WHEN\n");
    assert_eq!(parsed.entries.len(), 1);
    let entry = &parsed.entries[0];
    assert!(matches!(&entry.switch, Switch::Long(long) if *long == "color"));
    assert!(matches!(&entry.param, Some(Param::Optional(value)) if *value == "WHEN"));
}
/// Parameter names containing an underscore are captured whole.
#[test]
fn underscore_param() {
    let parsed = parse(" --time-style=TIME_STYLE time/date format\n");
    assert_eq!(parsed.entries.len(), 1);
    let entry = &parsed.entries[0];
    assert!(matches!(&entry.param, Some(Param::Mandatory(value)) if *value == "TIME_STYLE"));
}
/// A lone `-v` line yields a single short switch.
#[test]
fn short_only() {
    let parsed = parse(" -v verbose output\n");
    assert_eq!(parsed.entries.len(), 1);
    assert!(matches!(parsed.entries[0].switch, Switch::Short('v')));
}
/// A lone `--help` line yields a single long switch.
#[test]
fn long_only() {
    let parsed = parse(" --help display help\n");
    assert_eq!(parsed.entries.len(), 1);
    assert!(matches!(&parsed.entries[0].switch, Switch::Long(long) if *long == "help"));
}
/// A description continued on a second line stays attached to its entry.
#[test]
fn multiline_desc() {
    let help = " --block-size=SIZE with -l, scale sizes by SIZE when printing them;\n e.g., '--block-size=M'; see SIZE format below\n";
    let parsed = parse(help);
    assert_eq!(parsed.entries.len(), 1);
    // the joined description must exceed 50 bytes
    let combined: String = parsed.entries[0].desc.join(" ");
    assert!(combined.len() > 50, "desc was: {combined}");
}
/// Three consecutive option lines produce three entries.
#[test]
fn multiple_entries() {
    let help = " -a, --all do not ignore entries starting with .\n -A, --almost-all do not list implied . and ..\n --author with -l, print the author of each file\n";
    let parsed = parse(help);
    assert_eq!(parsed.entries.len(), 3);
}
/// Options spread over several titled sections are all collected, and the
/// first one keeps its `=PATTERN` parameter.
#[test]
fn clap_short_sections() {
    let help = "INPUT OPTIONS:\n -e, --regexp=PATTERN A pattern to search for.\n -f, --file=PATTERNFILE Search for patterns from the given file.\nSEARCH OPTIONS:\n -s, --case-sensitive Search case sensitively.\n";
    let parsed = parse(help);
    assert_eq!(parsed.entries.len(), 3);
    let first = &parsed.entries[0];
    assert!(matches!(&first.switch, Switch::Both('e', long) if *long == "regexp"));
    assert!(matches!(&first.param, Some(Param::Mandatory(value)) if *value == "PATTERN"));
}
/// Layout with the description on the line after the switch, entries
/// separated by a blank line.
#[test]
fn clap_long_style() {
    let help = " -H, --hidden\n Include hidden directories and files.\n\n --no-ignore\n Do not respect ignore files.\n";
    let parsed = parse(help);
    assert_eq!(parsed.entries.len(), 2);
    let first = &parsed.entries[0];
    assert!(matches!(&first.switch, Switch::Both('H', long) if *long == "hidden"));
    assert!(!first.desc.is_empty());
}
/// `--flag <value>` yields a mandatory parameter named without the angle brackets.
#[test]
fn clap_long_angle_param() {
    let help = " --nonprintable-notation <notation>\n Set notation for non-printable characters.\n";
    let parsed = parse(help);
    assert_eq!(parsed.entries.len(), 1);
    let entry = &parsed.entries[0];
    assert!(matches!(&entry.switch, Switch::Long(long) if *long == "nonprintable-notation"));
    assert!(matches!(&entry.param, Some(Param::Mandatory(value)) if *value == "notation"));
}
/// An upper-case word after the switch, separated by a space, is a
/// mandatory parameter.
#[test]
fn space_upper_param() {
    let parsed = parse(" -f, --foo FOO foo help\n");
    assert_eq!(parsed.entries.len(), 1);
    let entry = &parsed.entries[0];
    assert!(matches!(&entry.switch, Switch::Both('f', long) if *long == "foo"));
    assert!(matches!(&entry.param, Some(Param::Mandatory(value)) if *value == "FOO"));
}
/// Cobra-style `Flags:` block: the lower-case type word after `--host` is
/// taken as the parameter.
#[test]
fn go_cobra_flags() {
    let help = "Flags:\n -D, --debug Enable debug mode\n -H, --host string Daemon socket to connect to\n -v, --version Print version information\n";
    let parsed = parse(help);
    assert_eq!(parsed.entries.len(), 3);
    let host_flag = &parsed.entries[1];
    assert!(matches!(&host_flag.switch, Switch::Both('H', long) if *long == "host"));
    assert!(matches!(&host_flag.param, Some(Param::Mandatory(value)) if *value == "string"));
}
/// A `Common Commands:` block must produce at least one subcommand.
#[test]
fn go_cobra_subcommands() {
    let help = "Common Commands:\n run Create and run a new container from an image\n exec Execute a command in a running container\n build Build an image from a Dockerfile\n";
    let parsed = parse(help);
    let found_any = !parsed.subcommands.is_empty();
    assert!(
        found_any,
        "expected subcommands, got: {:?}",
        parsed.subcommands.len()
    );
}
/// tar-style help: the list of FORMAT values must not become subcommands,
/// the "defaults to" lines must not become flags, and the real
/// `-H, --format` option must still be parsed.
#[test]
fn help_parser_ignores_value_enums_and_defaults() {
    let help = r#"Usage: tar [OPTION...] [FILE]...
Main operation mode:
-c, --create create a new archive
Archive format selection:
-H, --format=FORMAT create archive of the given format
FORMAT is one of the following:
gnu GNU tar 1.13.x format
oldgnu GNU format as per tar <= 1.12
pax POSIX 1003.1-2001 (pax) format
posix same as pax
ustar POSIX 1003.1-1988 (ustar) format
v7 old V7 tar format
*This* tar defaults to:
--format=gnu -f- -b20 --quoting-style=escape
--rmt-command=/nix/store/example/libexec/rmt
"#;
    let parsed = parse(help);

    assert!(
        parsed.subcommands.is_empty(),
        "enum values became subcommands: {:?}",
        parsed.subcommands.len()
    );

    let default_became_flag = parsed
        .entries
        .iter()
        .any(|e| matches!(&e.switch, Switch::Long(l) if *l == "rmt-command"));
    assert!(!default_became_flag, "default lines should not become flags");

    let format_flag_present = parsed
        .entries
        .iter()
        .any(|e| matches!(&e.switch, Switch::Both('H', l) if *l == "format"));
    assert!(format_flag_present, "real option should still be parsed");
}
/// Tab-separated option lines are parsed, including a digit as a short switch.
#[test]
fn busybox_tab() {
    let parsed = parse("\t-1\tOne column output\n\t-a\tInclude names starting with .\n");
    assert_eq!(parsed.entries.len(), 2);
    assert!(matches!(parsed.entries[0].switch, Switch::Short('1')));
}
/// Smoke test that only runs the parser once and discards the result.
#[test]
fn no_debug_prints() {
    // the old ocaml parser had print_endline at module load time; this test
    // documents that no such side effects exist in the rust port.
    let _parsed = parse(" -v verbose\n");
}
/// `--long / -s` with a slash separator is read as one short+long switch,
/// and the slash does not leak into the parameter or the description.
#[test]
fn slash_switch_separator() {
    let parsed = parse(" --verbose / -v Increase verbosity\n");
    assert_eq!(parsed.entries.len(), 1);
    let entry = &parsed.entries[0];
    assert!(matches!(&entry.switch, Switch::Both('v', long) if *long == "verbose"));
    assert!(entry.param.is_none());
    let description: String = entry.desc.join(" ");
    assert_eq!(description.trim(), "Increase verbosity");
}
// --- Manpage parser tests ---
/// `.TP`-tagged OPTIONS entries: three entries, the first short+long with a
/// description, the third long with a mandatory `SIZE` parameter.
#[test]
fn manpage_tp_style() {
    let groff = r#".SH OPTIONS
.TP
\fB\-a\fR, \fB\-\-all\fR
do not ignore entries starting with .
.TP
\fB\-A\fR, \fB\-\-almost\-all\fR
do not list implied . and ..
.TP
\fB\-\-block\-size\fR=\fISIZE\fR
with \fB\-l\fR, scale sizes by SIZE
.SH AUTHOR
Written by someone.
"#;
    let parsed = parse_manpage_string(groff);
    assert_eq!(parsed.entries.len(), 3, "entries: {:?}", parsed.entries);

    let all_flag = &parsed.entries[0];
    assert!(matches!(&all_flag.switch, OwnedSwitch::Both('a', l) if l == "all"));
    assert!(!all_flag.desc.is_empty());

    let block_size = &parsed.entries[2];
    assert!(matches!(&block_size.switch, OwnedSwitch::Long(l) if l == "block-size"));
    assert!(matches!(&block_size.param, Some(OwnedParam::Mandatory(p)) if p == "SIZE"));
}
/// `.IP "..."`-tagged OPTIONS entries are parsed like `.TP` ones.
#[test]
fn manpage_ip_style() {
    let groff = r#".SH OPTIONS
.IP "\fB\-k\fR, \fB\-\-insecure\fR"
Allow insecure connections.
.IP "\fB\-o\fR, \fB\-\-output\fR \fIfile\fR"
Write output to file.
.SH SEE ALSO
"#;
    let parsed = parse_manpage_string(groff);
    assert_eq!(parsed.entries.len(), 2, "entries: {:?}", parsed.entries);
    let insecure = &parsed.entries[0];
    assert!(matches!(&insecure.switch, OwnedSwitch::Both('k', l) if l == "insecure"));
}
/// `strip_groff_escapes` drops font escapes, converts `\-` to `-`, and maps
/// `\(aq` to an apostrophe.
#[test]
fn manpage_groff_stripping() {
    let stripped = strip_groff_escapes(r#"\fB\-\-color\fR[=\fIWHEN\fR]"#);
    // font escapes removed: at least one of the characters of `\fB` is gone
    assert!(!stripped.contains('f') || !stripped.contains('B') || !stripped.contains('\\'));
    // dashes converted
    assert!(stripped.contains('-'));

    let quoted = strip_groff_escapes(r#"\(aqhello\(aq"#);
    assert!(quoted.contains('\''), "expected apostrophe in: {quoted}");
}
/// getent-style manpage: positionals come from the `.SY` synopsis, database
/// names listed in DESCRIPTION become subcommands, and `.TP`/`.TQ` pairs with
/// `.BI` tags merge into a single `--service(-s)` entry.
#[test]
fn manpage_getent_databases_from_description() {
    let groff = r#".SH SYNOPSIS
.SY getent
.RI [ option \~.\|.\|.\&]
.I database
.IR key \~.\|.\|.
.YS
.SH DESCRIPTION
The
.I database
may be any of those supported by the GNU C Library, listed below:
.TP
.B passwd
When no
.I key
is provided, enumerate the passwd database.
.TP
.B services
When no
.I key
is provided, enumerate the services database.
.SH OPTIONS
.TP
.BI \-\-service\~ service
.TQ
.BI \-s\~ service
Override all databases with the specified service.
.TP
.BI \-\-service\~ database : service
.TQ
.BI \-s\~ database : service
Override only specified databases with the specified service.
.TP
.B \-\-usage
Print a short usage summary and exit.
"#;
    let parsed = parse_manpage_string(groff);

    // positionals, in synopsis order
    let positional_names: Vec<&str> = parsed
        .positionals
        .iter()
        .map(|(name, _)| name.as_str())
        .collect();
    assert_eq!(positional_names, vec!["database", "key"]);

    // the merged --service(-s) entry keeps `service` as its parameter
    let service = parsed
        .entries
        .iter()
        .find(|e| matches!(&e.switch, OwnedSwitch::Both('s', name) if name == "service"))
        .expect("expected --service(-s)");
    assert!(matches!(
        &service.param,
        Some(OwnedParam::Mandatory(param)) if param == "service"
    ));

    // the parameter text must not be glued onto the flag name
    let has_glued_name = parsed
        .entries
        .iter()
        .any(|e| matches!(&e.switch, OwnedSwitch::Long(name) if name == "serviceservice" || name == "servicedatabase"));
    assert!(!has_glued_name, "entries: {:?}", parsed.entries);

    // databases from DESCRIPTION are reported as subcommands
    let subcommands: Vec<&str> = parsed
        .subcommands
        .iter()
        .map(|sc| sc.name.as_str())
        .collect();
    assert!(
        subcommands.contains(&"passwd"),
        "subcommands: {subcommands:?}"
    );
    assert!(
        subcommands.contains(&"services"),
        "subcommands: {subcommands:?}"
    );

    // generated nushell
    let nu = generate_extern("getent", &parsed);
    assert!(nu.contains("database: string"), "nu = {nu}");
    assert!(nu.contains("...key: string"), "nu = {nu}");
    assert!(nu.contains("--service(-s): string"), "nu = {nu}");
    assert!(!nu.contains("--servicedatabase"), "nu = {nu}");
    assert!(nu.contains("export extern \"getent passwd\""), "nu = {nu}");
}
/// A `.B` tag whose quoted argument contains doubled quotes still yields
/// the short `-s` option.
#[test]
fn manpage_b_macro_option_tag_with_embedded_quotes() {
    let groff = r#".SH OPTIONS
.TP
.B "\-s ""\fIprogram\fR [\fIargument \fR...]\fB""\fR, \fB\-\-speller=""\fIprogram\fR [\fIargument \fR...]\fB"""
Use this command to perform spell checking and correcting.
"#;
    let parsed = parse_manpage_string(groff);
    let has_short_s = parsed
        .entries
        .iter()
        .any(|e| matches!(e.switch, OwnedSwitch::Short('s')));
    assert!(has_short_s, "entries: {:?}", parsed.entries);
}
/// rtmon-style synopsis built from `.B`/`.RI`/`.BI` macros: `link` and
/// `address` must be found as positionals, and the mangled name `ptions`
/// must not appear.
#[test]
fn manpage_synopsis_b_macro_bracket_args_keep_spaces() {
    let groff = r#".SH "SYNOPSIS"
.B "rtmon"
.RI "[ " OPTIONS " ] "
.BI "file " FILE
.BR "[ " all
.RI "| " OBJECTS
.RB "]"
.ti -8
.I OBJECTS
.B ":= [" link "]" "[" address "]" "[" route "]"
.SH OPTIONS
"#;
    let parsed = parse_manpage_string(groff);
    let positional_names: Vec<&str> = parsed
        .positionals
        .iter()
        .map(|(name, _)| name.as_str())
        .collect();
    let names_look_right = !positional_names.contains(&"ptions")
        && positional_names.contains(&"link")
        && positional_names.contains(&"address");
    assert!(names_look_right, "positionals: {positional_names:?}");
}
/// A bracketed `.I "filenames \&..."` argument is reported as the variadic
/// positional `filenames`.
#[test]
fn bracketed_angle_positionals_keep_inner_ellipsis() {
    let groff = r#".SH SYNOPSIS
.B bzip2
.RB [ " \-cdfkqstvzVL123456789 " ]
[
.I "filenames \&..."
]
.SH OPTIONS
"#;
    let parsed = parse_manpage_string(groff);
    let has_variadic_filenames = parsed
        .positionals
        .iter()
        .any(|(name, positional)| name == "filenames" && positional.variadic);
    assert!(
        has_variadic_filenames,
        "positionals: {:?}",
        parsed.positionals
    );
}
/// fc-cat-style nested optional groups: `dirs` must be reported as an
/// optional, variadic positional.
#[test]
fn nested_optional_positionals_keep_last_valid_inner_name() {
    let groff = r#".SH SYNOPSIS
\fBfc-cat\fR [ \fB-rvVh\fR ]
[ \fB [ \fIfonts-cache-%version%-files\fB ] [ \fIdirs\fB ] \fR\fI...\fR ]
.SH OPTIONS
"#;
    let parsed = parse_manpage_string(groff);
    let has_optional_variadic_dirs = parsed
        .positionals
        .iter()
        .any(|(name, positional)| name == "dirs" && positional.optional && positional.variadic);
    assert!(
        has_optional_variadic_dirs,
        "positionals: {:?}",
        parsed.positionals
    );
}
/// A manpage without an OPTIONS section yields no entries.
#[test]
fn manpage_empty_options() {
    let parsed =
        parse_manpage_string(".SH NAME\nfoo \\- does stuff\n.SH DESCRIPTION\nDoes stuff.\n");
    assert_eq!(parsed.entries.len(), 0);
}
/// nix3-style options: bullet `.IP` items with `.UR`/`.UE` links, an
/// optional `/ -v` short alias, and the description in a following `.IP`.
#[test]
fn manpage_nix3_style() {
    let groff = r#".SH Options
.SS Logging-related options
.IP "\(bu" 3
.UR #opt-verbose
\f(CR--verbose\fR
.UE
/ \f(CR-v\fR
.IP
Increase the logging verbosity level.
.IP "\(bu" 3
.UR #opt-quiet
\f(CR--quiet\fR
.UE
.IP
Decrease the logging verbosity level.
.SH SEE ALSO
"#;
    let parsed = parse_manpage_string(groff);
    assert_eq!(parsed.entries.len(), 2, "entries: {:?}", parsed.entries);

    let verbose = &parsed.entries[0];
    assert!(matches!(&verbose.switch, OwnedSwitch::Both('v', l) if l == "verbose"));
    assert!(!verbose.desc.is_empty());

    let quiet = &parsed.entries[1];
    assert!(matches!(&quiet.switch, OwnedSwitch::Long(l) if l == "quiet"));
    assert!(!quiet.desc.is_empty());
}
/// nix3-style options with parameters: italic words after the flag are its
/// parameter, including after a `/ -I` short alias.
#[test]
fn manpage_nix3_with_params() {
    let groff = r#".SH Options
.IP "\(bu" 3
.UR #opt-arg
\f(CR--arg\fR
.UE
\fIname\fR \fIexpr\fR
.IP
Pass the value as the argument name to Nix functions.
.IP "\(bu" 3
.UR #opt-include
\f(CR--include\fR
.UE
/ \f(CR-I\fR \fIpath\fR
.IP
Add path to search path entries.
.IP
This option may be given multiple times.
.SH SEE ALSO
"#;
    let parsed = parse_manpage_string(groff);
    assert_eq!(parsed.entries.len(), 2, "entries: {:?}", parsed.entries);

    let arg_flag = &parsed.entries[0];
    assert!(matches!(&arg_flag.switch, OwnedSwitch::Long(l) if l == "arg"));
    assert!(arg_flag.param.is_some());

    let include_flag = &parsed.entries[1];
    assert!(matches!(&include_flag.switch, OwnedSwitch::Both('I', l) if l == "include"));
    assert!(matches!(&include_flag.param, Some(OwnedParam::Mandatory(p)) if p == "path"));
}
/// Two consecutive bold words (`git`, `commit`) ahead of the bracketed flags
/// are reported together as the command `git commit`.
#[test]
fn synopsis_subcommand() {
    let groff = r#".SH "SYNOPSIS"
.sp
.nf
\fBgit\fR \fBcommit\fR [\fB\-a\fR | \fB\-\-interactive\fR]
.fi
.SH "DESCRIPTION"
"#;
    let expected = Some("git commit");
    let command = extract_synopsis_command(groff);
    assert_eq!(command.as_deref(), expected);
}
/// A single `\f(CR…\fR` command followed by a bracketed operand yields just
/// the command name.
#[test]
fn synopsis_standalone() {
    let command = extract_synopsis_command(
        ".SH Synopsis\n.LP\n\\f(CRnix-build\\fR [\\fIpaths\\fR]\n.SH Description\n",
    );
    let expected = Some("nix-build");
    assert_eq!(command.as_deref(), expected);
}
/// A `\f(CR…\fR` run containing two words (`nix run`) is kept as a two-word
/// command; the option and installable placeholders are left out.
#[test]
fn synopsis_nix3() {
    let command = extract_synopsis_command(
        ".SH Synopsis\n.LP\n\\f(CRnix run\\fR [\\fIoption\\fR] \\fIinstallable\\fR\n.SH Description\n",
    );
    let expected = Some("nix run");
    assert_eq!(command.as_deref(), expected);
}
/// An italic word that follows the command (`operation`) is not appended to
/// the command name: only `nix-env` is expected.
#[test]
fn italic_synopsis() {
    let command = extract_synopsis_command(
        ".SH Synopsis\n.LP\n\\f(CRnix-env\\fR \\fIoperation\\fR [\\fIoptions\\fR] [\\fIarguments…\\fR]\n.SH Description\n",
    );
    let expected = Some("nix-env");
    assert_eq!(command.as_deref(), expected);
}
/// When the whole invocation is italicised at the start of the synopsis line
/// (`\fIgit am\fR [...]`, the shape used by git-am.1), it is still the command
/// name and must resolve to `git am` instead of being treated as a placeholder.
#[test]
fn synopsis_italic_command_name() {
    let command = extract_synopsis_command(
        ".SH \"SYNOPSIS\"\n.sp\n.nf\n\\fIgit am\\fR [\\-\\-signoff] [\\-\\-keep]\n.fi\n.SH \"DESCRIPTION\"\n",
    );
    let expected = Some("git am");
    assert_eq!(command.as_deref(), expected);
}
/// A SYNOPSIS that opens with several lines of prose and then shows
/// `.I /lib/ld\-linux.so.*` as the invocation: the test expects no command to
/// be extracted at all (`None`).
#[test]
fn synopsis_skips_prose_before_invocation() {
    let groff = r#".SH SYNOPSIS
The dynamic linker can be run either indirectly by running some
dynamically linked program or shared object
(in which case no command-line options
to the dynamic linker can be passed and, in the ELF case, the dynamic linker
which is stored in the
.B .interp
section of the program is executed) or directly by running:
.P
.I /lib/ld\-linux.so.*
[OPTIONS] [PROGRAM [ARGUMENTS]]
.SH DESCRIPTION
"#;
    let command = extract_synopsis_command(groff);
    assert_eq!(command.as_deref(), None);
}
/// Label lines such as "Set up a loop device:" that precede the actual
/// invocation must not be mistaken for the command; `losetup` is expected.
#[test]
fn synopsis_skips_labels_before_invocation() {
    let groff = r#".SH "SYNOPSIS"
.sp
Set up a loop device:
.sp
\fBlosetup\fP [options] \fB\-f\fP|\fIloopdev file\fP
.sp
Get info:
.RS 4
\fBlosetup\fP \fIloopdev\fP
.RE
.SH "DESCRIPTION"
"#;
    let expected = Some("losetup");
    let command = extract_synopsis_command(groff);
    assert_eq!(command.as_deref(), expected);
}
/// The unquoted arguments of a `.B` macro (`ip link`) form one command and
/// keep the space between the two words.
#[test]
fn synopsis_b_macro_preserves_command_spaces() {
    let groff = r#".SH "SYNOPSIS"
.sp
.B ip link
.RI " { " COMMAND " | "
.BR help " }"
.SH "DESCRIPTION"
"#;
    let expected = Some("ip link");
    let command = extract_synopsis_command(groff);
    assert_eq!(command.as_deref(), expected);
}
/// A quoted first argument to `.BR` (`"ip monitor"`) is taken as the command,
/// with its inner space intact.
#[test]
fn synopsis_br_macro_preserves_quoted_command_spaces() {
    let groff = r#".SH "SYNOPSIS"
.sp
.BR "ip monitor" " [ " all " |"
.IR OBJECT-LIST " ]"
.SH "DESCRIPTION"
"#;
    let expected = Some("ip monitor");
    let command = extract_synopsis_command(groff);
    assert_eq!(command.as_deref(), expected);
}
/// A four-word `.B` line (`tipc peer remove address`) is a command, not prose,
/// and is expected back in full.
#[test]
fn synopsis_long_b_macro_is_not_prose() {
    let groff = r#".SH SYNOPSIS
.ad l
.in +8
.ti -8
.B tipc peer remove address
.IR ADDRESS
.SH OPTIONS
"#;
    let expected = Some("tipc peer remove address");
    let command = extract_synopsis_command(groff);
    assert_eq!(command.as_deref(), expected);
}
/// The synopsis may be introduced by an `.SS` macro whose title sits on the
/// following line; the command `nix-env` must still be found there.
#[test]
fn synopsis_ss_heading_is_accepted() {
    let groff = r#".SH Name
.LP
\f(CRnix-env --set\fR - set profile to contain a specified derivation
.SS
Synopsis
.LP
\f(CRnix-env\fR \f(CR--set\fR \fIdrvname\fR
.SS
Description
"#;
    let expected = Some("nix-env");
    let command = extract_synopsis_command(groff);
    assert_eq!(command.as_deref(), expected);
}
// --- Font/dedup tests (only the font-spacing one is portable) ---
/// Spacing at font-switch boundaries: a bold flag directly followed by an
/// italic parameter gets a separating space, while `[=` stays attached.
#[test]
fn font_boundary_spacing() {
    // Bold flag immediately followed by an italic word: expect
    // "--max-results count" in the output.
    let flag_then_param = r#"\fB\-\-max\-results\fR\fIcount\fR"#;
    let s = strip_groff_escapes(flag_then_param);
    assert!(s.contains("--max-results count"), "got: {s}");

    // Bold flag followed by `[=WHEN]`: no space may appear before the bracket.
    let flag_then_optional = r#"\fB\-\-color\fR[=\fIWHEN\fR]"#;
    let s2 = strip_groff_escapes(flag_then_optional);
    assert!(s2.contains("--color[=WHEN]"), "got: {s2}");
}
// --- COMMANDS section tests ---
/// A COMMANDS section written as `.PP` / `.RS 4` blocks must produce one
/// subcommand per block, separately from the two flags in OPTIONS.
#[test]
fn commands_section_subcommands() {
    let groff = r#".SH OPTIONS
.TP
\fB\-\-user\fR
Talk to the service manager of the calling user.
.TP
\fB\-\-system\fR
Talk to the service manager of the system.
.SH COMMANDS
.PP
\fBstart\fR \fIUNIT\fR\&...
.RS 4
Start (activate) one or more units.
.RE
.PP
\fBstop\fR \fIUNIT\fR\&...
.RS 4
Stop (deactivate) one or more units.
.RE
.PP
\fBreload\fR \fIUNIT\fR\&...
.RS 4
Asks all units to reload their configuration.
.RE
.SH SEE ALSO
"#;
    let parsed = parse_manpage_string(groff);
    assert_eq!(
        parsed.entries.len(),
        2,
        "options entries: {:?}",
        parsed.entries
    );
    assert_eq!(
        parsed.subcommands.len(),
        3,
        "subcommands: {:?}",
        parsed.subcommands
    );

    // Membership is checked straight on the parsed list.
    let has_subcommand = |wanted: &str| parsed.subcommands.iter().any(|sc| sc.name == wanted);
    assert!(has_subcommand("start"));
    assert!(has_subcommand("stop"));
    assert!(has_subcommand("reload"));

    let start = parsed
        .subcommands
        .iter()
        .find(|sc| sc.name == "start")
        .unwrap();
    assert!(!start.desc.is_empty());
}
/// git-style command lists name subcommands as manpage references, written
/// either as `.BR git-add (1)` or as `\fBgit-commit\fR(1)`. Both spellings must
/// yield a subcommand, and the description must be carried over.
#[test]
fn commands_section_git_style_refs() {
    let groff = r#".SH OPTIONS
.TP
\fB\-\-version\fR
Show version.
.SH "GIT COMMANDS"
.SS "Main porcelain commands"
.PP
.BR git-add (1)
.RS 4
Add file contents to the index.
.RE
.PP
\fBgit-commit\fR(1)
.RS 4
Record changes to the repository.
.RE
"#;
    let r = parse_manpage_string(groff);

    // Collect the names once; both membership checks below read from it.
    let mut names: Vec<&str> = Vec::new();
    for sc in r.subcommands.iter() {
        names.push(sc.name.as_str());
    }
    assert!(
        names.contains(&"git-add"),
        "subcommands: {:?}",
        r.subcommands
    );
    assert!(
        names.contains(&"git-commit"),
        "subcommands: {:?}",
        r.subcommands
    );

    let git_add = r
        .subcommands
        .iter()
        .find(|sc| sc.name == "git-add")
        .unwrap();
    assert!(git_add.desc.contains("Add file contents"));
}
// --- Nushell generation tests ---
/// Converts a borrowed help-parse result into the owned `ManpageResult`
/// through the existing `Into` conversion.
fn to_owned_result(r: &HelpResult<'_>) -> ManpageResult {
    let owned: ManpageResult = r.into();
    owned
}
/// One `-a, --all` help line must generate an `export extern "ls"` containing
/// the combined `--all(-a)` flag and its description as a comment.
#[test]
fn nushell_basic() {
    let owned = to_owned_result(&parse(
        " -a, --all do not ignore entries starting with .\n",
    ));
    let nu = generate_extern("ls", &owned);
    assert!(nu.contains("export extern \"ls\""), "nu = {nu}");
    assert!(nu.contains("--all(-a)"), "nu = {nu}");
    assert!(nu.contains("# do not ignore"), "nu = {nu}");
}
/// Parameter names map to nushell types in the generated extern: `COLS` to
/// `int`, `SIZE` to `string` and `FILE` to `path`.
#[test]
fn nushell_param_types() {
    let help_text = " -w, --width=COLS set output width\n --block-size=SIZE scale sizes\n -o, --output FILE output file\n";
    let owned = to_owned_result(&parse(help_text));
    let nu = generate_extern("ls", &owned);
    assert!(nu.contains("--width(-w): int"), "nu = {nu}");
    assert!(nu.contains("--block-size: string"), "nu = {nu}");
    assert!(nu.contains("--output(-o): path"), "nu = {nu}");
}
/// Help text with a commands list and a flags list must generate an extern for
/// the root command (with its flag) and one extern per listed subcommand.
#[test]
fn nushell_subcommands() {
    let help_text = "Common Commands:\n run Create and run a new container\n exec Execute a command\n\nFlags:\n -D, --debug Enable debug mode\n";
    let owned = to_owned_result(&parse(help_text));
    let nu = generate_extern("docker", &owned);

    // Root command and its flag.
    assert!(nu.contains("export extern \"docker\""), "nu = {nu}");
    assert!(nu.contains("--debug(-D)"), "nu = {nu}");

    // One extern per subcommand.
    assert!(nu.contains("export extern \"docker run\""), "nu = {nu}");
    assert!(nu.contains("export extern \"docker exec\""), "nu = {nu}");
}
/// Positional order (`repository` before `directory`) must be the same after
/// parsing, after a JSON cache round trip, and in the generated extern.
#[test]
fn positional_order_survives_cache_and_generation() {
    let expected_order = vec!["repository", "directory"];

    // Stage 1: straight from the usage line.
    let result = to_owned_result(&parse(
        "usage: git clone [<options>] [--] <repository> [directory]\n",
    ));
    let parsed_order = result
        .positionals
        .iter()
        .map(|(name, _)| name.as_str())
        .collect::<Vec<_>>();
    assert_eq!(parsed_order, expected_order);

    // Stage 2: serialise to the cache format and read it back.
    let json = json_of_result("help", &result);
    let value = serde_json::from_str(&json).expect("cache json");
    let cached = result_from_json(&value);
    let cached_order = cached
        .positionals
        .iter()
        .map(|(name, _)| name.as_str())
        .collect::<Vec<_>>();
    assert_eq!(cached_order, expected_order);

    // Stage 3: the generated extern lists them in the same order.
    let nu = generate_extern("git clone", &cached);
    let repository = nu
        .find("repository: string")
        .expect("repository positional");
    let directory = nu.find("directory?: path").expect("directory positional");
    assert!(repository < directory, "nu = {nu}");
}
/// Extern generation fed from a manpage: a `.TP` options list must yield the
/// combined `--all(-a)` flag and a typed `--block-size` flag.
#[test]
fn nushell_from_manpage() {
    let groff = r#".SH OPTIONS
.TP
\fB\-a\fR, \fB\-\-all\fR
do not ignore entries starting with .
.TP
\fB\-\-block\-size\fR=\fISIZE\fR
scale sizes by SIZE
.SH AUTHOR
"#;
    let parsed = parse_manpage_string(groff);
    let nu = generate_extern("ls", &parsed);
    assert!(nu.contains("export extern \"ls\""), "nu = {nu}");
    assert!(nu.contains("--all(-a)"), "nu = {nu}");
    assert!(nu.contains("--block-size: string"), "nu = {nu}");
}
/// `generate_module` must emit a `module myapp-completions` that contains the
/// command's extern and its flag.
#[test]
fn nushell_module() {
    let owned = to_owned_result(&parse(" -v, --verbose verbose output\n"));
    let nu = generate_module("myapp", &owned);
    assert!(nu.contains("module myapp-completions"), "nu = {nu}");
    assert!(nu.contains("export extern \"myapp\""), "nu = {nu}");
    assert!(nu.contains("--verbose(-v)"), "nu = {nu}");
}
/// Three help lines describing the same flag (combined, long-only and
/// short-only) must collapse into a single `--verbose(-v)` in the extern.
#[test]
fn dedup_entries_help() {
    let help_text = " -v, --verbose verbose output\n --verbose verbose mode\n -v be verbose\n";
    let owned = to_owned_result(&parse(help_text));
    let nu = generate_extern("test", &owned);

    let count = nu.matches("--verbose").count();
    assert_eq!(count, 1, "expected --verbose to appear once, nu = {nu}");
    assert!(nu.contains("--verbose(-v)"), "nu = {nu}");
}
/// Mentions of `-v` / `--verbose` in the DESCRIPTION prose must not add extra
/// entries: the extern may contain exactly one line mentioning "verbose".
#[test]
fn dedup_manpage_entries() {
    let groff = r#".SH OPTIONS
.TP
\fB\-v\fR, \fB\-\-verbose\fR
Be verbose.
.SH DESCRIPTION
Use \fB\-v\fR for verbose output.
Use \fB\-\-verbose\fR to see more.
"#;
    let parsed = parse_manpage_string(groff);
    let nu = generate_extern("test", &parsed);
    assert!(nu.contains("--verbose(-v)"), "nu = {nu}");

    let mut verbose_lines: Vec<&str> = Vec::new();
    for line in nu.lines() {
        if line.contains("verbose") {
            verbose_lines.push(line);
        }
    }
    assert_eq!(
        verbose_lines.len(),
        1,
        "expected 1 verbose line, got: {verbose_lines:?}"
    );
}
/// Reads an existing nushell completions module back in. The root command
/// must expose its two flags, its description and the `list` subcommand;
/// querying `mytool list` must give that subcommand's three flags (including
/// `--format(-f)`) and at least one positional.
#[test]
fn nu_file_parsing() {
    let nu_source = r#"module completions {
# Unofficial CLI tool
export extern mytool [
--help(-h) # Print help
--version(-V) # Print version
]
# List all items
export extern "mytool list" [
--raw # Output as JSON
--format(-f): string # Output format
--help(-h) # Print help
name?: string # Filter by name
]
}
use completions *
"#;
    // Root command.
    let root = parse_nu_completions("mytool", nu_source);
    assert_eq!(root.entries.len(), 2, "entries: {:?}", root.entries);
    assert!(
        !root.subcommands.is_empty(),
        "subcommands: {:?}",
        root.subcommands
    );
    assert!(root.subcommands.iter().any(|sc| sc.name == "list"));
    assert_eq!(root.description, "Unofficial CLI tool");

    // `list` subcommand, looked up by its full name.
    let list = parse_nu_completions("mytool list", nu_source);
    assert_eq!(list.entries.len(), 3, "list entries: {:?}", list.entries);
    let has_format = list
        .entries
        .iter()
        .any(|e| matches!(&e.switch, OwnedSwitch::Both('f', l) if l == "format"));
    assert!(
        has_format,
        "list should have --format(-f): {:?}",
        list.entries
    );
    assert!(
        !list.positionals.is_empty(),
        "list should have a positional"
    );
}
/// systemctl-style help that lists its own commands ahead of the options:
/// `start` must be picked up as a subcommand and at least two option entries
/// must be found.
#[test]
fn self_listing_detection() {
    let help_text = r#"systemctl [OPTIONS...] COMMAND ...
Unit Commands:
start UNIT... Start (activate) one or more units
stop UNIT... Stop (deactivate) one or more units
status [PATTERN...] Show runtime status
Options:
--user Talk to the user service manager
--system Talk to the system service manager
"#;
    let parsed = parse(help_text);

    let has_start = parsed.subcommands.iter().any(|sc| sc.name == "start");
    assert!(
        has_start,
        "expected start in subcommands: {:?}",
        parsed
            .subcommands
            .iter()
            .map(|sc| sc.name)
            .collect::<Vec<_>>()
    );

    assert!(parsed.entries.len() >= 2);
}

500
tests/runtime_complete.rs Normal file
View file

@ -0,0 +1,500 @@
use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::process::Command;
use std::time::{SystemTime, UNIX_EPOCH};
use inshellah::parsers::manpage::{ManpageEntry, ManpageResult, ManpageSubcommand, OwnedSwitch};
use inshellah::store::write_result;
/// Builds (but does not create) a path under the system temp directory whose
/// final component is `{name}-{pid}-{nanoseconds since the Unix epoch}`.
///
/// # Panics
///
/// Panics if the system clock reports a time before the Unix epoch.
fn unique_temp_dir(name: &str) -> std::path::PathBuf {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("system time");
    let nanos = since_epoch.as_nanos();
    let leaf = format!("{name}-{}-{nanos}", std::process::id());

    let mut dir = std::env::temp_dir();
    dir.push(leaf);
    dir
}
/// Only the parent command `fakecmd` is cached (listing `clone` as a
/// subcommand). Completing flags for `fakecmd clone --` must still offer
/// `--depth`, which appears only in the stub's `clone --help` output, and must
/// leave a `fakecmd_clone.json` file in the cache directory.
#[test]
fn complete_scrapes_missing_subcommand_when_parent_is_cached() {
    let root = unique_temp_dir("inshellah-runtime-complete");
    let bin_dir = root.join("bin");
    let cache_dir = root.join("cache");
    fs::create_dir_all(&bin_dir).expect("bin dir");
    fs::create_dir_all(&cache_dir).expect("cache dir");

    // Stub executable: prints help for `clone --help|-h` and for a top-level
    // `--help|-h`; every other invocation exits with status 2.
    let fakecmd = bin_dir.join("fakecmd");
    fs::write(
        &fakecmd,
        r#"#!/bin/sh
if [ "$1" = "clone" ]; then
if [ "$2" = "--help" ] || [ "$2" = "-h" ]; then
cat <<'EOF'
Usage: fakecmd clone [OPTIONS] <repository> [directory]
Options:
--depth <n> clone depth
-v, --verbose verbose
EOF
exit 0
fi
fi
if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
cat <<'EOF'
Usage: fakecmd [OPTIONS] COMMAND
Commands:
clone Clone a repository
Options:
-h, --help show help
EOF
exit 0
fi
exit 2
"#,
    )
    .expect("write fakecmd");
    let mut perms = fs::metadata(&fakecmd).expect("metadata").permissions();
    perms.set_mode(0o755);
    fs::set_permissions(&fakecmd, perms).expect("chmod");

    // Seed the cache with the parent command only.
    let parent = ManpageResult {
        entries: Vec::new(),
        subcommands: vec![ManpageSubcommand {
            name: "clone".to_string(),
            desc: "Clone a repository".to_string(),
        }],
        positionals: Vec::new(),
        description: String::new(),
    };
    write_result(&cache_dir, "fakecmd", "help", &parent).expect("parent cache");

    // Put the stub directory in front of the inherited PATH. The value is
    // assembled with `join_paths` so the platform separator is used and
    // non-UTF-8 entries of the inherited PATH are passed through unchanged
    // (string formatting with `to_string_lossy` would corrupt them).
    let old_path = std::env::var_os("PATH").unwrap_or_default();
    let search_path = std::env::join_paths(
        std::iter::once(bin_dir.clone()).chain(std::env::split_paths(&old_path)),
    )
    .expect("join PATH");

    let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
        .arg("complete")
        .arg("--dir")
        .arg(&cache_dir)
        .arg("--timeout-ms")
        .arg("1000")
        .arg("fakecmd")
        .arg("clone")
        .arg("--")
        .env("PATH", &search_path)
        .output()
        .expect("run inshellah complete");
    assert!(
        output.status.success(),
        "stderr = {}",
        String::from_utf8_lossy(&output.stderr)
    );
    let stdout = String::from_utf8(output.stdout).expect("stdout");
    assert!(stdout.contains("--depth"), "stdout = {stdout}");
    assert!(
        cache_dir.join("fakecmd_clone.json").is_file(),
        "subcommand cache was not written"
    );

    // Best-effort cleanup; a failure to remove the scratch tree is ignored.
    let _ = fs::remove_dir_all(root);
}
/// With an empty cache and a `git` stub as the only entry on PATH, completing
/// the partial command word `gi` must print `null`.
#[test]
fn complete_does_not_scan_path_at_command_position() {
    let root = unique_temp_dir("inshellah-command-position-complete");
    let (bin_dir, cache_dir) = (root.join("bin"), root.join("cache"));
    fs::create_dir_all(&bin_dir).expect("bin dir");
    fs::create_dir_all(&cache_dir).expect("cache dir");

    // Executable stub that a PATH scan would be able to find.
    let fake_git = bin_dir.join("git");
    fs::write(&fake_git, "#!/bin/sh\nexit 0\n").expect("write fake git");
    let mut exec_perms = fs::metadata(&fake_git).expect("metadata").permissions();
    exec_perms.set_mode(0o755);
    fs::set_permissions(&fake_git, exec_perms).expect("chmod");

    let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
        .args(["complete", "--dir"])
        .arg(&cache_dir)
        .arg("gi")
        .env("PATH", &bin_dir)
        .output()
        .expect("run inshellah complete");
    assert!(
        output.status.success(),
        "stderr = {}",
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout).expect("stdout");
    assert_eq!(stdout.trim(), "null", "stdout = {stdout}");

    // Best-effort cleanup.
    let _ = fs::remove_dir_all(root);
}
/// For the prefix `lo`, the candidate `load` must appear in the output before
/// `clone`. A candidate missing from the output is given position
/// `usize::MAX`, so the test fails if `load` is absent but passes if only
/// `clone` is absent.
#[test]
fn complete_uses_boundary_aware_fuzzy_ranking() {
    let root = unique_temp_dir("inshellah-fuzzy-complete");
    let cache_dir = root.join("cache");
    fs::create_dir_all(&cache_dir).expect("cache dir");

    // Cache a `demo` command with two subcommands.
    let load = ManpageSubcommand {
        name: "load".to_string(),
        desc: "load something".to_string(),
    };
    let clone = ManpageSubcommand {
        name: "clone".to_string(),
        desc: "clone something".to_string(),
    };
    let result = ManpageResult {
        entries: Vec::new(),
        subcommands: vec![load, clone],
        positionals: Vec::new(),
        description: String::new(),
    };
    write_result(&cache_dir, "demo", "help", &result).expect("cache");

    let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
        .args(["complete", "--dir"])
        .arg(&cache_dir)
        .args(["demo", "lo"])
        .output()
        .expect("run inshellah complete");
    assert!(
        output.status.success(),
        "stderr = {}",
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout).expect("stdout");
    let load_pos = stdout.find(r#""value":"load""#).unwrap_or(usize::MAX);
    let clone_pos = stdout.find(r#""value":"clone""#).unwrap_or(usize::MAX);
    assert!(
        load_pos < clone_pos,
        "expected boundary match to outrank substring match, stdout = {stdout}"
    );

    // Best-effort cleanup.
    let _ = fs::remove_dir_all(root);
}
/// With one cached flag (`--verbose`) for `demo`, an empty current word must
/// produce `null`, while the word `--` must offer `--verbose`.
#[test]
fn complete_returns_flags_only_after_hyphen() {
    let root = unique_temp_dir("inshellah-flag-prefix-complete");
    let cache_dir = root.join("cache");
    fs::create_dir_all(&cache_dir).expect("cache dir");

    let verbose = ManpageEntry {
        switch: OwnedSwitch::Long("verbose".to_string()),
        param: None,
        desc: "verbose output".to_string(),
    };
    let result = ManpageResult {
        entries: vec![verbose],
        subcommands: Vec::new(),
        positionals: Vec::new(),
        description: String::new(),
    };
    write_result(&cache_dir, "demo", "help", &result).expect("cache");

    // Runs `inshellah complete --dir <cache> demo <word>`.
    let complete_demo = |word: &str| {
        Command::new(env!("CARGO_BIN_EXE_inshellah"))
            .args(["complete", "--dir"])
            .arg(&cache_dir)
            .args(["demo", word])
            .output()
            .expect("run inshellah complete")
    };

    // Empty word: no completions expected.
    let argument_output = complete_demo("");
    assert!(
        argument_output.status.success(),
        "stderr = {}",
        String::from_utf8_lossy(&argument_output.stderr)
    );
    let argument_stdout = String::from_utf8(argument_output.stdout).expect("stdout");
    assert_eq!(argument_stdout.trim(), "null", "stdout = {argument_stdout}");

    // Word starting with hyphens: the flag is offered.
    let flag_output = complete_demo("--");
    assert!(
        flag_output.status.success(),
        "stderr = {}",
        String::from_utf8_lossy(&flag_output.stderr)
    );
    let flag_stdout = String::from_utf8(flag_output.stdout).expect("stdout");
    assert!(
        flag_stdout.contains(r#""value":"--verbose""#),
        "stdout = {flag_stdout}"
    );

    // Best-effort cleanup.
    let _ = fs::remove_dir_all(root);
}
/// The command is given as an absolute path after `sudo`, with PATH empty.
/// Completing `--` must still offer `--verbose`, which appears only in the
/// stub's `--help` output.
#[test]
fn complete_resolves_absolute_path_after_elevation_wrapper() {
    let root = unique_temp_dir("inshellah-absolute-elevation-complete");
    let (bin_dir, cache_dir) = (root.join("bin"), root.join("cache"));
    fs::create_dir_all(&bin_dir).expect("bin dir");
    fs::create_dir_all(&cache_dir).expect("cache dir");

    // Stub executable: prints a small help text for `--help|-h`, otherwise
    // exits with status 2.
    let fakecmd = bin_dir.join("fakecmd");
    fs::write(
        &fakecmd,
        r#"#!/bin/sh
if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
printf '%s\n' 'Usage: fakecmd [OPTIONS]' '' 'Options:' ' --verbose verbose output'
exit 0
fi
exit 2
"#,
    )
    .expect("write fakecmd");
    let mut exec_perms = fs::metadata(&fakecmd).expect("metadata").permissions();
    exec_perms.set_mode(0o755);
    fs::set_permissions(&fakecmd, exec_perms).expect("chmod");

    let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
        .args(["complete", "--dir"])
        .arg(&cache_dir)
        .args(["--timeout-ms", "1000", "sudo"])
        .arg(&fakecmd)
        .arg("--")
        .env("PATH", "")
        .output()
        .expect("run inshellah complete");
    assert!(
        output.status.success(),
        "stderr = {}",
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout).expect("stdout");
    assert!(
        stdout.contains(r#""value":"--verbose""#),
        "stdout = {stdout}"
    );

    // Best-effort cleanup.
    let _ = fs::remove_dir_all(root);
}
/// Dynamic `adb` completions, driven by a stub `adb` script on PATH.
///
/// The stub answers `devices -l` with three devices and
/// `shell pm list packages` with a package list that depends on the device
/// selector given before the subcommand. The test checks serial and
/// transport-id completion (both separate-argument and `--flag=value` forms),
/// package completion for the default, `-s` and `-t` selectors, and that
/// unrelated flag values produce `null`.
#[test]
fn complete_adb_dynamic_values_use_live_devices_and_packages() {
    let root = unique_temp_dir("inshellah-adb-dynamic-complete");
    let (bin_dir, cache_dir) = (root.join("bin"), root.join("cache"));
    fs::create_dir_all(&bin_dir).expect("bin dir");
    fs::create_dir_all(&cache_dir).expect("cache dir");

    // Stub `adb`: consumes one leading device selector, then serves the two
    // queries described above; anything else exits with status 2.
    let adb = bin_dir.join("adb");
    fs::write(
        &adb,
        r#"#!/bin/sh
selector=""
case "$1" in
-s|--serial|--one-device)
selector="$2"
shift 2
;;
-t|--transport-id)
selector="transport:$2"
shift 2
;;
--serial=*)
selector="${1#--serial=}"
shift
;;
--one-device=*)
selector="${1#--one-device=}"
shift
;;
--transport-id=*)
selector="transport:${1#--transport-id=}"
shift
;;
esac
if [ "$1" = "devices" ] && [ "$2" = "-l" ]; then
printf '%s\n' 'List of devices attached'
printf '%s\n' 'emulator-5554 device product:sdk_gphone_x86 model:Pixel_8 device:emu transport_id:1'
printf '%s\n' 'R58M123456 device product:oriole model:Pixel_6 device:oriole transport_id:2'
printf '%s\n' 'offline-1 offline transport_id:3'
exit 0
fi
if [ "$1" = "shell" ] && [ "$2" = "pm" ] && [ "$3" = "list" ] && [ "$4" = "packages" ]; then
case "$selector" in
emulator-5554)
printf '%s\n' 'package:com.example.emu'
printf '%s\n' 'package:org.example.shared'
;;
transport:2)
printf '%s\n' 'package:com.example.transport'
printf '%s\n' 'package:org.example.transport'
;;
*)
printf '%s\n' 'package:com.default.app'
printf '%s\n' 'package:/data/app/org.default.path/base.apk=org.default.path'
;;
esac
exit 0
fi
exit 2
"#,
    )
    .expect("write adb");
    let mut exec_perms = fs::metadata(&adb).expect("metadata").permissions();
    exec_perms.set_mode(0o755);
    fs::set_permissions(&adb, exec_perms).expect("chmod");

    // Runs `inshellah complete --dir <cache> --timeout-ms 1000 <args…>` with
    // PATH set to the stub directory only, asserts success and returns stdout.
    let run_complete = |args: &[&str]| -> String {
        let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
            .args(["complete", "--dir"])
            .arg(&cache_dir)
            .args(["--timeout-ms", "1000"])
            .args(args)
            .env("PATH", &bin_dir)
            .output()
            .expect("run inshellah complete");
        assert!(
            output.status.success(),
            "stderr = {}",
            String::from_utf8_lossy(&output.stderr)
        );
        String::from_utf8(output.stdout).expect("stdout")
    };

    // `-s <TAB>`: every listed device is offered, including the offline one.
    let stdout = run_complete(&["adb", "-s", ""]);
    assert!(
        stdout.contains(r#""value":"emulator-5554""#),
        "stdout = {stdout}"
    );
    assert!(
        stdout.contains(r#""description":"device sdk gphone x86 Pixel 8""#),
        "stdout = {stdout}"
    );
    assert!(
        stdout.contains(r#""value":"R58M123456""#),
        "stdout = {stdout}"
    );
    assert!(
        stdout.contains(r#""value":"offline-1""#),
        "stdout = {stdout}"
    );

    // `--serial=R5`: the value keeps its `--serial=` prefix and is filtered.
    let prefixed_stdout = run_complete(&["adb", "--serial=R5"]);
    assert!(
        prefixed_stdout.contains(r#""value":"--serial=R58M123456""#),
        "stdout = {prefixed_stdout}"
    );
    assert!(
        !prefixed_stdout.contains(r#""value":"--serial=emulator-5554""#),
        "stdout = {prefixed_stdout}"
    );

    // `--one-device <TAB>` completes serials as well.
    let one_device_stdout = run_complete(&["adb", "--one-device", ""]);
    assert!(
        one_device_stdout.contains(r#""value":"emulator-5554""#),
        "stdout = {one_device_stdout}"
    );

    // `-t <TAB>`: transport ids, described by serial plus device details.
    let transport_stdout = run_complete(&["adb", "-t", ""]);
    assert!(
        transport_stdout.contains(r#""value":"1""#),
        "stdout = {transport_stdout}"
    );
    assert!(
        transport_stdout.contains(r#""description":"emulator-5554 device sdk gphone x86 Pixel 8""#),
        "stdout = {transport_stdout}"
    );
    assert!(
        transport_stdout.contains(r#""value":"2""#),
        "stdout = {transport_stdout}"
    );

    // `--transport-id=2`: prefixed form, filtered to the matching id.
    let transport_prefixed_stdout = run_complete(&["adb", "--transport-id=2"]);
    assert!(
        transport_prefixed_stdout.contains(r#""value":"--transport-id=2""#),
        "stdout = {transport_prefixed_stdout}"
    );
    assert!(
        !transport_prefixed_stdout.contains(r#""value":"--transport-id=1""#),
        "stdout = {transport_prefixed_stdout}"
    );

    // `uninstall org`: packages from the default device; the
    // `package:<apk path>=<name>` line must be reduced to the package name.
    let uninstall_stdout = run_complete(&["adb", "uninstall", "org"]);
    assert!(
        uninstall_stdout.contains(r#""value":"org.default.path""#),
        "stdout = {uninstall_stdout}"
    );
    assert!(
        !uninstall_stdout.contains(r#""value":"com.default.app""#),
        "stdout = {uninstall_stdout}"
    );

    // `-s emulator-5554 shell pm clear`: packages of the selected serial only.
    let clear_stdout = run_complete(&["adb", "-s", "emulator-5554", "shell", "pm", "clear", ""]);
    assert!(
        clear_stdout.contains(r#""value":"com.example.emu""#),
        "stdout = {clear_stdout}"
    );
    assert!(
        !clear_stdout.contains(r#""value":"com.example.transport""#),
        "stdout = {clear_stdout}"
    );

    // `-t 2 shell am force-stop --user 0 com.`: packages of transport 2 only.
    let force_stop_stdout = run_complete(&[
        "adb",
        "-t",
        "2",
        "shell",
        "am",
        "force-stop",
        "--user",
        "0",
        "com.",
    ]);
    assert!(
        force_stop_stdout.contains(r#""value":"com.example.transport""#),
        "stdout = {force_stop_stdout}"
    );
    assert!(
        !force_stop_stdout.contains(r#""value":"com.example.emu""#),
        "stdout = {force_stop_stdout}"
    );

    // The value slot of `pm enable --user` is not a package: expect `null`.
    let flag_value_stdout = run_complete(&["adb", "shell", "pm", "enable", "--user", ""]);
    assert_eq!(
        flag_value_stdout.trim(),
        "null",
        "stdout = {flag_value_stdout}"
    );

    // `-s` after `shell` is not the device selector: expect `null`.
    let shell_flag_stdout = run_complete(&["adb", "shell", "-s", ""]);
    assert_eq!(
        shell_flag_stdout.trim(),
        "null",
        "stdout = {shell_flag_stdout}"
    );

    // Best-effort cleanup.
    let _ = fs::remove_dir_all(root);
}

31
tests/self_completions.rs Normal file
View file

@ -0,0 +1,31 @@
use std::process::Command;
/// `inshellah completions` must emit an `export extern "inshellah <sub>"` for
/// every subcommand in the list below.
#[test]
fn inshellah_completions_include_all_subcommands() {
    let expected_subcommands = [
        "index",
        "manpage",
        "manpage-dir",
        "complete",
        "query",
        "dump",
        "completions",
    ];

    let output = Command::new(env!("CARGO_BIN_EXE_inshellah"))
        .args(["completions"])
        .output()
        .expect("run inshellah completions");
    assert!(
        output.status.success(),
        "stderr = {}",
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout).expect("stdout");
    for subcommand in expected_subcommands {
        let extern_name = format!("export extern \"inshellah {subcommand}\"");
        assert!(
            stdout.contains(&extern_name),
            "missing {extern_name}; stdout = {stdout}"
        );
    }
}