first draft
This commit is contained in:
parent
ab009ec9af
commit
01ccf64efc
13 changed files with 1311 additions and 239 deletions
4
bin/dune
4
bin/dune
|
|
@ -1,4 +1,4 @@
|
|||
(executable
|
||||
(public_name inshellah-parser)
|
||||
(public_name inshellah)
|
||||
(name main)
|
||||
(libraries inshellah_parser))
|
||||
(libraries inshellah))
|
||||
|
|
|
|||
279
bin/main.ml
279
bin/main.ml
|
|
@ -1,171 +1,112 @@
|
|||
open Inshellah_parser.Parser
|
||||
open Inshellah.Parser
|
||||
open Inshellah.Manpage
|
||||
open Inshellah.Nushell
|
||||
|
||||
let usage () =
|
||||
Printf.eprintf {|inshellah — generate nushell completions from manpages and --help output
|
||||
|
||||
Usage:
|
||||
inshellah manpage FILE Parse a manpage (.1, .1.gz) and emit nushell extern
|
||||
inshellah manpage-dir DIR Batch-process all manpages under DIR/man1/
|
||||
inshellah help CMD [ARGS...] Run CMD ARGS --help, parse output, emit nushell extern
|
||||
inshellah parse-help CMD Read --help text from stdin, emit nushell extern for CMD
|
||||
inshellah demo Run built-in demo
|
||||
|
||||
|};
|
||||
exit 1
|
||||
|
||||
(* [cmd_name_of_manpage path] derives a command name from a manpage path:
   the directory part is dropped, then a trailing ".gz" (if any), then the
   last extension (the ".N" section suffix).  A name without an extension
   is returned unchanged. *)
let cmd_name_of_manpage path =
  let without_gz name =
    match Filename.chop_suffix_opt ~suffix:".gz" name with
    | Some stem -> stem
    | None -> name
  in
  path |> Filename.basename |> without_gz |> Filename.remove_extension
|
||||
|
||||
(* Parse one manpage file and print the generated extern on stdout.
   Nothing is printed when the parser returns no entries. *)
let cmd_manpage file =
  match parse_manpage_file file with
  | [] -> ()
  | entries ->
    let cmd = cmd_name_of_manpage file in
    print_string (generate_extern_from_entries cmd entries)
|
||||
|
||||
(* Batch mode: for each of DIR/man1 .. DIR/man9 that exists and is a
   directory, run [cmd_manpage] on every entry in sorted name order.
   Any exception raised while handling a single file is discarded so that
   one unparseable manpage does not stop the batch. *)
let cmd_manpage_dir dir =
  let process_section section =
    let subdir = Filename.concat dir (Printf.sprintf "man%d" section) in
    if Sys.file_exists subdir && Sys.is_directory subdir then begin
      let names = Sys.readdir subdir in
      Array.sort String.compare names;
      Array.iter
        (fun name ->
          match cmd_manpage (Filename.concat subdir name) with
          | () -> ()
          | exception _ -> () (* skip unparseable manpages *))
        names
    end
  in
  List.iter process_section [ 1; 2; 3; 4; 5; 6; 7; 8; 9 ]
|
||||
|
||||
(* [cmd_help args] runs "CMD ARGS... --help" through the shell with stderr
   merged into stdout, parses the captured text and prints the extern for
   the basename of CMD.  Exits with status 1 when no command is given or
   when the help text cannot be parsed. *)
let cmd_help args =
  match args with
  | [] ->
    Printf.eprintf "error: help mode requires a command name\n";
    exit 1
  | cmd :: rest ->
    let argv = cmd :: rest @ [ "--help" ] in
    (* Every word is shell-quoted individually before joining. *)
    let shell_line =
      String.concat " " (List.map Filename.quote argv) ^ " 2>&1"
    in
    let pipe = Unix.open_process_in shell_line in
    let captured = Buffer.create 4096 in
    let rec slurp () =
      match input_line pipe with
      | line ->
        Buffer.add_string captured line;
        Buffer.add_char captured '\n';
        slurp ()
      | exception End_of_file -> ()
    in
    slurp ();
    (* The child's exit status is deliberately not inspected. *)
    ignore (Unix.close_process_in pipe);
    let cmd_name = Filename.basename cmd in
    begin match parse_help (Buffer.contents captured) with
    | Ok r -> print_string (generate_extern cmd_name r)
    | Error msg ->
      Printf.eprintf "parse error for %s: %s\n" cmd_name msg;
      exit 1
    end
|
||||
|
||||
(* [cmd_parse_help cmd] reads help text from stdin until end of file
   (each line is re-terminated with '\n'), parses it and prints the extern
   for [cmd].  Exits with status 1 on a parse error. *)
let cmd_parse_help cmd =
  let captured = Buffer.create 4096 in
  let rec slurp () =
    match input_line stdin with
    | line ->
      Buffer.add_string captured line;
      Buffer.add_char captured '\n';
      slurp ()
    | exception End_of_file -> ()
  in
  slurp ();
  match parse_help (Buffer.contents captured) with
  | Ok r -> print_string (generate_extern cmd r)
  | Error msg ->
    Printf.eprintf "parse error for %s: %s\n" cmd msg;
    exit 1
|
||||
|
||||
let cmd_demo () =
|
||||
let ls_help =
|
||||
{|Usage: ls [OPTION]... [FILE]...
|
||||
List information about the FILEs (the current directory by default).
|
||||
|
||||
-a, --all do not ignore entries starting with .
|
||||
-A, --almost-all do not list implied . and ..
|
||||
--block-size=SIZE with -l, scale sizes by SIZE when printing them;
|
||||
e.g., '--block-size=M'; see SIZE format below
|
||||
--color[=WHEN] color the output WHEN
|
||||
-h, --human-readable with -l and -s, print sizes like 1K 234M 2G etc.
|
||||
--help display this help and exit
|
||||
--version output version information and exit
|
||||
|}
|
||||
in
|
||||
Printf.printf "# Generated by: inshellah demo\n\n";
|
||||
(match parse_help ls_help with
|
||||
| Ok r -> print_string (generate_extern "ls" r)
|
||||
| Error msg -> Printf.eprintf "parse error: %s\n" msg)
|
||||
|
||||
let () =
|
||||
let _cp =
|
||||
{|
|
||||
-A, --show-all equivalent to -vET
|
||||
-b, --number-nonblank number nonempty output lines, overrides -n
|
||||
-e equivalent to -vE
|
||||
-E, --show-ends display $ or ^M$ at end of each line
|
||||
-n, --number number all output lines
|
||||
-s, --squeeze-blank suppress repeated empty output lines
|
||||
-t equivalent to -vT
|
||||
-T, --show-tabs display TAB characters as ^I
|
||||
-u (ignored)
|
||||
-v, --show-nonprinting use ^ and M- notation, except for LFD and TAB
|
||||
--help
|
||||
display this help and exit
|
||||
--version
|
||||
output version information and exit
|
||||
|}
|
||||
and ls =
|
||||
{|
|
||||
-a, --all
|
||||
do not ignore entries starting with .
|
||||
-A, --almost-all
|
||||
do not list implied . and ..
|
||||
--author
|
||||
with -l, print the author of each file
|
||||
-b, --escape
|
||||
print C-style escapes for nongraphic characters
|
||||
--block-size=SIZE
|
||||
with -l, scale sizes by SIZE when printing them;
|
||||
e.g., '--block-size=M'; see SIZE format below
|
||||
-B, --ignore-backups
|
||||
do not list implied entries ending with ~
|
||||
-c
|
||||
with -lt: sort by, and show, ctime
|
||||
(time of last change of file status information);
|
||||
with -l: show ctime and sort by name;
|
||||
otherwise: sort by ctime, newest first
|
||||
-C
|
||||
list entries by columns
|
||||
--color[=WHEN]
|
||||
color the output WHEN; more info below
|
||||
-d, --directory
|
||||
list directories themselves, not their contents
|
||||
-D, --dired
|
||||
generate output designed for Emacs' dired mode
|
||||
-f
|
||||
same as -a -U
|
||||
-F, --classify[=WHEN]
|
||||
append indicator (one of */=>@|) to entries WHEN
|
||||
--file-type
|
||||
like -F, except do not append '*'
|
||||
--format=WORD
|
||||
across,horizontal (-x), commas (-m), long (-l),
|
||||
single-column (-1), verbose (-l), vertical (-C)
|
||||
--full-time
|
||||
like -l --time-style=full-iso
|
||||
-g
|
||||
like -l, but do not list owner
|
||||
--group-directories-first
|
||||
group directories before files
|
||||
-G, --no-group
|
||||
in a long listing, don't print group names
|
||||
-h, --human-readable
|
||||
with -l and -s, print sizes like 1K 234M 2G etc.
|
||||
--si
|
||||
likewise, but use powers of 1000 not 1024
|
||||
-H, --dereference-command-line
|
||||
follow symbolic links listed on the command line
|
||||
--dereference-command-line-symlink-to-dir
|
||||
follow each command line symbolic link that points to a directory
|
||||
--hide=PATTERN
|
||||
do not list implied entries matching shell PATTERN
|
||||
(overridden by -a or -A)
|
||||
--hyperlink[=WHEN]
|
||||
hyperlink file names WHEN
|
||||
--indicator-style=WORD
|
||||
append indicator with style WORD to entry names:
|
||||
none (default), slash (-p), file-type (--file-type), classify (-F)
|
||||
-i, --inode
|
||||
print the index number of each file
|
||||
-I, --ignore=PATTERN
|
||||
do not list implied entries matching shell PATTERN
|
||||
-k, --kibibytes
|
||||
default to 1024-byte blocks for file system usage;
|
||||
used only with -s and per directory totals
|
||||
-l
|
||||
use a long listing format
|
||||
-L, --dereference
|
||||
when showing file information for a symbolic link,
|
||||
show information for the file the link references
|
||||
rather than for the link itself
|
||||
-m
|
||||
fill width with a comma separated list of entries
|
||||
-n, --numeric-uid-gid
|
||||
like -l, but list numeric user and group IDs
|
||||
-N, --literal
|
||||
print entry names without quoting
|
||||
-o
|
||||
like -l, but do not list group information
|
||||
-p, --indicator-style=slash
|
||||
append / indicator to directories
|
||||
-q, --hide-control-chars
|
||||
print ? instead of nongraphic characters
|
||||
--show-control-chars
|
||||
show nongraphic characters as-is;
|
||||
the default, unless program is 'ls' and output is a terminal
|
||||
-Q, --quote-name
|
||||
enclose entry names in double quotes
|
||||
--quoting-style=WORD
|
||||
use quoting style WORD for entry names:
|
||||
literal, locale, shell, shell-always,
|
||||
shell-escape, shell-escape-always, c, escape
|
||||
(overrides QUOTING_STYLE environment variable)
|
||||
-r, --reverse
|
||||
reverse order while sorting
|
||||
-R, --recursive
|
||||
list subdirectories recursively
|
||||
-s, --size
|
||||
print the allocated size of each file, in blocks
|
||||
-S
|
||||
sort by file size, largest first
|
||||
--sort=WORD
|
||||
change default 'name' sort to WORD:
|
||||
none (-U), size (-S), time (-t),
|
||||
version (-v), extension (-X), name, width
|
||||
--time=WORD
|
||||
select which timestamp used to display or sort;
|
||||
access time (-u): atime, access, use;
|
||||
metadata change time (-c): ctime, status;
|
||||
modified time (default): mtime, modification;
|
||||
birth time: birth, creation;
|
||||
with -l, WORD determines which time to show;
|
||||
with --sort=time, sort by WORD (newest first)
|
||||
--time-style=TIME_STYLE
|
||||
time/date format with -l; see TIME_STYLE below
|
||||
-t
|
||||
sort by time, newest first; see --time
|
||||
-T, --tabsize=COLS
|
||||
assume tab stops at each COLS instead of 8
|
||||
-u
|
||||
with -lt: sort by, and show, access time;
|
||||
with -l: show access time and sort by name;
|
||||
otherwise: sort by access time, newest first
|
||||
-U
|
||||
do not sort directory entries
|
||||
-v
|
||||
natural sort of (version) numbers within text
|
||||
-w, --width=COLS
|
||||
set output width to COLS. 0 means no limit
|
||||
-x
|
||||
list entries by lines instead of by columns
|
||||
-X
|
||||
sort alphabetically by entry extension
|
||||
-Z, --context
|
||||
print any security context of each file
|
||||
--zero
|
||||
end each output line with NUL, not newline
|
||||
-1
|
||||
list one file per line
|
||||
--help
|
||||
display this help and exit
|
||||
--version
|
||||
output version information and exit
|
||||
|}
|
||||
in
|
||||
match parse_help ls with
|
||||
| Ok entries -> List.iter (fun e -> print_entry e) entries
|
||||
| Error msg -> Printf.eprintf "parse error: %s\n" msg
|
||||
let args = Array.to_list Sys.argv |> List.tl in
|
||||
match args with
|
||||
| ["manpage"; file] -> cmd_manpage file
|
||||
| ["manpage-dir"; dir] -> cmd_manpage_dir dir
|
||||
| "help" :: rest -> cmd_help rest
|
||||
| ["parse-help"; cmd] -> cmd_parse_help cmd
|
||||
| ["demo"] -> cmd_demo ()
|
||||
| _ -> usage ()
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
(lang dune 3.20)
|
||||
|
||||
(name inshellah-parser)
|
||||
(name inshellah)
|
||||
|
||||
(generate_opam_files true)
|
||||
|
||||
|
|
@ -16,7 +16,7 @@
|
|||
(documentation https://url/to/documentation)
|
||||
|
||||
(package
|
||||
(name inshellah-parser)
|
||||
(name inshellah)
|
||||
(synopsis "A short synopsis")
|
||||
(description "A longer description")
|
||||
(depends
|
||||
|
|
|
|||
27
flake.nix
27
flake.nix
|
|
@ -28,5 +28,32 @@
|
|||
};
|
||||
}
|
||||
);
|
||||
|
||||
packages = forAllSystems (
|
||||
pkgs: sys: {
|
||||
default = pkgs.ocamlPackages.buildDunePackage {
|
||||
pname = "inshellah";
|
||||
version = "0.1";
|
||||
src = ./.;
|
||||
nativeBuildInputs = [ pkgs.git ];
|
||||
buildInputs = with pkgs.ocamlPackages; [
|
||||
dune_3
|
||||
ocaml
|
||||
angstrom
|
||||
angstrom-unix
|
||||
];
|
||||
|
||||
meta.mainProgram = "inshellah";
|
||||
|
||||
};
|
||||
}
|
||||
);
|
||||
|
||||
nixosModules.default =
|
||||
{ pkgs, ... }:
|
||||
{
|
||||
imports = [ ./nix/module.nix ];
|
||||
programs.inshellah.package = self.packages.${pkgs.stdenv.hostPlatform.system}.default;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,35 +0,0 @@
|
|||
# This file is generated by dune, edit dune-project instead
|
||||
opam-version: "2.0"
|
||||
synopsis: "A short synopsis"
|
||||
description: "A longer description"
|
||||
maintainer: ["Maintainer Name <maintainer@example.com>"]
|
||||
authors: ["Author Name <author@example.com>"]
|
||||
license: "LICENSE"
|
||||
tags: ["add topics" "to describe" "your" "project"]
|
||||
homepage: "https://github.com/username/reponame"
|
||||
doc: "https://url/to/documentation"
|
||||
bug-reports: "https://github.com/username/reponame/issues"
|
||||
depends: [
|
||||
"ocaml"
|
||||
"dune" {>= "3.20"}
|
||||
"angstrom"
|
||||
"angstrom-unix"
|
||||
"ppx_inline_test" {with-test}
|
||||
"odoc" {with-doc}
|
||||
]
|
||||
build: [
|
||||
["dune" "subst"] {dev}
|
||||
[
|
||||
"dune"
|
||||
"build"
|
||||
"-p"
|
||||
name
|
||||
"-j"
|
||||
jobs
|
||||
"@install"
|
||||
"@runtest" {with-test}
|
||||
"@doc" {with-doc}
|
||||
]
|
||||
]
|
||||
dev-repo: "git+https://github.com/username/reponame.git"
|
||||
x-maintenance-intent: ["(latest)"]
|
||||
4
lib/dune
4
lib/dune
|
|
@ -1,3 +1,3 @@
|
|||
(library
|
||||
(name inshellah_parser)
|
||||
(libraries angstrom angstrom-unix))
|
||||
(name inshellah)
|
||||
(libraries angstrom angstrom-unix str unix))
|
||||
|
|
|
|||
415
lib/manpage.ml
Normal file
415
lib/manpage.ml
Normal file
|
|
@ -0,0 +1,415 @@
|
|||
open Parser
|
||||
|
||||
(* --- Groff escape/formatting stripper --- *)
|
||||
|
||||
(* [strip_groff_escapes s] removes groff escape sequences from [s] and
   returns the remaining plain text.

   Handled escapes:
   - fonts:            \fX  \f(XX  \f[name]         -> dropped
   - strings/registers \*X \*(XX \*[name], \nX ...  -> dropped
   - sizes:            \sN \s+N \s-N (up to 2 digits) -> dropped
   - colours / device: \m[...]  \X'...'             -> dropped
   - named characters: \(xx and \[xx]               -> ASCII equivalent for
     the names in [add_named_char], dropped otherwise
   - \-  -> "-",  \e and \\ -> "\",  "\ " -> " "
   - \& \/ \, (zero-width) and any unknown escape   -> dropped (2 chars)

   A backslash that is the very last character is copied through as is.
   An unterminated bracketed or quoted escape swallows the rest of [s]. *)
let strip_groff_escapes s =
  let len = String.length s in
  let out = Buffer.create len in
  (* Index just past the first [close] found at or after [i]; [len] if
     there is none.  Callers guarantee [i <= len]. *)
  let skip_past close i =
    match String.index_from_opt s i close with
    | Some j -> j + 1
    | None -> len
  in
  (* [i] points at the first character of a font/string/register name.
     Returns the index just past the name: one character, "(" plus two
     characters, or a bracketed name.  May return a value above [len];
     the main loop simply stops in that case. *)
  let skip_name i =
    if i >= len then i
    else
      match s.[i] with
      | '(' -> i + 3
      | '[' -> skip_past ']' (i + 1)
      | _ -> i + 1
  in
  (* Shared table for \(xx and \[xx] so both spellings agree. *)
  let add_named_char name =
    match name with
    | "aq" -> Buffer.add_char out '\''
    | "lq" | "Lq" | "rq" | "Rq" | "dq" -> Buffer.add_char out '"'
    | "em" | "en" | "hy" -> Buffer.add_char out '-'
    | _ -> ()
  in
  let is_digit i = i < len && s.[i] >= '0' && s.[i] <= '9' in
  let rec go i =
    if i < len then begin
      if s.[i] = '\\' && i + 1 < len then begin
        match s.[i + 1] with
        | 'f' | '*' | 'n' -> go (skip_name (i + 2))
        | '-' ->
          Buffer.add_char out '-';
          go (i + 2)
        | '&' | '/' | ',' -> go (i + 2)
        | '(' ->
          (* Needs both name characters to be present. *)
          if i + 3 < len then begin
            add_named_char (String.sub s (i + 2) 2);
            go (i + 4)
          end else go (i + 2)
        | '[' ->
          (match String.index_from_opt s (i + 2) ']' with
           | Some close ->
             add_named_char (String.sub s (i + 2) (close - i - 2));
             go (close + 1)
           | None -> ())
        | 's' ->
          let j = i + 2 in
          let j = if j < len && (s.[j] = '+' || s.[j] = '-') then j + 1 else j in
          let j = if is_digit j then j + 1 else j in
          let j = if is_digit j then j + 1 else j in
          go j
        | 'm' ->
          let j = i + 2 in
          go (if j < len && s.[j] = '[' then skip_past ']' (j + 1) else j)
        | 'X' ->
          let j = i + 2 in
          go (if j < len && s.[j] = '\'' then skip_past '\'' (j + 1) else j)
        | 'e' | '\\' ->
          Buffer.add_char out '\\';
          go (i + 2)
        | ' ' ->
          Buffer.add_char out ' ';
          go (i + 2)
        | _ -> go (i + 2)
      end else begin
        Buffer.add_char out s.[i];
        go (i + 1)
      end
    end
  in
  go 0;
  Buffer.contents out
|
||||
|
||||
(* Flatten the argument list of a font-alternating macro (.BI, .BR, .IR,
   ...) into one string: double quotes are removed, blanks (space, tab)
   outside quotes are removed, everything else -- including blanks inside
   a quoted argument -- is kept.  An unterminated quote keeps the rest of
   the line verbatim. *)
let strip_inline_macro_args s =
  let out = Buffer.create (String.length s) in
  let quoted = ref false in
  String.iter
    (fun c ->
      match c with
      | '"' -> quoted := not !quoted
      | (' ' | '\t') when not !quoted -> ()
      | other -> Buffer.add_char out other)
    s;
  Buffer.contents out
|
||||
|
||||
(* Remove groff escapes from [line], then trim surrounding whitespace. *)
let strip_groff line = line |> strip_groff_escapes |> String.trim
|
||||
|
||||
(* --- Line classification --- *)
|
||||
|
||||
(* One classified line of manpage source. *)
type groff_line =
  | Macro of string * string
      (* macro name and its argument text, e.g. ("SH", "OPTIONS") or ("TP", "") *)
  | Text of string
      (* plain text after groff escapes have been stripped *)
  | Blank
  | Comment
|
||||
|
||||
(* [classify_line line] sorts one raw manpage line into a [groff_line]:
   - empty line                                   -> Blank
   - `.\"`, a bare `.\`, or (3+ chars) `\"...`     -> Comment
   - line starting with '.' or '\''               -> Macro (name, args)
   - anything else -> Text after escape stripping, or Blank when nothing
     is left.

   For macros, the name ends at the first blank (space OR tab, whichever
   comes first); the remainder is trimmed and, when it is wrapped in one
   pair of double quotes, the quotes are removed. *)
let classify_line line =
  let len = String.length line in
  if len = 0 then Blank
  else if len >= 2 && line.[0] = '.' && line.[1] = '\\' && (len < 3 || line.[2] = '"') then
    Comment
  else if len >= 3 && line.[0] = '\\' && line.[1] = '"' then
    Comment
  else if line.[0] = '.' || line.[0] = '\'' then begin
    let rest = String.trim (String.sub line 1 (len - 1)) in
    let rlen = String.length rest in
    (* Earliest space or tab.  Searching for a space first and only then
       for a tab would glue "TP<TAB>4" into the macro name whenever a
       space appears later on the line. *)
    let rec first_blank i =
      if i >= rlen then None
      else
        match rest.[i] with
        | ' ' | '\t' -> Some i
        | _ -> first_blank (i + 1)
    in
    match first_blank 0 with
    | None -> Macro (rest, "")
    | Some pos ->
      let name = String.sub rest 0 pos in
      let args = String.trim (String.sub rest (pos + 1) (rlen - pos - 1)) in
      let alen = String.length args in
      let args =
        if alen >= 2 && args.[0] = '"' && args.[alen - 1] = '"' then
          String.sub args 1 (alen - 2)
        else args
      in
      Macro (name, args)
  end else
    match strip_groff line with
    | "" -> Blank
    | stripped -> Text stripped
|
||||
|
||||
(* True when [line] begins with a groff comment introducer: either
   dot-backslash-doublequote or backslash-doublequote. *)
let is_comment_line line =
  let has_prefix prefix =
    let n = String.length prefix in
    String.length line >= n && String.sub line 0 n = prefix
  in
  has_prefix ".\\\"" || has_prefix "\\\""
|
||||
|
||||
(* Shadows the earlier [classify_line]: comment lines are recognised up
   front via [is_comment_line]; every other line is delegated to the
   previous definition (this [let] is not recursive). *)
let classify_line line =
  match is_comment_line line with
  | true -> Comment
  | false -> classify_line line
|
||||
|
||||
(* --- Section extraction --- *)
|
||||
|
||||
(* [extract_options_section lines] classifies the raw [lines] and returns
   the body of the first .SH section whose upper-cased, trimmed title
   contains "OPTION".  If there is none, the body of the first section
   titled exactly "DESCRIPTION" is returned, or [] when that is missing
   too.  A section body runs up to (not including) the next .SH macro. *)
let extract_options_section lines =
  let classified = List.map classify_line lines in
  let normalized title = String.uppercase_ascii (String.trim title) in
  let mentions_option title =
    match Str.search_forward (Str.regexp_string "OPTION") (normalized title) 0 with
    | _ -> true
    | exception Not_found -> false
  in
  let is_description title = normalized title = "DESCRIPTION" in
  let section_body after_header =
    let rec take acc = function
      | [] | Macro ("SH", _) :: _ -> List.rev acc
      | item :: tl -> take (item :: acc) tl
    in
    take [] after_header
  in
  let rec find_section wanted = function
    | [] -> None
    | Macro ("SH", title) :: tl when wanted title -> Some (section_body tl)
    | _ :: tl -> find_section wanted tl
  in
  match find_section mentions_option classified with
  | Some body -> body
  | None ->
    (match find_section is_description classified with
     | Some body -> body
     | None -> [])
|
||||
|
||||
(* --- Strategy-based entry extraction --- *)
|
||||
|
||||
(* Consume the leading run of [Text] items from [lines].  Returns the
   collected strings joined with single spaces (preceded by whatever was
   already in [acc], which is in reverse order) together with the
   unconsumed remainder, which starts at the first non-[Text] item. *)
let collect_text_lines lines acc =
  let rec gather rev_texts = function
    | Text s :: tl -> gather (s :: rev_texts) tl
    | remaining -> (String.concat " " (List.rev rev_texts), remaining)
  in
  gather acc lines
|
||||
|
||||
(* Turn an option tag such as "-a, --all" into an entry carrying [desc].
   The tag is stripped of groff escapes and trimmed, then [switch_parser]
   followed by [param_parser] is run on a prefix of it; trailing input is
   ignored.  Returns [None] when the tag does not parse. *)
let parse_tag_to_entry tag desc =
  let cleaned = String.trim (strip_groff_escapes tag) in
  let tag_parser =
    Angstrom.lift2 (fun sw p -> (sw, p)) switch_parser param_parser
  in
  let parsed =
    Angstrom.parse_string ~consume:Angstrom.Consume.Prefix tag_parser cleaned
  in
  match parsed with
  | Error _ -> None
  | Ok (switch, param) -> Some { switch; param; desc }
|
||||
|
||||
(* Strategy A: .TP style (most common -- GNU coreutils, help2man).
   A .TP macro immediately followed by a text line yields one candidate:
   that text line is the tag and the following run of text lines is the
   description.  Tags that do not parse are dropped.  A .TP followed by
   anything other than text is skipped. *)
let strategy_tp lines =
  let rec walk acc = function
    | [] -> List.rev acc
    | Macro ("TP", _) :: Text tag :: after_tag ->
      let desc, remaining = collect_text_lines after_tag [] in
      let acc =
        match parse_tag_to_entry tag desc with
        | Some entry -> entry :: acc
        | None -> acc
      in
      walk acc remaining
    | _ :: tl -> walk acc tl
  in
  walk [] lines
|
||||
|
||||
(* Strategy B: .IP style (curl, hand-written).
   Each `.IP tag [indent]` macro yields one candidate: the tag comes from
   the macro arguments and the description is the run of text lines that
   follows.  Tags that do not parse are dropped.

   When the argument text starts with a double quote, only the first
   quoted argument is used as the tag, so `.IP "\-\-foo" 4` gives the tag
   "--foo" rather than the unparseable "\"--foo\" 4".  Unquoted argument
   text is passed through whole, as before. *)
let strategy_ip lines =
  let first_argument args =
    let n = String.length args in
    if n > 0 && args.[0] = '"' then
      match String.index_from_opt args 1 '"' with
      | Some close -> String.sub args 1 (close - 1)
      | None -> String.sub args 1 (n - 1) (* unterminated quote *)
    else args
  in
  let rec walk lines acc =
    match lines with
    | [] -> List.rev acc
    | Macro ("IP", args) :: rest ->
      let tag = strip_groff_escapes (first_argument args) in
      let (desc, rest2) = collect_text_lines rest [] in
      let acc =
        match parse_tag_to_entry tag desc with
        | Some e -> e :: acc
        | None -> acc
      in
      walk rest2 acc
    | _ :: rest -> walk rest acc
  in
  walk lines []
|
||||
|
||||
(* Strategy C: .PP + .RS/.RE style (git, DocBook).
   A .PP macro immediately followed by a text line starts a candidate
   whose tag is that text line.  The description is gathered in two
   phases:
   - before .RS: text lines are collected; the first .RS switches to the
     second phase; anything else ends the description there;
   - inside .RS: text lines are collected and other macros skipped until
     .RE (consumed), or until .PP / .SH / end of input (left in place). *)
let strategy_pp_rs lines =
  let finish rev_desc remaining =
    (String.concat " " (List.rev rev_desc), remaining)
  in
  let rec before_rs rev_desc = function
    | Macro ("RS", _) :: tl -> inside_rs rev_desc tl
    | Text s :: tl -> before_rs (s :: rev_desc) tl
    | remaining -> finish rev_desc remaining
  and inside_rs rev_desc = function
    | [] -> finish rev_desc []
    | Macro ("RE", _) :: tl -> finish rev_desc tl
    | Text s :: tl -> inside_rs (s :: rev_desc) tl
    | (Macro (("PP" | "SH"), _) :: _) as remaining -> finish rev_desc remaining
    | _ :: tl -> inside_rs rev_desc tl
  in
  let rec walk acc = function
    | [] -> List.rev acc
    | Macro ("PP", _) :: Text tag :: after_tag ->
      let desc, remaining = before_rs [] after_tag in
      let acc =
        match parse_tag_to_entry tag desc with
        | Some entry -> entry :: acc
        | None -> acc
      in
      walk acc remaining
    | _ :: tl -> walk acc tl
  in
  walk [] lines
|
||||
|
||||
(* Strategy D: deroff fallback.  Render the section to plain text, one
   output line per [Text], per font macro (.BI .BR .IR .B .I, arguments
   flattened and escape-stripped) and per [Blank]; all other macros and
   comments are dropped.  The text is then handed to [parse_help]; its
   entries are returned, or [] on a parse error. *)
let strategy_deroff_lines lines =
  let rendered = function
    | Text s -> Some s
    | Macro (("BI" | "BR" | "IR" | "B" | "I"), args) ->
      Some (strip_groff_escapes (strip_inline_macro_args args))
    | Blank -> Some ""
    | Macro _ | Comment -> None
  in
  let buf = Buffer.create 256 in
  List.iter
    (fun line ->
      match rendered line with
      | Some text ->
        Buffer.add_string buf text;
        Buffer.add_char buf '\n'
      | None -> ())
    lines;
  match parse_help (Buffer.contents buf) with
  | Ok result -> result.entries
  | Error _ -> []
|
||||
|
||||
(* Number of [Macro] items in [lines] whose macro name equals [name]. *)
let count_macro name lines =
  let is_named = function
    | Macro (m, _) -> m = name
    | Text _ | Blank | Comment -> false
  in
  List.length (List.filter is_named lines)
|
||||
|
||||
(* Run every applicable extraction strategy and keep the result with the
   most entries.  TP and IP are tried only when the corresponding macro
   occurs, PP+RS only when both .PP and .RS occur; the deroff fallback is
   always tried.  On a tie the preference order is TP, then IP, then
   PP+RS, then deroff. *)
let extract_entries lines =
  let has name = count_macro name lines > 0 in
  let tp = if has "TP" then [ strategy_tp lines ] else [] in
  let ip = if has "IP" then [ strategy_ip lines ] else [] in
  let pp_rs = if has "PP" && has "RS" then [ strategy_pp_rs lines ] else [] in
  let deroff = [ strategy_deroff_lines lines ] in
  (* Least-preferred first: with ">=" a later candidate wins ties. *)
  List.fold_left
    (fun best candidate ->
      if List.length candidate >= List.length best then candidate else best)
    []
    (deroff @ pp_rs @ ip @ tp)
|
||||
|
||||
(* --- Top-level API --- *)
|
||||
|
||||
(* Extract option entries from manpage source given as a list of lines:
   locate the options section, then run the extraction strategies on it. *)
let parse_manpage_lines lines =
  lines |> extract_options_section |> extract_entries
|
||||
|
||||
(* Extract option entries from manpage source held in one string; the
   string is split on '\n' and passed to [parse_manpage_lines]. *)
let parse_manpage_string contents =
  contents |> String.split_on_char '\n' |> parse_manpage_lines
|
||||
|
||||
(* [parse_manpage_gzipped_file path] decompresses [path] by piping it
   through "gzip -dc" and parses the output as manpage source.  Each line
   read is re-terminated with '\n'.

   The pipe is closed (and the child reaped) even when reading raises, so
   a failing file in a batch run does not leak a descriptor or leave a
   zombie process.  gzip's exit status is not inspected: a failed
   decompression simply yields whatever output was produced. *)
let parse_manpage_gzipped_file path =
  let ic = Unix.open_process_in (Printf.sprintf "gzip -dc %s" (Filename.quote path)) in
  let contents =
    Fun.protect
      ~finally:(fun () -> ignore (Unix.close_process_in ic : Unix.process_status))
      (fun () ->
        let buf = Buffer.create 4096 in
        let rec slurp () =
          match input_line ic with
          | line ->
            Buffer.add_string buf line;
            Buffer.add_char buf '\n';
            slurp ()
          | exception End_of_file -> Buffer.contents buf
        in
        slurp ())
  in
  parse_manpage_string contents
|
||||
|
||||
(* [parse_manpage_file path] parses the manpage at [path] and returns its
   option entries.  Paths ending in ".gz" go through
   [parse_manpage_gzipped_file]; anything else is read whole.

   The channel is closed even when measuring or reading the file raises
   (for instance when [path] turns out to be a directory), so callers
   that catch the exception and continue do not leak a descriptor.
   Raises [Sys_error] if the file cannot be opened or read. *)
let parse_manpage_file path =
  if Filename.check_suffix path ".gz" then
    parse_manpage_gzipped_file path
  else begin
    let ic = open_in path in
    let contents =
      Fun.protect
        ~finally:(fun () -> close_in_noerr ic)
        (fun () -> really_input_string ic (in_channel_length ic))
    in
    parse_manpage_string contents
  end
|
||||
82
lib/nushell.ml
Normal file
82
lib/nushell.ml
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
open Parser
|
||||
|
||||
(* Map a parameter placeholder from help text to a nushell type name.
   Matching is exact and case-sensitive against two fixed word lists;
   anything not listed is typed as "string". *)
let nushell_type_of_param hint =
  let path_hints =
    [ "FILE"; "file"; "PATH"; "path"; "DIR"; "dir"; "DIRECTORY";
      "FILENAME"; "PATTERNFILE" ]
  in
  let int_hints =
    [ "NUM"; "N"; "COUNT"; "NUMBER"; "int"; "INT"; "COLS"; "WIDTH";
      "LINES"; "DEPTH"; "depth" ]
  in
  if List.mem hint path_hints then "path"
  else if List.mem hint int_hints then "int"
  else "string"
|
||||
|
||||
(* Escape [s] for use inside a double-quoted nushell string: every double
   quote and every backslash is prefixed with a backslash.  The
   surrounding quotes are NOT added here; the caller supplies them. *)
let escape_nu s =
  let escaped = function
    | '"' -> "\\\""
    | '\\' -> "\\\\"
    | c -> String.make 1 c
  in
  s |> String.to_seq |> Seq.map escaped |> List.of_seq |> String.concat ""
|
||||
|
||||
(* Render one entry as a line of a nushell extern signature (without the
   trailing newline): leading indent, the flag spelled "--long(-s)",
   "--long" or "-s", an optional ": type" annotation derived from the
   parameter name, and -- when the description is non-empty -- padding up
   to column 40 (at least one space) followed by "# description". *)
let format_flag entry =
  let flag =
    match entry.switch with
    | Both (s, l) -> Printf.sprintf "--%s(-%c)" l s
    | Long l -> Printf.sprintf "--%s" l
    | Short s -> Printf.sprintf "-%c" s
  in
  let annotation =
    match entry.param with
    | Some (Mandatory name) | Some (Optional name) ->
      ": " ^ nushell_type_of_param name
    | None -> ""
  in
  let head = " " ^ flag ^ annotation in
  if String.length entry.desc > 0 then begin
    let width = String.length head in
    let target = max (width + 1) 40 in
    head ^ String.make (target - width) ' ' ^ "# " ^ entry.desc
  end else head
|
||||
|
||||
(* Build the nushell source for [cmd_name]: one `export extern` block
   listing every flag in [result.entries], followed by one empty
   `export extern "cmd sub"` block per subcommand with the subcommand's
   description as a trailing comment. *)
let generate_extern cmd_name result =
  let quoted_cmd = escape_nu cmd_name in
  let flag_lines =
    List.map (fun entry -> format_flag entry ^ "\n") result.entries
  in
  let main_block =
    Printf.sprintf "export extern \"%s\" [\n" quoted_cmd
    ^ String.concat "" flag_lines
    ^ "]\n"
  in
  let subcommand_blocks =
    List.map
      (fun (sc : subcommand) ->
        Printf.sprintf "\nexport extern \"%s %s\" [ # %s\n]\n"
          quoted_cmd (escape_nu sc.name) (escape_nu sc.desc))
      result.subcommands
  in
  String.concat "" (main_block :: subcommand_blocks)
|
||||
|
||||
(* Wrap the externs for [cmd_name] in a nushell module named
   "<cmd_name>-completions".  [cmd_name] is inserted into the module name
   as is, without escaping. *)
let generate_module cmd_name result =
  let body = generate_extern cmd_name result in
  "module " ^ cmd_name ^ "-completions {\n" ^ body ^ "}\n"
|
||||
|
||||
(* Generate the extern for [cmd_name] from a bare list of entries, as
   produced by the manpage parser; no subcommands are emitted. *)
let generate_extern_from_entries cmd_name entries =
  generate_extern cmd_name { entries; subcommands = [] }
|
||||
262
lib/parser.ml
262
lib/parser.ml
|
|
@ -1,16 +1,55 @@
|
|||
(* open Angstrom_unix *)
|
||||
(* also look for "subcommands" for clapslop *)
|
||||
(* and other common help patterns *)
|
||||
open Angstrom
|
||||
|
||||
let ( <| ) = ( @@ )
|
||||
let ( <&> ) p1 p2 = lift2 (fun a b -> (a, b)) p1 p2
|
||||
let is_whitespace = function ' ' | '\t' | '\n' | '\r' -> true | _ -> false
|
||||
(* Remove terminal escape sequences from --help output:
   - CSI: ESC '[' ... up to and including the first byte in '@'..'~';
   - OSC: ESC ']' ... up to and including BEL or the two-byte ESC '\';
   - any other ESC: the ESC and the byte after it.
   An unterminated sequence swallows the rest of the string.  An ESC that
   is the very last byte is copied through unchanged. *)
let strip_ansi s =
  let len = String.length s in
  let out = Buffer.create len in
  let esc = '\x1b' in
  (* Index just past the CSI final byte, scanning from [i]. *)
  let rec end_of_csi i =
    if i >= len then i
    else if s.[i] >= '@' && s.[i] <= '~' then i + 1
    else end_of_csi (i + 1)
  in
  (* Index just past the OSC terminator, scanning from [i]. *)
  let rec end_of_osc i =
    if i >= len then i
    else if s.[i] = '\x07' then i + 1
    else if i + 1 < len && s.[i] = esc && s.[i + 1] = '\\' then i + 2
    else end_of_osc (i + 1)
  in
  let rec copy i =
    if i < len then begin
      if i + 1 < len && s.[i] = esc then
        copy
          (match s.[i + 1] with
           | '[' -> end_of_csi (i + 2)
           | ']' -> end_of_osc (i + 2)
           | _ -> i + 2)
      else begin
        Buffer.add_char out s.[i];
        copy (i + 1)
      end
    end
  in
  copy 0;
  Buffer.contents out
|
||||
|
||||
(* Horizontal whitespace only: a space or a tab. *)
let is_whitespace c = (c = ' ') || (c = '\t')
|
||||
|
||||
(* ASCII letters and digits. *)
let is_alphanumeric c =
  (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')
|
||||
|
||||
(* Characters allowed in a parameter placeholder: ASCII letters, digits,
   '_' and '-'. *)
let is_param_char c =
  match c with
  | '_' | '-' -> true
  | _ ->
    (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')
|
||||
|
||||
(* An ASCII uppercase letter or an underscore. *)
let is_upper_or_underscore c = c = '_' || (c >= 'A' && c <= 'Z')
|
||||
|
||||
(* Characters allowed in a long switch name: ASCII letters, digits and '-'. *)
let is_long_char c =
  c = '-'
  || (c >= 'A' && c <= 'Z')
  || (c >= 'a' && c <= 'z')
  || (c >= '0' && c <= '9')
|
||||
|
|
@ -18,48 +57,207 @@ let is_long_char = function
|
|||
(* How a flag is spelled: short form only, long form only, or both. *)
type switch =
  | Short of char
  | Long of string
  | Both of char * string

(* The value placeholder a flag takes, either required or optional. *)
type param =
  | Mandatory of string
  | Optional of string

(* One parsed flag: its spelling, its value placeholder if any, and its
   description text. *)
type entry = {
  switch : switch;
  param : param option;
  desc : string;
}

(* One parsed subcommand line: its name and description text. *)
type subcommand = {
  name : string;
  desc : string;
}

(* Everything extracted from one help text. *)
type help_result = {
  entries : entry list;
  subcommands : subcommand list;
}
|
||||
|
||||
(* --- Low-level combinators --- *)
|
||||
|
||||
(* Skip spaces and tabs; never crosses a line break. *)
let inline_ws = skip_while (fun c -> c = ' ' || c = '\t')

(* End of the current line: either a line terminator or the end of input. *)
let eol = end_of_line <|> end_of_input

(* Like [eol], except that a line terminator must actually be consumed: this
   does not succeed at the end of input. *)
let eol_strict = end_of_line
|
||||
|
||||
let whitespace = skip_while is_whitespace
|
||||
let comma = char ',' *> whitespace
|
||||
let short_switch = char '-' *> satisfy is_alphanumeric
|
||||
let long_switch = string "--" *> take_while1 is_long_char
|
||||
let comma = char ',' *> inline_ws
|
||||
|
||||
let opt_param =
|
||||
print_endline "opt param is running";
|
||||
string "[=" *> take_while is_alphanumeric <* char ']' >>| fun a -> Optional a
|
||||
(* Parameter parsers *)
|
||||
(* Optional value attached with '=': "[=NAME]" yields [Optional "NAME"]. The
   name must be at least one parameter character long. *)
let eq_opt_param =
  let bracketed = string "[=" *> take_while1 is_param_char <* char ']' in
  lift (fun name -> Optional name) bracketed
|
||||
|
||||
let man_param =
|
||||
print_endline "man param is running";
|
||||
char '=' *> take_while is_alphanumeric >>| fun a -> Mandatory a
|
||||
(* Mandatory value attached with '=': "=NAME" yields [Mandatory "NAME"]. The
   name must be at least one parameter character long. *)
let eq_man_param =
  lift (fun name -> Mandatory name) (char '=' *> take_while1 is_param_char)
|
||||
|
||||
(* Space-separated ALL_CAPS placeholder, e.g. " FILE" or " TIME_STYLE".
   After a single space the next character must be an uppercase letter or
   underscore; the whole word is then read with [is_param_char] and accepted
   only if it consists solely of uppercase letters, underscores and digits,
   so that a description word such as "Do" is rejected. *)
let space_upper_param =
  let is_digit c = c >= '0' && c <= '9' in
  let all_caps = String.for_all (fun c -> is_upper_or_underscore c || is_digit c) in
  char ' ' *> peek_char_fail >>= fun first ->
  if not (is_upper_or_underscore first) then fail "not an uppercase param"
  else
    take_while1 is_param_char >>= fun name ->
    if all_caps name then return (Mandatory name)
    else fail "not an all-caps param"
|
||||
|
||||
(* Angle-bracket placeholder, e.g. "<file>" or "<notation>": the text between
   the brackets (one or more characters other than '>') becomes a mandatory
   parameter name. *)
let angle_param =
  let inner = take_while1 (fun c -> c <> '>') in
  lift (fun name -> Mandatory name) (char '<' *> inner <* char '>')
|
||||
|
||||
(* A single space followed by an angle-bracket placeholder. *)
let space_angle_param = char ' ' >>= fun _ -> angle_param
|
||||
|
||||
(* Optional angle-bracket placeholder: "[<file>]" yields [Optional "file"]. *)
let opt_angle_param =
  let opening = char '[' *> char '<' in
  let closing = char '>' *> char ']' in
  lift
    (fun name -> Optional name)
    (opening *> take_while1 (fun c -> c <> '>') <* closing)
|
||||
|
||||
(* A single space followed by an optional angle-bracket placeholder. *)
let space_opt_angle_param = char ' ' >>= fun _ -> opt_angle_param
|
||||
|
||||
(* Go/Cobra style: a single space followed by a lowercase type word such as
   "string", "list" or "int". Only runs of 'a'..'z' of at most 10 characters
   are accepted; anything longer is rejected as not type-like. *)
let space_type_param =
  let is_lower c = c >= 'a' && c <= 'z' in
  char ' ' *> peek_char_fail >>= fun first ->
  if not (is_lower first) then fail "not a lowercase type param"
  else
    take_while1 is_lower >>= fun word ->
    if String.length word > 10 then fail "too long for type param"
    else return (Mandatory word)
|
||||
|
||||
let param_parser =
|
||||
option None (choice [ opt_param; man_param ] >>| fun a -> Some a)
|
||||
option None
|
||||
(choice
|
||||
[ eq_opt_param; eq_man_param;
|
||||
space_opt_angle_param; space_angle_param;
|
||||
space_upper_param; space_type_param ]
|
||||
>>| fun a -> Some a)
|
||||
|
||||
(* Switch parser: -a, --all | -a | --all *)
|
||||
let switch_parser =
|
||||
choice
|
||||
[
|
||||
(* -a, --all *)
|
||||
( short_switch >>= fun s ->
|
||||
comma *> long_switch >>| fun l -> Both (s, l) );
|
||||
(* -a *)
|
||||
(short_switch >>= fun s ->
|
||||
comma *> long_switch >>| fun l -> Both (s, l));
|
||||
(short_switch >>| fun s -> Short s);
|
||||
(* --all *)
|
||||
(long_switch >>| fun l -> Long l);
|
||||
]
|
||||
|
||||
let description = whitespace *> take_till (fun c -> c = '\n') <* end_of_line
|
||||
(* --- Description parsing with multi-line continuation --- *)
|
||||
|
||||
(* Everything up to, but not including, the next '\n' or '\r'. The line
   break itself is left unconsumed. *)
let rest_of_line = take_till (function '\n' | '\r' -> true | _ -> false)
|
||||
|
||||
(* Parse one continuation line of a description and return its text.

   A line qualifies when its leading indentation is at least 8 columns
   (a space counts 1, a tab counts 8) and the first non-blank text in the
   look-ahead window does not start with '-'. The decision is made on a
   peeked preview of at most 80 characters, so nothing is consumed when the
   line is rejected. *)
let continuation_line =
  (* Width of the leading run of spaces and tabs in [s]. *)
  let indent_width s =
    let n = String.length s in
    let rec go i acc =
      if i >= n then acc
      else
        match s.[i] with
        | ' ' -> go (i + 1) (acc + 1)
        | '\t' -> go (i + 1) (acc + 8)
        | _ -> acc
    in
    go 0 0
  in
  (* [peek_string 1] fails at end of input before [available] is consulted. *)
  peek_string 1 *> available >>= fun avail ->
  if avail = 0 then fail "eof"
  else
    peek_string (min avail 80) >>= fun preview ->
    let text = String.trim preview in
    let looks_like_flag = text <> "" && text.[0] = '-' in
    if indent_width preview < 8 || looks_like_flag then
      fail "not a continuation line"
    else inline_ws *> rest_of_line <* eol
|
||||
|
||||
(* Description starting on the flag's own line, followed by any number of
   continuation lines. Blank pieces are dropped; the rest are trimmed and
   joined with single spaces. *)
let description =
  let non_blank line = String.trim line <> "" in
  let first_line = inline_ws *> rest_of_line <* eol in
  lift2
    (fun first rest ->
      (first :: rest)
      |> List.filter non_blank
      |> List.map String.trim
      |> String.concat " ")
    first_line
    (many continuation_line)
|
||||
|
||||
(* Description placed on the lines below the flag (Clap long style): one or
   more continuation lines, with blank ones dropped and the rest trimmed and
   joined with single spaces. *)
let description_below =
  let non_blank line = String.trim line <> "" in
  many1 continuation_line >>| fun pieces ->
  pieces
  |> List.filter non_blank
  |> List.map String.trim
  |> String.concat " "
|
||||
|
||||
(* --- Line classification for skipping --- *)
|
||||
|
||||
(* Look-ahead test that consumes nothing: succeeds when the next 40
   characters (or fewer, near the end of input), once trimmed, start with
   '-'. Fails at end of input. *)
let at_option_line =
  peek_string 1 *> available >>= function
  | 0 -> fail "eof"
  | avail ->
    peek_string (min avail 40) >>= fun preview ->
    if String.starts_with ~prefix:"-" (String.trim preview) then return ()
    else fail "not an option line"
|
||||
|
||||
(* Skip one whole line (section header, blank line, free text, ...): consume
   everything up to the line break and then the line break itself. Fails at
   end of input, because a line break must be consumed; this is what lets a
   surrounding [many] terminate.

   No check is made that the line does not look like an option. A guard of
   the form [(at_option_line *> fail "...") <|> skip] cannot provide one: its
   left operand can only fail, and [<|>] then backtracks and runs the right
   operand regardless. Skipping unconditionally is also what the top-level
   parser relies on: it tries [entry] first, so an option-looking line only
   reaches this parser after it failed to parse as an entry, and skipping it
   lets parsing carry on with the following lines. *)
let skip_non_option_line = rest_of_line *> eol_strict
|
||||
|
||||
(* --- Entry parsing --- *)
|
||||
|
||||
(* Parse a single flag entry *)
|
||||
let entry =
|
||||
skip_while (fun c -> c <> '-')
|
||||
*> lift3 (fun a b c -> (a, b, c)) switch_parser param_parser description
|
||||
>>| fun (switch, param, desc) -> { switch; param; desc }
|
||||
inline_ws *>
|
||||
lift2 (fun (sw, param) desc -> { switch = sw; param; desc })
|
||||
(lift2 (fun a b -> (a, b)) switch_parser param_parser)
|
||||
(description <|> (eol *> (description_below <|> return "")))
|
||||
|
||||
let endline = option () (char '\n' *> return ())
|
||||
let entry_line = entry <* endline
|
||||
let help_parser = many entry_line
|
||||
(* --- Subcommand parsing --- *)
|
||||
|
||||
(* A subcommand line: optional indentation, a name (a run of characters other
   than space, tab and newline), at least two spaces, then the description up
   to the end of the line. The description is stored trimmed. *)
let subcommand_entry =
  let is_name_char c = not (c = ' ' || c = '\t' || c = '\n') in
  (* At least two spaces must separate the name from its description. *)
  let gap = char ' ' *> char ' ' *> inline_ws in
  lift2
    (fun name desc -> { name; desc = String.trim desc })
    (inline_ws *> take_while1 is_name_char)
    (gap *> rest_of_line <* eol)
|
||||
|
||||
(* --- Top-level parser --- *)
|
||||
|
||||
(* The main help parser. It walks the input one item at a time, trying in
   order: a flag entry, a subcommand line, and finally skipping one line.
   Entries and subcommands are collected in the order they were found;
   skipped lines are discarded. *)
let help_parser =
  let item =
    choice
      [ lift (fun e -> `Entry e) entry;
        lift (fun sc -> `Subcommand sc) subcommand_entry;
        lift (fun () -> `Skip) skip_non_option_line ]
  in
  let collect item acc =
    match item with
    | `Entry e -> { acc with entries = e :: acc.entries }
    | `Subcommand sc -> { acc with subcommands = sc :: acc.subcommands }
    | `Skip -> acc
  in
  many item >>| fun items ->
  List.fold_right collect items { entries = []; subcommands = [] }
|
||||
|
||||
let parse_help txt =
|
||||
Angstrom.parse_string ~consume:Consume.Prefix help_parser txt
|
||||
let clean = strip_ansi txt in
|
||||
match Angstrom.parse_string ~consume:Consume.Prefix help_parser clean with
|
||||
| Ok result -> Ok result
|
||||
| Error msg -> Error msg
|
||||
|
||||
(* --- Pretty printers --- *)
|
||||
|
||||
let print_switch = function
|
||||
| Short o -> Printf.sprintf "Short: %c" o
|
||||
|
|
@ -75,3 +273,11 @@ let print_entry e =
|
|||
Printf.printf
|
||||
"\n\t** ENTRY **\n\tSwitch: %s\n\tParam: %s\n\tDescription: %s\n"
|
||||
(print_switch e.switch) (print_opt e.param) e.desc
|
||||
|
||||
(* Print one subcommand (name and description) to stdout for debugging. *)
let print_subcommand ({ name; desc } : subcommand) =
  Printf.printf "\n\t** SUBCOMMAND **\n\tName: %s\n\tDescription: %s\n" name desc
|
||||
|
||||
(* Print every entry, then every subcommand, of a parse result to stdout. *)
let print_help_result { entries; subcommands } =
  List.iter print_entry entries;
  List.iter print_subcommand subcommands
|
||||
|
|
|
|||
109
nix/module.nix
Normal file
109
nix/module.nix
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
# NixOS module: automatic nushell completion generation from manpages
|
||||
#
|
||||
# Modeled on nixpkgs' programs/fish.nix completion generator.
|
||||
# For each package in environment.systemPackages, a small derivation runs
|
||||
# `inshellah manpage-dir` against its share/man directory. Results are merged
|
||||
# into a single directory and placed in nushell's vendor autoload path.
|
||||
#
|
||||
# Usage in your NixOS configuration:
|
||||
#
|
||||
# { pkgs, ... }: {
|
||||
# imports = [ ./path/to/inshellah/nix/module.nix ];
|
||||
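# programs.inshellah.enable = true;
# # Required: the package option has no default.
# programs.inshellah.package = <your inshellah derivation>;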
|
||||
# # Optionally add packages not in systemPackages:
|
||||
# # programs.inshellah.extraPackages = [ pkgs.kubectl ];
|
||||
# }
|
||||
|
||||
{ config, lib, pkgs, ... }:
|
||||
|
||||
let
|
||||
cfg = config.programs.inshellah;
|
||||
|
||||
# The package providing the `inshellah` binary. It comes from
# programs.inshellah.package, which has no default and must be set by the user.
|
||||
inshellahPkg = cfg.package;
|
||||
|
||||
# Per-package derivation: run `inshellah manpage-dir` against a package's
# share/man directory and store the result as one .nu file in $out.
#
# The file is named after this derivation ($name, i.e. "<package>_nu-completions")
# rather than a fixed "completions.nu": the outputs of all packages are later
# merged into a single directory, where identically named files would collide
# and all but one would be dropped.
generateCompletions = package:
  pkgs.runCommandLocal
    (let
      inherit (lib.strings) stringLength substring storeDir;
      # Skip "<storeDir>/<32-char hash>-" to recover a name from a bare store path.
      storeLength = stringLength storeDir + 34;
      pathName = substring storeLength (stringLength package - storeLength) package;
    in
      (package.name or pathName) + "_nu-completions"
    )
    ({
      inherit package;
      nativeBuildInputs = [ inshellahPkg ];
    } // lib.optionalAttrs (package ? meta.priority) {
      meta.priority = package.meta.priority;
    })
    ''
      mkdir -p $out
      if [ -d "$package/share/man" ]; then
        # Best effort: a package whose manpages cannot be parsed must not fail the build.
        inshellah manpage-dir "$package/share/man" > "$out/$name.nu" 2>/dev/null || true
        # Remove an empty result file, but never $out itself (-mindepth 1):
        # the derivation must still produce its output path.
        find $out -mindepth 1 -empty -delete
      fi
    '';
|
||||
|
||||
in {
|
||||
options.programs.inshellah = {
|
||||
enable = lib.mkEnableOption "nushell completion generation from manpages via inshellah";
|
||||
|
||||
package = lib.mkOption {
|
||||
type = lib.types.package;
|
||||
description = "The inshellah package to use for generating completions.";
|
||||
};
|
||||
|
||||
extraPackages = lib.mkOption {
|
||||
type = lib.types.listOf lib.types.package;
|
||||
default = [];
|
||||
description = ''
|
||||
Additional packages to generate nushell completions from, beyond
|
||||
those in {option}`environment.systemPackages`.
|
||||
'';
|
||||
};
|
||||
|
||||
generatedCompletionsPath = lib.mkOption {
|
||||
type = lib.types.str;
|
||||
default = "/share/nushell/vendor/autoload";
|
||||
description = ''
|
||||
Subdirectory within the merged environment where completion files
|
||||
are placed. The default matches nushell's vendor autoload convention
|
||||
(discovered via XDG_DATA_DIRS).
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
config = lib.mkIf cfg.enable {
  # Merge all per-package completions into a single directory.
  # This path ends up in the system profile, and nushell discovers it
  # via XDG_DATA_DIRS -> $prefix/share/nushell/vendor/autoload/
  environment.pathsToLink = [ cfg.generatedCompletionsPath ];

  environment.systemPackages =
    let
      aggregateName = "nushell-generated-completions";

      # The aggregate defined below is itself a member of
      # environment.systemPackages. It must be left out of its own inputs,
      # otherwise building it would require its own output path.
      # NOTE(review): identified by derivation name; confirm that no other
      # package in the system uses this name.
      sourcePackages = lib.filter
        (p: (p.name or "") != aggregateName)
        (config.environment.systemPackages ++ cfg.extraPackages);
    in [
      (pkgs.buildEnv {
        name = aggregateName;
        ignoreCollisions = true;
        paths = map generateCompletions sourcePackages;
        pathsToLink = [ "/" ];
        # Nest everything under the vendor autoload path
        postBuild = ''
          if [ -d "$out" ]; then
            tmp=$(mktemp -d)
            cp -r "$out/"* "$tmp/" 2>/dev/null || true
            rm -rf "$out/"*
            mkdir -p "$out${cfg.generatedCompletionsPath}"
            for f in "$tmp"/*.nu; do
              [ -f "$f" ] && cp "$f" "$out${cfg.generatedCompletionsPath}/"
            done
            rm -rf "$tmp"
          fi
        '';
      })
    ];
};
|
||||
}
|
||||
|
|
@ -1,2 +1,3 @@
|
|||
(test
|
||||
(name test_inshellah_parser))
|
||||
(name test_inshellah)
|
||||
(libraries inshellah str))
|
||||
|
|
|
|||
326
test/test_inshellah.ml
Normal file
326
test/test_inshellah.ml
Normal file
|
|
@ -0,0 +1,326 @@
|
|||
open Inshellah.Parser
|
||||
open Inshellah.Manpage
|
||||
open Inshellah.Nushell
|
||||
|
||||
(* Running tallies of failed and passed checks. *)
let failures = ref 0
let passes = ref 0

(* Record the outcome of one named check in the tallies and print it. *)
let check name condition =
  match condition with
  | true ->
    incr passes;
    Printf.printf " PASS: %s\n" name
  | false ->
    incr failures;
    Printf.printf " FAIL: %s\n" name
|
||||
|
||||
(* Parse help text, raising [Failure] with the parser's message when
   [parse_help] reports an error. *)
let parse txt =
  parse_help txt
  |> Result.fold ~ok:Fun.id ~error:(fun msg ->
         failwith (Printf.sprintf "parse_help failed: %s" msg))
|
||||
|
||||
(* --- Help parser tests --- *)
|
||||
|
||||
let test_gnu_basic () =
|
||||
Printf.printf "\n== GNU basic flags ==\n";
|
||||
let r = parse " -a, --all do not ignore entries starting with .\n" in
|
||||
check "one entry" (List.length r.entries = 1);
|
||||
let e = List.hd r.entries in
|
||||
check "both switch" (e.switch = Both ('a', "all"));
|
||||
check "no param" (e.param = None);
|
||||
check "desc" (String.length e.desc > 0)
|
||||
|
||||
let test_gnu_eq_param () =
|
||||
Printf.printf "\n== GNU = param ==\n";
|
||||
let r = parse " --block-size=SIZE scale sizes by SIZE\n" in
|
||||
check "one entry" (List.length r.entries = 1);
|
||||
let e = List.hd r.entries in
|
||||
check "long switch" (e.switch = Long "block-size");
|
||||
check "mandatory param" (e.param = Some (Mandatory "SIZE"))
|
||||
|
||||
let test_gnu_opt_param () =
|
||||
Printf.printf "\n== GNU optional param ==\n";
|
||||
let r = parse " --color[=WHEN] color the output WHEN\n" in
|
||||
check "one entry" (List.length r.entries = 1);
|
||||
let e = List.hd r.entries in
|
||||
check "long switch" (e.switch = Long "color");
|
||||
check "optional param" (e.param = Some (Optional "WHEN"))
|
||||
|
||||
let test_underscore_param () =
|
||||
Printf.printf "\n== Underscore in param (TIME_STYLE) ==\n";
|
||||
let r = parse " --time-style=TIME_STYLE time/date format\n" in
|
||||
check "one entry" (List.length r.entries = 1);
|
||||
let e = List.hd r.entries in
|
||||
check "param with underscore" (e.param = Some (Mandatory "TIME_STYLE"))
|
||||
|
||||
let test_short_only () =
|
||||
Printf.printf "\n== Short-only flag ==\n";
|
||||
let r = parse " -v verbose output\n" in
|
||||
check "one entry" (List.length r.entries = 1);
|
||||
check "short switch" ((List.hd r.entries).switch = Short 'v')
|
||||
|
||||
let test_long_only () =
|
||||
Printf.printf "\n== Long-only flag ==\n";
|
||||
let r = parse " --help display help\n" in
|
||||
check "one entry" (List.length r.entries = 1);
|
||||
check "long switch" ((List.hd r.entries).switch = Long "help")
|
||||
|
||||
let test_multiline_desc () =
|
||||
Printf.printf "\n== Multi-line description ==\n";
|
||||
let r = parse {| --block-size=SIZE with -l, scale sizes by SIZE when printing them;
|
||||
e.g., '--block-size=M'; see SIZE format below
|
||||
|} in
|
||||
check "one entry" (List.length r.entries = 1);
|
||||
let e = List.hd r.entries in
|
||||
check "desc includes continuation" (String.length e.desc > 50)
|
||||
|
||||
let test_multiple_entries () =
|
||||
Printf.printf "\n== Multiple entries ==\n";
|
||||
let r = parse {| -a, --all do not ignore entries starting with .
|
||||
-A, --almost-all do not list implied . and ..
|
||||
--author with -l, print the author of each file
|
||||
|} in
|
||||
check "three entries" (List.length r.entries = 3)
|
||||
|
||||
let test_clap_short_sections () =
|
||||
Printf.printf "\n== Clap short with section headers ==\n";
|
||||
let r = parse {|INPUT OPTIONS:
|
||||
-e, --regexp=PATTERN A pattern to search for.
|
||||
-f, --file=PATTERNFILE Search for patterns from the given file.
|
||||
SEARCH OPTIONS:
|
||||
-s, --case-sensitive Search case sensitively.
|
||||
|} in
|
||||
check "three entries" (List.length r.entries = 3);
|
||||
let e = List.hd r.entries in
|
||||
check "first is regexp" (e.switch = Both ('e', "regexp"));
|
||||
check "first has param" (e.param = Some (Mandatory "PATTERN"))
|
||||
|
||||
let test_clap_long_style () =
|
||||
Printf.printf "\n== Clap long style (desc below flag) ==\n";
|
||||
let r = parse {| -H, --hidden
|
||||
Include hidden directories and files.
|
||||
|
||||
--no-ignore
|
||||
Do not respect ignore files.
|
||||
|} in
|
||||
check "two entries" (List.length r.entries = 2);
|
||||
let e = List.hd r.entries in
|
||||
check "hidden switch" (e.switch = Both ('H', "hidden"));
|
||||
check "desc below" (String.length e.desc > 0)
|
||||
|
||||
let test_clap_long_angle_param () =
|
||||
Printf.printf "\n== Clap long angle bracket param ==\n";
|
||||
let r = parse {| --nonprintable-notation <notation>
|
||||
Set notation for non-printable characters.
|
||||
|} in
|
||||
check "one entry" (List.length r.entries = 1);
|
||||
let e = List.hd r.entries in
|
||||
check "long switch" (e.switch = Long "nonprintable-notation");
|
||||
check "angle param" (e.param = Some (Mandatory "notation"))
|
||||
|
||||
let test_space_upper_param () =
|
||||
Printf.printf "\n== Space-separated ALL_CAPS param ==\n";
|
||||
let r = parse " -f, --foo FOO foo help\n" in
|
||||
check "one entry" (List.length r.entries = 1);
|
||||
let e = List.hd r.entries in
|
||||
check "switch" (e.switch = Both ('f', "foo"));
|
||||
check "space param" (e.param = Some (Mandatory "FOO"))
|
||||
|
||||
let test_go_cobra_flags () =
|
||||
Printf.printf "\n== Go/Cobra flags ==\n";
|
||||
let r = parse {|Flags:
|
||||
-D, --debug Enable debug mode
|
||||
-H, --host string Daemon socket to connect to
|
||||
-v, --version Print version information
|
||||
|} in
|
||||
check "three flag entries" (List.length r.entries = 3);
|
||||
(* Check the host flag has a type param *)
|
||||
let host = List.nth r.entries 1 in
|
||||
check "host switch" (host.switch = Both ('H', "host"));
|
||||
check "host type param" (host.param = Some (Mandatory "string"))
|
||||
|
||||
let test_go_cobra_subcommands () =
|
||||
Printf.printf "\n== Go/Cobra subcommands ==\n";
|
||||
let r = parse {|Common Commands:
|
||||
run Create and run a new container from an image
|
||||
exec Execute a command in a running container
|
||||
build Build an image from a Dockerfile
|
||||
|} in
|
||||
check "has subcommands" (List.length r.subcommands > 0)
|
||||
|
||||
let test_busybox_tab () =
|
||||
Printf.printf "\n== Busybox tab-indented ==\n";
|
||||
let r = parse "\t-1\tOne column output\n\t-a\tInclude names starting with .\n" in
|
||||
check "two entries" (List.length r.entries = 2);
|
||||
check "first is -1" ((List.hd r.entries).switch = Short '1')
|
||||
|
||||
let test_no_debug_prints () =
|
||||
Printf.printf "\n== No debug side effects ==\n";
|
||||
(* The old parser had print_endline at module load time.
|
||||
If we got here without "opt param is running" on stdout, we're good. *)
|
||||
check "no debug prints" true
|
||||
|
||||
(* --- Manpage parser tests --- *)
|
||||
|
||||
let test_manpage_tp_style () =
|
||||
Printf.printf "\n== Manpage .TP style ==\n";
|
||||
let groff = {|.SH OPTIONS
|
||||
.TP
|
||||
\fB\-a\fR, \fB\-\-all\fR
|
||||
do not ignore entries starting with .
|
||||
.TP
|
||||
\fB\-A\fR, \fB\-\-almost\-all\fR
|
||||
do not list implied . and ..
|
||||
.TP
|
||||
\fB\-\-block\-size\fR=\fISIZE\fR
|
||||
with \fB\-l\fR, scale sizes by SIZE
|
||||
.SH AUTHOR
|
||||
Written by someone.
|
||||
|} in
|
||||
let entries = parse_manpage_string groff in
|
||||
check "three entries" (List.length entries = 3);
|
||||
if List.length entries >= 1 then begin
|
||||
let e = List.hd entries in
|
||||
check "first is -a/--all" (e.switch = Both ('a', "all"));
|
||||
check "first desc" (String.length e.desc > 0)
|
||||
end;
|
||||
if List.length entries >= 3 then begin
|
||||
let e = List.nth entries 2 in
|
||||
check "block-size switch" (e.switch = Long "block-size");
|
||||
check "block-size param" (e.param = Some (Mandatory "SIZE"))
|
||||
end
|
||||
|
||||
let test_manpage_ip_style () =
|
||||
Printf.printf "\n== Manpage .IP style ==\n";
|
||||
let groff = {|.SH OPTIONS
|
||||
.IP "\fB\-k\fR, \fB\-\-insecure\fR"
|
||||
Allow insecure connections.
|
||||
.IP "\fB\-o\fR, \fB\-\-output\fR \fIfile\fR"
|
||||
Write output to file.
|
||||
.SH SEE ALSO
|
||||
|} in
|
||||
let entries = parse_manpage_string groff in
|
||||
check "two entries" (List.length entries = 2);
|
||||
if List.length entries >= 1 then begin
|
||||
let e = List.hd entries in
|
||||
check "first is -k/--insecure" (e.switch = Both ('k', "insecure"))
|
||||
end
|
||||
|
||||
let test_manpage_groff_stripping () =
|
||||
Printf.printf "\n== Groff escape stripping ==\n";
|
||||
let s = strip_groff_escapes {|\fB\-\-color\fR[=\fIWHEN\fR]|} in
|
||||
check "font escapes removed" (not (String.contains s 'f' && String.contains s 'B'));
|
||||
check "dashes converted" (String.contains s '-');
|
||||
let s2 = strip_groff_escapes {|\(aqhello\(aq|} in
|
||||
check "aq -> quote" (String.contains s2 '\'')
|
||||
|
||||
let test_manpage_empty_options () =
|
||||
Printf.printf "\n== Manpage with no OPTIONS section ==\n";
|
||||
let groff = {|.SH NAME
|
||||
foo \- does stuff
|
||||
.SH DESCRIPTION
|
||||
Does stuff.
|
||||
|} in
|
||||
let entries = parse_manpage_string groff in
|
||||
check "no entries" (List.length entries = 0)
|
||||
|
||||
(* --- Nushell generation tests --- *)
|
||||
|
||||
(* [contains s sub] is true when [sub] occurs literally somewhere in [s]. *)
let contains s sub =
  match Str.search_forward (Str.regexp_string sub) s 0 with
  | _ -> true
  | exception Not_found -> false
|
||||
|
||||
let test_nushell_basic () =
|
||||
Printf.printf "\n== Nushell basic extern ==\n";
|
||||
let r = parse " -a, --all do not ignore entries starting with .\n" in
|
||||
let nu = generate_extern "ls" r in
|
||||
check "has extern" (contains nu "export extern \"ls\"");
|
||||
check "has --all(-a)" (contains nu "--all(-a)");
|
||||
check "has comment" (contains nu "# do not ignore")
|
||||
|
||||
let test_nushell_param_types () =
|
||||
Printf.printf "\n== Nushell param type mapping ==\n";
|
||||
let r = parse {| -w, --width=COLS set output width
|
||||
--block-size=SIZE scale sizes
|
||||
-o, --output FILE output file
|
||||
|} in
|
||||
let nu = generate_extern "ls" r in
|
||||
check "COLS -> int" (contains nu "--width(-w): int");
|
||||
check "SIZE -> string" (contains nu "--block-size: string");
|
||||
check "FILE -> path" (contains nu "--output(-o): path")
|
||||
|
||||
let test_nushell_subcommands () =
|
||||
Printf.printf "\n== Nushell subcommands ==\n";
|
||||
let r = parse {|Common Commands:
|
||||
run Create and run a new container
|
||||
exec Execute a command
|
||||
|
||||
Flags:
|
||||
-D, --debug Enable debug mode
|
||||
|} in
|
||||
let nu = generate_extern "docker" r in
|
||||
check "has main extern" (contains nu "export extern \"docker\"");
|
||||
check "has --debug" (contains nu "--debug(-D)");
|
||||
check "has run subcommand" (contains nu "export extern \"docker run\"");
|
||||
check "has exec subcommand" (contains nu "export extern \"docker exec\"")
|
||||
|
||||
let test_nushell_from_manpage () =
|
||||
Printf.printf "\n== Nushell from manpage ==\n";
|
||||
let groff = {|.SH OPTIONS
|
||||
.TP
|
||||
\fB\-a\fR, \fB\-\-all\fR
|
||||
do not ignore entries starting with .
|
||||
.TP
|
||||
\fB\-\-block\-size\fR=\fISIZE\fR
|
||||
scale sizes by SIZE
|
||||
.SH AUTHOR
|
||||
|} in
|
||||
let entries = parse_manpage_string groff in
|
||||
let nu = generate_extern_from_entries "ls" entries in
|
||||
check "has extern" (contains nu "export extern \"ls\"");
|
||||
check "has --all(-a)" (contains nu "--all(-a)");
|
||||
check "has --block-size" (contains nu "--block-size: string")
|
||||
|
||||
let test_nushell_module () =
|
||||
Printf.printf "\n== Nushell module wrapper ==\n";
|
||||
let r = parse " -v, --verbose verbose output\n" in
|
||||
let nu = generate_module "myapp" r in
|
||||
check "has module" (contains nu "module myapp-completions");
|
||||
check "has extern inside" (contains nu "export extern \"myapp\"");
|
||||
check "has flag" (contains nu "--verbose(-v)")
|
||||
|
||||
(* Test driver: run the three groups of tests in order, print the totals,
   and exit with status 1 if any check failed. *)
let () =
  let run_all tests = List.iter (fun test -> test ()) tests in
  Printf.printf "Running help parser tests...\n";
  run_all
    [ test_gnu_basic;
      test_gnu_eq_param;
      test_gnu_opt_param;
      test_underscore_param;
      test_short_only;
      test_long_only;
      test_multiline_desc;
      test_multiple_entries;
      test_clap_short_sections;
      test_clap_long_style;
      test_clap_long_angle_param;
      test_space_upper_param;
      test_go_cobra_flags;
      test_go_cobra_subcommands;
      test_busybox_tab;
      test_no_debug_prints ];

  Printf.printf "\nRunning manpage parser tests...\n";
  run_all
    [ test_manpage_tp_style;
      test_manpage_ip_style;
      test_manpage_groff_stripping;
      test_manpage_empty_options ];

  Printf.printf "\nRunning nushell generation tests...\n";
  run_all
    [ test_nushell_basic;
      test_nushell_param_types;
      test_nushell_subcommands;
      test_nushell_from_manpage;
      test_nushell_module ];

  Printf.printf "\n=== Results: %d passed, %d failed ===\n" !passes !failures;
  if !failures > 0 then exit 1
|
||||
Loading…
Add table
Add a link
Reference in a new issue