diff --git a/bin/dune b/bin/dune index 369fe51..4bb8309 100644 --- a/bin/dune +++ b/bin/dune @@ -1,4 +1,4 @@ (executable - (public_name inshellah-parser) + (public_name inshellah) (name main) - (libraries inshellah_parser)) + (libraries inshellah)) diff --git a/bin/main.ml b/bin/main.ml index 81c3007..4f07951 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -1,171 +1,112 @@ -open Inshellah_parser.Parser +open Inshellah.Parser +open Inshellah.Manpage +open Inshellah.Nushell + +let usage () = + Printf.eprintf {|inshellah — generate nushell completions from manpages and --help output + +Usage: + inshellah manpage FILE Parse a manpage (.1, .1.gz) and emit nushell extern + inshellah manpage-dir DIR Batch-process all manpages under DIR/man1/ + inshellah help CMD [ARGS...] Run CMD ARGS --help, parse output, emit nushell extern + inshellah parse-help CMD Read --help text from stdin, emit nushell extern for CMD + inshellah demo Run built-in demo + +|}; + exit 1 + +(* Extract command name from a manpage filename *) +let cmd_name_of_manpage path = + let base = Filename.basename path in + (* strip .gz if present *) + let base = + if Filename.check_suffix base ".gz" then Filename.chop_suffix base ".gz" + else base + in + (* strip .N section suffix *) + try Filename.chop_extension base + with Invalid_argument _ -> base + +let cmd_manpage file = + let cmd = cmd_name_of_manpage file in + let entries = parse_manpage_file file in + if entries <> [] then + print_string (generate_extern_from_entries cmd entries) + +let cmd_manpage_dir dir = + (* Walk man1/ through man9/ looking for manpages *) + for section = 1 to 9 do + let subdir = Filename.concat dir (Printf.sprintf "man%d" section) in + if Sys.file_exists subdir && Sys.is_directory subdir then begin + let files = Sys.readdir subdir in + Array.sort String.compare files; + Array.iter (fun file -> + let path = Filename.concat subdir file in + try cmd_manpage path + with _ -> () (* skip unparseable manpages *) + ) files + end + done + 
+let cmd_help args = + match args with + | [] -> Printf.eprintf "error: help mode requires a command name\n"; exit 1 + | cmd :: rest -> + let full_cmd = + String.concat " " (List.map Filename.quote (cmd :: rest @ ["--help"])) + in + let ic = Unix.open_process_in (full_cmd ^ " 2>&1") in + let buf = Buffer.create 4096 in + (try while true do + let line = input_line ic in + Buffer.add_string buf line; + Buffer.add_char buf '\n' + done with End_of_file -> ()); + let _ = Unix.close_process_in ic in + let text = Buffer.contents buf in + let cmd_name = Filename.basename cmd in + (match parse_help text with + | Ok r -> print_string (generate_extern cmd_name r) + | Error msg -> Printf.eprintf "parse error for %s: %s\n" cmd_name msg; exit 1) + +let cmd_parse_help cmd = + let buf = Buffer.create 4096 in + (try while true do + let line = input_line stdin in + Buffer.add_string buf line; + Buffer.add_char buf '\n' + done with End_of_file -> ()); + let text = Buffer.contents buf in + (match parse_help text with + | Ok r -> print_string (generate_extern cmd r) + | Error msg -> Printf.eprintf "parse error for %s: %s\n" cmd msg; exit 1) + +let cmd_demo () = + let ls_help = + {|Usage: ls [OPTION]... [FILE]... +List information about the FILEs (the current directory by default). + + -a, --all do not ignore entries starting with . + -A, --almost-all do not list implied . and .. + --block-size=SIZE with -l, scale sizes by SIZE when printing them; + e.g., '--block-size=M'; see SIZE format below + --color[=WHEN] color the output WHEN + -h, --human-readable with -l and -s, print sizes like 1K 234M 2G etc. 
+ --help display this help and exit + --version output version information and exit +|} + in + Printf.printf "# Generated by: inshellah demo\n\n"; + (match parse_help ls_help with + | Ok r -> print_string (generate_extern "ls" r) + | Error msg -> Printf.eprintf "parse error: %s\n" msg) let () = - let _cp = - {| - -A, --show-all equivalent to -vET - -b, --number-nonblank number nonempty output lines, overrides -n - -e equivalent to -vE - -E, --show-ends display $ or ^M$ at end of each line - -n, --number number all output lines - -s, --squeeze-blank suppress repeated empty output lines - -t equivalent to -vT - -T, --show-tabs display TAB characters as ^I - -u (ignored) - -v, --show-nonprinting use ^ and M- notation, except for LFD and TAB - --help - display this help and exit - --version - output version information and exit -|} - and ls = - {| - -a, --all - do not ignore entries starting with . - -A, --almost-all - do not list implied . and .. - --author - with -l, print the author of each file - -b, --escape - print C-style escapes for nongraphic characters - --block-size=SIZE - with -l, scale sizes by SIZE when printing them; - e.g., '--block-size=M'; see SIZE format below - -B, --ignore-backups - do not list implied entries ending with ~ - -c - with -lt: sort by, and show, ctime - (time of last change of file status information); - with -l: show ctime and sort by name; - otherwise: sort by ctime, newest first - -C - list entries by columns - --color[=WHEN] - color the output WHEN; more info below - -d, --directory - list directories themselves, not their contents - -D, --dired - generate output designed for Emacs' dired mode - -f - same as -a -U - -F, --classify[=WHEN] - append indicator (one of */=>@|) to entries WHEN - --file-type - like -F, except do not append '*' - --format=WORD - across,horizontal (-x), commas (-m), long (-l), - single-column (-1), verbose (-l), vertical (-C) - --full-time - like -l --time-style=full-iso - -g - like -l, but do not list 
owner - --group-directories-first - group directories before files - -G, --no-group - in a long listing, don't print group names - -h, --human-readable - with -l and -s, print sizes like 1K 234M 2G etc. - --si - likewise, but use powers of 1000 not 1024 - -H, --dereference-command-line - follow symbolic links listed on the command line - --dereference-command-line-symlink-to-dir - follow each command line symbolic link that points to a directory - --hide=PATTERN - do not list implied entries matching shell PATTERN - (overridden by -a or -A) - --hyperlink[=WHEN] - hyperlink file names WHEN - --indicator-style=WORD - append indicator with style WORD to entry names: - none (default), slash (-p), file-type (--file-type), classify (-F) - -i, --inode - print the index number of each file - -I, --ignore=PATTERN - do not list implied entries matching shell PATTERN - -k, --kibibytes - default to 1024-byte blocks for file system usage; - used only with -s and per directory totals - -l - use a long listing format - -L, --dereference - when showing file information for a symbolic link, - show information for the file the link references - rather than for the link itself - -m - fill width with a comma separated list of entries - -n, --numeric-uid-gid - like -l, but list numeric user and group IDs - -N, --literal - print entry names without quoting - -o - like -l, but do not list group information - -p, --indicator-style=slash - append / indicator to directories - -q, --hide-control-chars - print ? 
instead of nongraphic characters - --show-control-chars - show nongraphic characters as-is; - the default, unless program is 'ls' and output is a terminal - -Q, --quote-name - enclose entry names in double quotes - --quoting-style=WORD - use quoting style WORD for entry names: - literal, locale, shell, shell-always, - shell-escape, shell-escape-always, c, escape - (overrides QUOTING_STYLE environment variable) - -r, --reverse - reverse order while sorting - -R, --recursive - list subdirectories recursively - -s, --size - print the allocated size of each file, in blocks - -S - sort by file size, largest first - --sort=WORD - change default 'name' sort to WORD: - none (-U), size (-S), time (-t), - version (-v), extension (-X), name, width - --time=WORD - select which timestamp used to display or sort; - access time (-u): atime, access, use; - metadata change time (-c): ctime, status; - modified time (default): mtime, modification; - birth time: birth, creation; - with -l, WORD determines which time to show; - with --sort=time, sort by WORD (newest first) - --time-style=TIME_STYLE - time/date format with -l; see TIME_STYLE below - -t - sort by time, newest first; see --time - -T, --tabsize=COLS - assume tab stops at each COLS instead of 8 - -u - with -lt: sort by, and show, access time; - with -l: show access time and sort by name; - otherwise: sort by access time, newest first - -U - do not sort directory entries - -v - natural sort of (version) numbers within text - -w, --width=COLS - set output width to COLS. 
0 means no limit - -x - list entries by lines instead of by columns - -X - sort alphabetically by entry extension - -Z, --context - print any security context of each file - --zero - end each output line with NUL, not newline - -1 - list one file per line - --help - display this help and exit - --version - output version information and exit - |} - in - match parse_help ls with - | Ok entries -> List.iter (fun e -> print_entry e) entries - | Error msg -> Printf.eprintf "parse error: %s\n" msg + let args = Array.to_list Sys.argv |> List.tl in + match args with + | ["manpage"; file] -> cmd_manpage file + | ["manpage-dir"; dir] -> cmd_manpage_dir dir + | "help" :: rest -> cmd_help rest + | ["parse-help"; cmd] -> cmd_parse_help cmd + | ["demo"] -> cmd_demo () + | _ -> usage () diff --git a/dune-project b/dune-project index b904d6a..21eaff0 100644 --- a/dune-project +++ b/dune-project @@ -1,6 +1,6 @@ (lang dune 3.20) -(name inshellah-parser) +(name inshellah) (generate_opam_files true) @@ -16,7 +16,7 @@ (documentation https://url/to/documentation) (package - (name inshellah-parser) + (name inshellah) (synopsis "A short synopsis") (description "A longer description") (depends diff --git a/flake.nix b/flake.nix index e475c84..e9b3374 100644 --- a/flake.nix +++ b/flake.nix @@ -28,5 +28,32 @@ }; } ); + + packages = forAllSystems ( + pkgs: sys: { + default = pkgs.ocamlPackages.buildDunePackage { + pname = "inshellah"; + version = "0.1"; + src = ./.; + nativeBuildInputs = [ pkgs.git ]; + buildInputs = with pkgs.ocamlPackages; [ + dune_3 + ocaml + angstrom + angstrom-unix + ]; + + meta.mainProgram = "inshellah"; + + }; + } + ); + + nixosModules.default = + { pkgs, ... 
}: + { + imports = [ ./nix/module.nix ]; + programs.inshellah.package = self.packages.${pkgs.stdenv.hostPlatform.system}.default; + }; }; } diff --git a/inshellah-parser.opam b/inshellah-parser.opam deleted file mode 100644 index 726c5a9..0000000 --- a/inshellah-parser.opam +++ /dev/null @@ -1,35 +0,0 @@ -# This file is generated by dune, edit dune-project instead -opam-version: "2.0" -synopsis: "A short synopsis" -description: "A longer description" -maintainer: ["Maintainer Name "] -authors: ["Author Name "] -license: "LICENSE" -tags: ["add topics" "to describe" "your" "project"] -homepage: "https://github.com/username/reponame" -doc: "https://url/to/documentation" -bug-reports: "https://github.com/username/reponame/issues" -depends: [ - "ocaml" - "dune" {>= "3.20"} - "angstrom" - "angstrom-unix" - "ppx_inline_test" {with-test} - "odoc" {with-doc} -] -build: [ - ["dune" "subst"] {dev} - [ - "dune" - "build" - "-p" - name - "-j" - jobs - "@install" - "@runtest" {with-test} - "@doc" {with-doc} - ] -] -dev-repo: "git+https://github.com/username/reponame.git" -x-maintenance-intent: ["(latest)"] diff --git a/lib/dune b/lib/dune index 6c2da6b..9c8ae2e 100644 --- a/lib/dune +++ b/lib/dune @@ -1,3 +1,3 @@ (library - (name inshellah_parser) - (libraries angstrom angstrom-unix)) + (name inshellah) + (libraries angstrom angstrom-unix str unix)) diff --git a/lib/manpage.ml b/lib/manpage.ml new file mode 100644 index 0000000..bd5eb0d --- /dev/null +++ b/lib/manpage.ml @@ -0,0 +1,415 @@ +open Parser + +(* --- Groff escape/formatting stripper --- *) + +let strip_groff_escapes s = + let buf = Buffer.create (String.length s) in + let len = String.length s in + let i = ref 0 in + while !i < len do + if s.[!i] = '\\' && !i + 1 < len then begin + let next = s.[!i + 1] in + match next with + | 'f' -> + (* Font escape: \fB, \fI, \fP, \fR, \f(XX, \f[...] 
*) + if !i + 2 < len then begin + if s.[!i + 2] = '(' then + i := !i + 4 (* \f(XX *) + else if s.[!i + 2] = '[' then begin + (* \f[...] - skip to ] *) + i := !i + 3; + while !i < len && s.[!i] <> ']' do incr i done; + if !i < len then incr i + end else + i := !i + 3 (* \fX *) + end else + i := !i + 2 + | '-' -> + Buffer.add_char buf '-'; + i := !i + 2 + | '&' | '/' | ',' -> + (* Zero-width characters *) + i := !i + 2 + | '(' -> + (* Two-char named character: \(aq, \(lq, \(rq, etc. *) + if !i + 3 < len then begin + let name = String.sub s (!i + 2) 2 in + (match name with + | "aq" -> Buffer.add_char buf '\'' + | "lq" | "Lq" -> Buffer.add_char buf '"' + | "rq" | "Rq" -> Buffer.add_char buf '"' + | "em" | "en" -> Buffer.add_char buf '-' + | _ -> ()); + i := !i + 4 + end else + i := !i + 2 + | '[' -> + (* Named character: \[...] *) + i := !i + 2; + let start = !i in + while !i < len && s.[!i] <> ']' do incr i done; + if !i < len then begin + let name = String.sub s start (!i - start) in + (match name with + | "aq" -> Buffer.add_char buf '\'' + | "lq" | "Lq" -> Buffer.add_char buf '"' + | "rq" | "Rq" -> Buffer.add_char buf '"' + | _ -> ()); + incr i + end + | 's' -> + (* Size escape: \sN, \s+N, \s-N, \s'N' *) + i := !i + 2; + if !i < len && (s.[!i] = '+' || s.[!i] = '-') then incr i; + if !i < len && s.[!i] >= '0' && s.[!i] <= '9' then incr i; + if !i < len && s.[!i] >= '0' && s.[!i] <= '9' then incr i + | 'm' -> + (* Color escape: \m[...] *) + i := !i + 2; + if !i < len && s.[!i] = '[' then begin + incr i; + while !i < len && s.[!i] <> ']' do incr i done; + if !i < len then incr i + end + | 'X' -> + (* Device control: \X'...' *) + i := !i + 2; + if !i < len && s.[!i] = '\'' then begin + incr i; + while !i < len && s.[!i] <> '\'' do incr i done; + if !i < len then incr i + end + | '*' -> + (* String variable: \*X or \*(XX or \*[...] 
*) + i := !i + 2; + if !i < len then begin + if s.[!i] = '(' then + i := !i + 2 + else if s.[!i] = '[' then begin + incr i; + while !i < len && s.[!i] <> ']' do incr i done; + if !i < len then incr i + end else + incr i + end + | 'n' -> + (* Number register: \nX or \n(XX or \n[...] *) + i := !i + 2; + if !i < len then begin + if s.[!i] = '(' then + i := !i + 2 + else if s.[!i] = '[' then begin + incr i; + while !i < len && s.[!i] <> ']' do incr i done; + if !i < len then incr i + end else + incr i + end + | 'e' -> + Buffer.add_char buf '\\'; + i := !i + 2 + | '\\' -> + Buffer.add_char buf '\\'; + i := !i + 2 + | ' ' -> + Buffer.add_char buf ' '; + i := !i + 2 + | _ -> + (* Unknown escape, skip *) + i := !i + 2 + end else begin + Buffer.add_char buf s.[!i]; + incr i + end + done; + Buffer.contents buf + +(* Strip inline macro formatting: .BI, .BR, .IR, etc. + These macros alternate between fonts for their arguments. + We just concatenate the arguments. *) +let strip_inline_macro_args s = + (* Arguments are separated by spaces, quoted strings are kept together *) + let buf = Buffer.create (String.length s) in + let len = String.length s in + let i = ref 0 in + while !i < len do + if s.[!i] = '"' then begin + incr i; + while !i < len && s.[!i] <> '"' do + Buffer.add_char buf s.[!i]; + incr i + done; + if !i < len then incr i + end else if s.[!i] = ' ' || s.[!i] = '\t' then begin + incr i + end else begin + Buffer.add_char buf s.[!i]; + incr i + end + done; + Buffer.contents buf + +let strip_groff line = + let s = strip_groff_escapes line in + String.trim s + +(* --- Line classification --- *) + +type groff_line = + | Macro of string * string (* e.g. ("SH", "OPTIONS") or ("TP", "") *) + | Text of string (* plain text after stroff stripping *) + | Blank + | Comment + +let classify_line line = + let len = String.length line in + if len = 0 then Blank + else if len >= 2 && line.[0] = '.' 
&& line.[1] = '\\' && (len < 3 || line.[2] = '"') then + Comment + else if len >= 3 && line.[0] = '\\' && line.[1] = '"' then + Comment + else if line.[0] = '.' || line.[0] = '\'' then begin + (* Macro line *) + let rest = String.sub line 1 (len - 1) in + let rest = String.trim rest in + (* Split into macro name and arguments *) + let space_pos = + try Some (String.index rest ' ') + with Not_found -> + try Some (String.index rest '\t') + with Not_found -> None + in + match space_pos with + | Some pos -> + let name = String.sub rest 0 pos in + let args = String.trim (String.sub rest (pos + 1) (String.length rest - pos - 1)) in + (* Strip quotes from args *) + let args = + let alen = String.length args in + if alen >= 2 && args.[0] = '"' && args.[alen - 1] = '"' then + String.sub args 1 (alen - 2) + else args + in + Macro (name, args) + | None -> + Macro (rest, "") + end else begin + let stripped = strip_groff line in + if String.length stripped = 0 then Blank + else Text stripped + end + +(* Check for dot-backslash-quote style comments more carefully *) +let is_comment_line line = + let len = String.length line in + (len >= 3 && line.[0] = '.' 
&& line.[1] = '\\' && line.[2] = '"') + || (len >= 2 && line.[0] = '\\' && line.[1] = '"') + +let classify_line line = + if is_comment_line line then Comment + else classify_line line + +(* --- Section extraction --- *) + +let extract_options_section lines = + let classified = List.map classify_line lines in + let rec collect_until_next_sh lines acc = + match lines with + | [] -> List.rev acc + | Macro ("SH", _) :: _ -> List.rev acc + | line :: rest -> collect_until_next_sh rest (line :: acc) + in + let is_options_section name = + let s = String.uppercase_ascii (String.trim name) in + s = "OPTIONS" + || (String.length s > 0 && + try let _ = Str.search_forward (Str.regexp_string "OPTION") s 0 in true + with Not_found -> false) + in + (* First pass: look for OPTIONS section *) + let rec find_options = function + | [] -> None + | Macro ("SH", args) :: rest when is_options_section args -> + Some (collect_until_next_sh rest []) + | _ :: rest -> find_options rest + in + (* Fallback: DESCRIPTION section *) + let rec find_description = function + | [] -> [] + | Macro ("SH", args) :: rest + when String.uppercase_ascii (String.trim args) = "DESCRIPTION" -> + collect_until_next_sh rest [] + | _ :: rest -> find_description rest + in + match find_options classified with + | Some section -> section + | None -> find_description classified + +(* --- Strategy-based entry extraction --- *) + +(* Collect text lines until next macro or blank *) +let rec collect_text_lines lines acc = + match lines with + | Text s :: rest -> collect_text_lines rest (s :: acc) + | _ -> (String.concat " " (List.rev acc), lines) + +(* Parse a tag line to extract entry using the Angstrom switch_parser *) +let parse_tag_to_entry tag desc = + let tag = strip_groff_escapes tag in + let tag = String.trim tag in + match Angstrom.parse_string ~consume:Angstrom.Consume.Prefix + (Angstrom.lift2 (fun sw p -> (sw, p)) switch_parser param_parser) tag with + | Ok (switch, param) -> Some { switch; param; desc } + | 
Error _ -> None + +(* Strategy A: .TP style (most common — GNU coreutils, help2man) *) +let strategy_tp lines = + let rec walk lines acc = + match lines with + | [] -> List.rev acc + | Macro ("TP", _) :: rest -> + (* Next text line is the tag *) + begin match rest with + | Text tag :: rest2 -> + let (desc, rest3) = collect_text_lines rest2 [] in + let entry = parse_tag_to_entry tag desc in + walk rest3 (match entry with Some e -> e :: acc | None -> acc) + | _ -> walk rest acc + end + | _ :: rest -> walk rest acc + in + walk lines [] + +(* Strategy B: .IP style (curl, hand-written) *) +let strategy_ip lines = + let rec walk lines acc = + match lines with + | [] -> List.rev acc + | Macro ("IP", tag) :: rest -> + let tag = strip_groff_escapes tag in + let (desc, rest2) = collect_text_lines rest [] in + let entry = parse_tag_to_entry tag desc in + walk rest2 (match entry with Some e -> e :: acc | None -> acc) + | _ :: rest -> walk rest acc + in + walk lines [] + +(* Strategy C: .PP + .RS/.RE style (git, DocBook) *) +let strategy_pp_rs lines = + let rec walk lines acc = + match lines with + | [] -> List.rev acc + | Macro ("PP", _) :: rest -> + begin match rest with + | Text tag :: rest2 -> + (* Look for .RS ... text ... 
.RE *) + let rec collect_rs lines desc_acc = + match lines with + | Macro ("RS", _) :: rest3 -> + collect_in_rs rest3 desc_acc + | Text s :: rest3 -> + (* Sometimes description follows directly *) + collect_rs rest3 (s :: desc_acc) + | _ -> (String.concat " " (List.rev desc_acc), lines) + and collect_in_rs lines desc_acc = + match lines with + | Macro ("RE", _) :: rest3 -> + (String.concat " " (List.rev desc_acc), rest3) + | Text s :: rest3 -> + collect_in_rs rest3 (s :: desc_acc) + | Macro ("PP", _) :: _ | Macro ("SH", _) :: _ -> + (String.concat " " (List.rev desc_acc), lines) + | _ :: rest3 -> collect_in_rs rest3 desc_acc + | [] -> (String.concat " " (List.rev desc_acc), []) + in + let (desc, rest3) = collect_rs rest2 [] in + let entry = parse_tag_to_entry tag desc in + walk rest3 (match entry with Some e -> e :: acc | None -> acc) + | _ -> walk rest acc + end + | _ :: rest -> walk rest acc + in + walk lines [] + +(* Strategy D: Deroff fallback — strip all groff, use help text parser *) +let strategy_deroff_lines lines = + let buf = Buffer.create 256 in + List.iter (fun line -> + match line with + | Text s -> + Buffer.add_string buf s; + Buffer.add_char buf '\n' + | Macro (("BI" | "BR" | "IR" | "B" | "I"), args) -> + let text = strip_inline_macro_args args in + let text = strip_groff_escapes text in + Buffer.add_string buf text; + Buffer.add_char buf '\n' + | Blank -> Buffer.add_char buf '\n' + | _ -> () + ) lines; + let text = Buffer.contents buf in + match parse_help text with + | Ok result -> result.entries + | Error _ -> [] + +(* Count macros of a given type *) +let count_macro name lines = + List.fold_left (fun n line -> + match line with Macro (m, _) when m = name -> n + 1 | _ -> n + ) 0 lines + +(* Auto-detect and try strategies, return the one with most entries *) +let extract_entries lines = + let results = ref [] in + (* Try TP if .TP macros present *) + if count_macro "TP" lines > 0 then + results := ("TP", strategy_tp lines) :: !results; + (* Try IP 
if .IP macros present *) + if count_macro "IP" lines > 0 then + results := ("IP", strategy_ip lines) :: !results; + (* Try PP+RS if both present *) + if count_macro "PP" lines > 0 && count_macro "RS" lines > 0 then + results := ("PP+RS", strategy_pp_rs lines) :: !results; + (* Always try deroff as fallback *) + results := ("deroff", strategy_deroff_lines lines) :: !results; + (* Pick the result with the most entries *) + let best = + List.fold_left (fun (best_name, best_entries) (name, entries) -> + if List.length entries >= List.length best_entries then (name, entries) + else (best_name, best_entries) + ) ("none", []) !results + in + snd best + +(* --- Top-level API --- *) + +let parse_manpage_lines lines = + let options_section = extract_options_section lines in + extract_entries options_section + +let parse_manpage_string contents = + let lines = String.split_on_char '\n' contents in + parse_manpage_lines lines + +let parse_manpage_gzipped_file path = + let ic = Unix.open_process_in (Printf.sprintf "gzip -dc %s" (Filename.quote path)) in + let buf = Buffer.create 4096 in + (try while true do + let line = input_line ic in + Buffer.add_string buf line; + Buffer.add_char buf '\n' + done with End_of_file -> ()); + let _ = Unix.close_process_in ic in + parse_manpage_string (Buffer.contents buf) + +let parse_manpage_file path = + if Filename.check_suffix path ".gz" then + parse_manpage_gzipped_file path + else begin + let ic = open_in path in + let n = in_channel_length ic in + let s = Bytes.create n in + really_input ic s 0 n; + close_in ic; + parse_manpage_string (Bytes.to_string s) + end diff --git a/lib/nushell.ml b/lib/nushell.ml new file mode 100644 index 0000000..b6970f3 --- /dev/null +++ b/lib/nushell.ml @@ -0,0 +1,82 @@ +open Parser + +(* Map a param name/type hint to a nushell type *) +let nushell_type_of_param = function + | "FILE" | "file" | "PATH" | "path" | "DIR" | "dir" | "DIRECTORY" + | "FILENAME" | "PATTERNFILE" -> "path" + | "NUM" | "N" | "COUNT" | 
"NUMBER" | "int" | "INT" | "COLS" | "WIDTH" + | "LINES" | "DEPTH" | "depth" -> "int" + | _ -> "string" + +(* Escape a nushell string: wrap in double quotes, escape inner quotes *) +let escape_nu s = + let buf = Buffer.create (String.length s + 2) in + String.iter (fun c -> + match c with + | '"' -> Buffer.add_string buf "\\\"" + | '\\' -> Buffer.add_string buf "\\\\" + | _ -> Buffer.add_char buf c + ) s; + Buffer.contents buf + +(* Format a single flag for nushell extern *) +let format_flag entry = + let buf = Buffer.create 64 in + Buffer.add_string buf " "; + (* Flag name *) + (match entry.switch with + | Both (s, l) -> + Buffer.add_string buf (Printf.sprintf "--%s(-%c)" l s) + | Long l -> + Buffer.add_string buf (Printf.sprintf "--%s" l) + | Short s -> + Buffer.add_string buf (Printf.sprintf "-%c" s)); + (* Type annotation *) + (match entry.param with + | Some (Mandatory name) -> + Buffer.add_string buf ": "; + Buffer.add_string buf (nushell_type_of_param name) + | Some (Optional name) -> + Buffer.add_string buf ": "; + Buffer.add_string buf (nushell_type_of_param name) + | None -> ()); + (* Description as comment *) + if String.length entry.desc > 0 then begin + (* Pad to align comments *) + let current_len = Buffer.length buf in + let target = max (current_len + 1) 40 in + for _ = current_len to target - 1 do + Buffer.add_char buf ' ' + done; + Buffer.add_string buf "# "; + Buffer.add_string buf entry.desc + end; + Buffer.contents buf + +(* Generate nushell extern definition for a command *) +let generate_extern cmd_name result = + let buf = Buffer.create 1024 in + (* Main extern with flags *) + Buffer.add_string buf (Printf.sprintf "export extern \"%s\" [\n" (escape_nu cmd_name)); + List.iter (fun entry -> + Buffer.add_string buf (format_flag entry); + Buffer.add_char buf '\n' + ) result.entries; + Buffer.add_string buf "]\n"; + (* Subcommand externs *) + List.iter (fun (sc : subcommand) -> + Buffer.add_string buf + (Printf.sprintf "\nexport extern \"%s %s\" [ 
# %s\n]\n" + (escape_nu cmd_name) (escape_nu sc.name) (escape_nu sc.desc)) + ) result.subcommands; + Buffer.contents buf + +(* Generate a complete nushell module *) +let generate_module cmd_name result = + Printf.sprintf "module %s-completions {\n%s}\n" + cmd_name (generate_extern cmd_name result) + +(* Generate from manpage entries (no subcommands) *) +let generate_extern_from_entries cmd_name entries = + let result = { entries; subcommands = [] } in + generate_extern cmd_name result diff --git a/lib/parser.ml b/lib/parser.ml index 5d4478b..a4eaae7 100644 --- a/lib/parser.ml +++ b/lib/parser.ml @@ -1,16 +1,55 @@ -(* open Angstrom_unix *) -(* also look for "subcommands" for clapslop *) -(* and other common help patterns *) open Angstrom -let ( <| ) = ( @@ ) -let ( <&> ) p1 p2 = lift2 (fun a b -> (a, b)) p1 p2 -let is_whitespace = function ' ' | '\t' | '\n' | '\r' -> true | _ -> false +(* Strip ANSI escape sequences and OSC hyperlinks from --help output *) +let strip_ansi s = + let buf = Buffer.create (String.length s) in + let len = String.length s in + let i = ref 0 in + while !i < len do + if !i + 1 < len && Char.code s.[!i] = 0x1b then begin + let next = s.[!i + 1] in + if next = '[' then begin + (* CSI sequence: ESC [ ... final_byte *) + i := !i + 2; + while !i < len && not (s.[!i] >= '@' && s.[!i] <= '~') do incr i done; + if !i < len then incr i + end else if next = ']' then begin + (* OSC sequence: ESC ] ... 
(terminated by BEL or ESC \) *) + i := !i + 2; + let found = ref false in + while !i < len && not !found do + if s.[!i] = '\x07' then + (incr i; found := true) + else if !i + 1 < len && Char.code s.[!i] = 0x1b && s.[!i + 1] = '\\' then + (i := !i + 2; found := true) + else + incr i + done + end else begin + (* Other ESC sequence, skip ESC + one char *) + i := !i + 2 + end + end else begin + Buffer.add_char buf s.[!i]; + incr i + end + done; + Buffer.contents buf + +let is_whitespace = function ' ' | '\t' -> true | _ -> false let is_alphanumeric = function | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' -> true | _ -> false +let is_param_char = function + | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '-' -> true + | _ -> false + +let is_upper_or_underscore = function + | 'A' .. 'Z' | '_' -> true + | _ -> false + let is_long_char = function | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '-' -> true | _ -> false @@ -18,48 +57,207 @@ let is_long_char = function type switch = Short of char | Long of string | Both of char * string type param = Mandatory of string | Optional of string type entry = { switch : switch; param : param option; desc : string } +type subcommand = { name : string; desc : string } +type help_result = { entries : entry list; subcommands : subcommand list } + +(* --- Low-level combinators --- *) + +let inline_ws = skip_while (function ' ' | '\t' -> true | _ -> false) +let eol = end_of_line <|> end_of_input +let eol_strict = end_of_line (* Must consume a newline, no EOF match *) -let whitespace = skip_while is_whitespace -let comma = char ',' *> whitespace let short_switch = char '-' *> satisfy is_alphanumeric let long_switch = string "--" *> take_while1 is_long_char +let comma = char ',' *> inline_ws -let opt_param = - print_endline "opt param is running"; - string "[=" *> take_while is_alphanumeric <* char ']' >>| fun a -> Optional a +(* Parameter parsers *) +let eq_opt_param = + string "[=" *> take_while1 is_param_char <* char ']' >>| fun a -> Optional a -let 
man_param = - print_endline "man param is running"; - char '=' *> take_while is_alphanumeric >>| fun a -> Mandatory a +let eq_man_param = + char '=' *> take_while1 is_param_char >>| fun a -> Mandatory a + +(* Space-separated ALL_CAPS param: e.g. " FILE", " TIME_STYLE" *) +let space_upper_param = + char ' ' *> peek_char_fail >>= fun c -> + if is_upper_or_underscore c then + take_while1 is_param_char >>= fun name -> + (* Ensure it's truly all-uppercase (not a description word like "Do") *) + if String.length name >= 1 && String.for_all (fun c -> is_upper_or_underscore c || c >= '0' && c <= '9') name then + return (Mandatory name) + else + fail "not an all-caps param" + else + fail "not an uppercase param" + +(* Angle-bracket param: e.g. "", "" *) +let angle_param = + char '<' *> take_while1 (fun c -> c <> '>') <* char '>' >>| fun name -> + Mandatory name + +(* Space + angle bracket param *) +let space_angle_param = + char ' ' *> angle_param + +(* Optional angle bracket param: [] *) +let opt_angle_param = + char '[' *> char '<' *> take_while1 (fun c -> c <> '>') <* char '>' <* char ']' + >>| fun name -> Optional name + +let space_opt_angle_param = + char ' ' *> opt_angle_param + +(* Go/Cobra style: space + lowercase type word like "string", "list", "int" *) +let space_type_param = + char ' ' *> peek_char_fail >>= fun c -> + if c >= 'a' && c <= 'z' then + take_while1 (fun c -> c >= 'a' && c <= 'z') >>= fun name -> + (* Only short type-like words *) + if String.length name <= 10 then + return (Mandatory name) + else + fail "too long for type param" + else + fail "not a lowercase type param" let param_parser = - option None (choice [ opt_param; man_param ] >>| fun a -> Some a) + option None + (choice + [ eq_opt_param; eq_man_param; + space_opt_angle_param; space_angle_param; + space_upper_param; space_type_param ] + >>| fun a -> Some a) +(* Switch parser: -a, --all | -a | --all *) let switch_parser = choice [ - (* -a, --all *) - ( short_switch >>= fun s -> - comma *> 
long_switch >>| fun l -> Both (s, l) ); - (* -a *) + (short_switch >>= fun s -> + comma *> long_switch >>| fun l -> Both (s, l)); (short_switch >>| fun s -> Short s); - (* --all *) (long_switch >>| fun l -> Long l); ] -let description = whitespace *> take_till (fun c -> c = '\n') <* end_of_line +(* --- Description parsing with multi-line continuation --- *) +(* Take the rest of the line as text (does not consume newline) *) +let rest_of_line = take_till (fun c -> c = '\n' || c = '\r') + +(* Check if a line is a continuation line: deeply indented, doesn't start with '-' *) +let continuation_line = + peek_string 1 >>= fun _ -> + (* Must start with significant whitespace (8+ spaces or tab) *) + let count_indent s = + let n = ref 0 in + let i = ref 0 in + while !i < String.length s do + (match s.[!i] with + | ' ' -> incr n + | '\t' -> n := !n + 8 + | _ -> i := String.length s); + incr i + done; + !n + in + available >>= fun avail -> + if avail = 0 then fail "eof" + else + (* Peek ahead to see indentation level *) + peek_string (min avail 80) >>= fun preview -> + let indent = count_indent preview in + let trimmed = String.trim preview in + let starts_with_dash = + String.length trimmed > 0 && trimmed.[0] = '-' + in + if indent >= 8 && not starts_with_dash then + (* This is a continuation line — consume whitespace + text *) + inline_ws *> rest_of_line <* eol + else + fail "not a continuation line" + +let description = + inline_ws *> rest_of_line <* eol >>= fun first_line -> + many continuation_line >>| fun cont_lines -> + let all = first_line :: cont_lines in + let all = List.filter (fun s -> String.length (String.trim s) > 0) all in + String.concat " " (List.map String.trim all) + +(* Description that appears on a separate line below the flag (Clap long style) *) +let description_below = + many1 continuation_line >>| fun lines -> + let lines = List.filter (fun s -> String.length (String.trim s) > 0) lines in + String.concat " " (List.map String.trim lines) + +(* --- 
Line classification for skipping --- *) + +(* An option line starts with whitespace then '-' *) +let at_option_line = + peek_string 1 >>= fun _ -> + available >>= fun avail -> + if avail = 0 then fail "eof" + else + peek_string (min avail 40) >>= fun preview -> + let s = String.trim preview in + if String.length s > 0 && s.[0] = '-' then return () + else fail "not an option line" + +(* Skip a non-option line (section header, blank, description-only, etc.) *) +let skip_non_option_line = + (* Don't skip if this looks like an option line *) + (at_option_line *> fail "this is an option line") + <|> (rest_of_line *> eol_strict *> return ()) + +(* --- Entry parsing --- *) + +(* Parse a single flag entry *) let entry = - skip_while (fun c -> c <> '-') - *> lift3 (fun a b c -> (a, b, c)) switch_parser param_parser description - >>| fun (switch, param, desc) -> { switch; param; desc } + inline_ws *> + lift2 (fun (sw, param) desc -> { switch = sw; param; desc }) + (lift2 (fun a b -> (a, b)) switch_parser param_parser) + (description <|> (eol *> (description_below <|> return ""))) -let endline = option () (char '\n' *> return ()) -let entry_line = entry <* endline -let help_parser = many entry_line +(* --- Subcommand parsing --- *) + +(* A subcommand line: " name description" *) +let subcommand_entry = + inline_ws *> + take_while1 (fun c -> c <> ' ' && c <> '\t' && c <> '\n') >>= fun name -> + (* Must have at least 2 spaces before description *) + char ' ' *> char ' ' *> inline_ws *> + rest_of_line <* eol >>| fun desc -> + { name; desc = String.trim desc } + +(* --- Top-level parser --- *) + +(* The main help parser: walks through lines, skipping non-option content, + collecting entries and subcommands *) +let help_parser = + let open Angstrom in + fix (fun _self -> + (* Try to parse an entry *) + let try_entry = + entry >>| fun e -> `Entry e + in + (* Try to parse a subcommand *) + let try_subcommand = + subcommand_entry >>| fun sc -> `Subcommand sc + in + (* Skip one 
non-option line *) + let try_skip = + skip_non_option_line >>| fun () -> `Skip + in + many (choice [ try_entry; try_subcommand; try_skip ]) >>| fun items -> + let entries = List.filter_map (function `Entry e -> Some e | _ -> None) items in + let subcommands = List.filter_map (function `Subcommand sc -> Some sc | _ -> None) items in + { entries; subcommands }) let parse_help txt = - Angstrom.parse_string ~consume:Consume.Prefix help_parser txt + let clean = strip_ansi txt in + match Angstrom.parse_string ~consume:Consume.Prefix help_parser clean with + | Ok result -> Ok result + | Error msg -> Error msg + +(* --- Pretty printers --- *) let print_switch = function | Short o -> Printf.sprintf "Short: %c" o @@ -75,3 +273,11 @@ let print_entry e = Printf.printf "\n\t** ENTRY **\n\tSwitch: %s\n\tParam: %s\n\tDescription: %s\n" (print_switch e.switch) (print_opt e.param) e.desc + +let print_subcommand sc = + Printf.printf "\n\t** SUBCOMMAND **\n\tName: %s\n\tDescription: %s\n" + sc.name sc.desc + +let print_help_result r = + List.iter print_entry r.entries; + List.iter print_subcommand r.subcommands diff --git a/nix/module.nix b/nix/module.nix new file mode 100644 index 0000000..f3a920b --- /dev/null +++ b/nix/module.nix @@ -0,0 +1,109 @@ +# NixOS module: automatic nushell completion generation from manpages +# +# Modeled on nixpkgs' programs/fish.nix completion generator. +# For each package in environment.systemPackages, a small derivation runs +# `inshellah manpage-dir` against its share/man directory. Results are merged +# into a single directory and placed in nushell's vendor autoload path. +# +# Usage in your NixOS configuration: +# +# { pkgs, ... }: { +# imports = [ ./path/to/inshellah/nix/module.nix ]; +# programs.inshellah.enable = true; +# # Optionally add packages not in systemPackages: +# # programs.inshellah.extraPackages = [ pkgs.kubectl ]; +# } + +{ config, lib, pkgs, ... 
}: + +let + cfg = config.programs.inshellah; + + # The inshellah binary — override this if you build from the local flake + inshellahPkg = cfg.package; + + # Per-package derivation: run inshellah manpage-dir against a package's manpages + generateCompletions = package: + pkgs.runCommandLocal + (let + inherit (lib.strings) stringLength substring storeDir; + storeLength = stringLength storeDir + 34; + pathName = substring storeLength (stringLength package - storeLength) package; + in + (package.name or pathName) + "_nu-completions" + ) + ({ + inherit package; + nativeBuildInputs = [ inshellahPkg ]; + } // lib.optionalAttrs (package ? meta.priority) { + meta.priority = package.meta.priority; + }) + '' + mkdir -p $out + if [ -d "$package/share/man" ]; then + inshellah manpage-dir "$package/share/man" > "$out/completions.nu" 2>/dev/null || true + # Remove empty files + find $out -empty -delete + fi + ''; + +in { + options.programs.inshellah = { + enable = lib.mkEnableOption "nushell completion generation from manpages via inshellah"; + + package = lib.mkOption { + type = lib.types.package; + description = "The inshellah package to use for generating completions."; + }; + + extraPackages = lib.mkOption { + type = lib.types.listOf lib.types.package; + default = []; + description = '' + Additional packages to generate nushell completions from, beyond + those in {option}`environment.systemPackages`. + ''; + }; + + generatedCompletionsPath = lib.mkOption { + type = lib.types.str; + default = "/share/nushell/vendor/autoload"; + description = '' + Subdirectory within the merged environment where completion files + are placed. The default matches nushell's vendor autoload convention + (discovered via XDG_DATA_DIRS). + ''; + }; + }; + + config = lib.mkIf cfg.enable { + # Merge all per-package completions into a single directory. 
+ # This path ends up in the system profile, and nushell discovers it + # via XDG_DATA_DIRS -> $prefix/share/nushell/vendor/autoload/ + environment.pathsToLink = [ cfg.generatedCompletionsPath ]; + + environment.systemPackages = [ + (pkgs.buildEnv { + name = "nushell-generated-completions"; + ignoreCollisions = true; + paths = map generateCompletions ( + config.environment.systemPackages ++ cfg.extraPackages + ); + pathsToLink = [ "/" ]; + # Nest everything under the vendor autoload path + postBuild = '' + if [ -d "$out" ]; then + tmp=$(mktemp -d) + cp -r "$out/"* "$tmp/" 2>/dev/null || true + rm -rf "$out/"* + mkdir -p "$out${cfg.generatedCompletionsPath}" + for f in "$tmp"/*.nu; do + [ -f "$f" ] && cp "$f" "$out${cfg.generatedCompletionsPath}/" + done + rm -rf "$tmp" + fi + ''; + }) + ]; + }; +} diff --git a/test/dune b/test/dune index 20b5aab..d54a2fb 100644 --- a/test/dune +++ b/test/dune @@ -1,2 +1,3 @@ (test - (name test_inshellah_parser)) + (name test_inshellah) + (libraries inshellah str)) diff --git a/test/test_inshellah.ml b/test/test_inshellah.ml new file mode 100644 index 0000000..c057a84 --- /dev/null +++ b/test/test_inshellah.ml @@ -0,0 +1,326 @@ +open Inshellah.Parser +open Inshellah.Manpage +open Inshellah.Nushell + +let failures = ref 0 +let passes = ref 0 + +let check name condition = + if condition then begin + incr passes; + Printf.printf " PASS: %s\n" name + end else begin + incr failures; + Printf.printf " FAIL: %s\n" name + end + +let parse txt = + match parse_help txt with + | Ok r -> r + | Error msg -> failwith (Printf.sprintf "parse_help failed: %s" msg) + +(* --- Help parser tests --- *) + +let test_gnu_basic () = + Printf.printf "\n== GNU basic flags ==\n"; + let r = parse " -a, --all do not ignore entries starting with .\n" in + check "one entry" (List.length r.entries = 1); + let e = List.hd r.entries in + check "both switch" (e.switch = Both ('a', "all")); + check "no param" (e.param = None); + check "desc" (String.length e.desc > 0) 
+ +let test_gnu_eq_param () = + Printf.printf "\n== GNU = param ==\n"; + let r = parse " --block-size=SIZE scale sizes by SIZE\n" in + check "one entry" (List.length r.entries = 1); + let e = List.hd r.entries in + check "long switch" (e.switch = Long "block-size"); + check "mandatory param" (e.param = Some (Mandatory "SIZE")) + +let test_gnu_opt_param () = + Printf.printf "\n== GNU optional param ==\n"; + let r = parse " --color[=WHEN] color the output WHEN\n" in + check "one entry" (List.length r.entries = 1); + let e = List.hd r.entries in + check "long switch" (e.switch = Long "color"); + check "optional param" (e.param = Some (Optional "WHEN")) + +let test_underscore_param () = + Printf.printf "\n== Underscore in param (TIME_STYLE) ==\n"; + let r = parse " --time-style=TIME_STYLE time/date format\n" in + check "one entry" (List.length r.entries = 1); + let e = List.hd r.entries in + check "param with underscore" (e.param = Some (Mandatory "TIME_STYLE")) + +let test_short_only () = + Printf.printf "\n== Short-only flag ==\n"; + let r = parse " -v verbose output\n" in + check "one entry" (List.length r.entries = 1); + check "short switch" ((List.hd r.entries).switch = Short 'v') + +let test_long_only () = + Printf.printf "\n== Long-only flag ==\n"; + let r = parse " --help display help\n" in + check "one entry" (List.length r.entries = 1); + check "long switch" ((List.hd r.entries).switch = Long "help") + +let test_multiline_desc () = + Printf.printf "\n== Multi-line description ==\n"; + let r = parse {| --block-size=SIZE with -l, scale sizes by SIZE when printing them; + e.g., '--block-size=M'; see SIZE format below +|} in + check "one entry" (List.length r.entries = 1); + let e = List.hd r.entries in + check "desc includes continuation" (String.length e.desc > 50) + +let test_multiple_entries () = + Printf.printf "\n== Multiple entries ==\n"; + let r = parse {| -a, --all do not ignore entries starting with . + -A, --almost-all do not list implied . and .. 
+ --author with -l, print the author of each file +|} in + check "three entries" (List.length r.entries = 3) + +let test_clap_short_sections () = + Printf.printf "\n== Clap short with section headers ==\n"; + let r = parse {|INPUT OPTIONS: + -e, --regexp=PATTERN A pattern to search for. + -f, --file=PATTERNFILE Search for patterns from the given file. +SEARCH OPTIONS: + -s, --case-sensitive Search case sensitively. +|} in + check "three entries" (List.length r.entries = 3); + let e = List.hd r.entries in + check "first is regexp" (e.switch = Both ('e', "regexp")); + check "first has param" (e.param = Some (Mandatory "PATTERN")) + +let test_clap_long_style () = + Printf.printf "\n== Clap long style (desc below flag) ==\n"; + let r = parse {| -H, --hidden + Include hidden directories and files. + + --no-ignore + Do not respect ignore files. +|} in + check "two entries" (List.length r.entries = 2); + let e = List.hd r.entries in + check "hidden switch" (e.switch = Both ('H', "hidden")); + check "desc below" (String.length e.desc > 0) + +let test_clap_long_angle_param () = + Printf.printf "\n== Clap long angle bracket param ==\n"; + let r = parse {| --nonprintable-notation <notation> + Set notation for non-printable characters. 
+|} in + check "one entry" (List.length r.entries = 1); + let e = List.hd r.entries in + check "long switch" (e.switch = Long "nonprintable-notation"); + check "angle param" (e.param = Some (Mandatory "notation")) + +let test_space_upper_param () = + Printf.printf "\n== Space-separated ALL_CAPS param ==\n"; + let r = parse " -f, --foo FOO foo help\n" in + check "one entry" (List.length r.entries = 1); + let e = List.hd r.entries in + check "switch" (e.switch = Both ('f', "foo")); + check "space param" (e.param = Some (Mandatory "FOO")) + +let test_go_cobra_flags () = + Printf.printf "\n== Go/Cobra flags ==\n"; + let r = parse {|Flags: + -D, --debug Enable debug mode + -H, --host string Daemon socket to connect to + -v, --version Print version information +|} in + check "three flag entries" (List.length r.entries = 3); + (* Check the host flag has a type param *) + let host = List.nth r.entries 1 in + check "host switch" (host.switch = Both ('H', "host")); + check "host type param" (host.param = Some (Mandatory "string")) + +let test_go_cobra_subcommands () = + Printf.printf "\n== Go/Cobra subcommands ==\n"; + let r = parse {|Common Commands: + run Create and run a new container from an image + exec Execute a command in a running container + build Build an image from a Dockerfile +|} in + check "has subcommands" (List.length r.subcommands > 0) + +let test_busybox_tab () = + Printf.printf "\n== Busybox tab-indented ==\n"; + let r = parse "\t-1\tOne column output\n\t-a\tInclude names starting with .\n" in + check "two entries" (List.length r.entries = 2); + check "first is -1" ((List.hd r.entries).switch = Short '1') + +let test_no_debug_prints () = + Printf.printf "\n== No debug side effects ==\n"; + (* The old parser had print_endline at module load time. + If we got here without "opt param is running" on stdout, we're good. 
*) + check "no debug prints" true + +(* --- Manpage parser tests --- *) + +let test_manpage_tp_style () = + Printf.printf "\n== Manpage .TP style ==\n"; + let groff = {|.SH OPTIONS +.TP +\fB\-a\fR, \fB\-\-all\fR +do not ignore entries starting with . +.TP +\fB\-A\fR, \fB\-\-almost\-all\fR +do not list implied . and .. +.TP +\fB\-\-block\-size\fR=\fISIZE\fR +with \fB\-l\fR, scale sizes by SIZE +.SH AUTHOR +Written by someone. +|} in + let entries = parse_manpage_string groff in + check "three entries" (List.length entries = 3); + if List.length entries >= 1 then begin + let e = List.hd entries in + check "first is -a/--all" (e.switch = Both ('a', "all")); + check "first desc" (String.length e.desc > 0) + end; + if List.length entries >= 3 then begin + let e = List.nth entries 2 in + check "block-size switch" (e.switch = Long "block-size"); + check "block-size param" (e.param = Some (Mandatory "SIZE")) + end + +let test_manpage_ip_style () = + Printf.printf "\n== Manpage .IP style ==\n"; + let groff = {|.SH OPTIONS +.IP "\fB\-k\fR, \fB\-\-insecure\fR" +Allow insecure connections. +.IP "\fB\-o\fR, \fB\-\-output\fR \fIfile\fR" +Write output to file. +.SH SEE ALSO +|} in + let entries = parse_manpage_string groff in + check "two entries" (List.length entries = 2); + if List.length entries >= 1 then begin + let e = List.hd entries in + check "first is -k/--insecure" (e.switch = Both ('k', "insecure")) + end + +let test_manpage_groff_stripping () = + Printf.printf "\n== Groff escape stripping ==\n"; + let s = strip_groff_escapes {|\fB\-\-color\fR[=\fIWHEN\fR]|} in + check "font escapes removed" (not (String.contains s 'f' && String.contains s 'B')); + check "dashes converted" (String.contains s '-'); + let s2 = strip_groff_escapes {|\(aqhello\(aq|} in + check "aq -> quote" (String.contains s2 '\'') + +let test_manpage_empty_options () = + Printf.printf "\n== Manpage with no OPTIONS section ==\n"; + let groff = {|.SH NAME +foo \- does stuff +.SH DESCRIPTION +Does stuff. 
+|} in + let entries = parse_manpage_string groff in + check "no entries" (List.length entries = 0) + +(* --- Nushell generation tests --- *) + +let contains s sub = + try + let _ = Str.search_forward (Str.regexp_string sub) s 0 in true + with Not_found -> false + +let test_nushell_basic () = + Printf.printf "\n== Nushell basic extern ==\n"; + let r = parse " -a, --all do not ignore entries starting with .\n" in + let nu = generate_extern "ls" r in + check "has extern" (contains nu "export extern \"ls\""); + check "has --all(-a)" (contains nu "--all(-a)"); + check "has comment" (contains nu "# do not ignore") + +let test_nushell_param_types () = + Printf.printf "\n== Nushell param type mapping ==\n"; + let r = parse {| -w, --width=COLS set output width + --block-size=SIZE scale sizes + -o, --output FILE output file +|} in + let nu = generate_extern "ls" r in + check "COLS -> int" (contains nu "--width(-w): int"); + check "SIZE -> string" (contains nu "--block-size: string"); + check "FILE -> path" (contains nu "--output(-o): path") + +let test_nushell_subcommands () = + Printf.printf "\n== Nushell subcommands ==\n"; + let r = parse {|Common Commands: + run Create and run a new container + exec Execute a command + +Flags: + -D, --debug Enable debug mode +|} in + let nu = generate_extern "docker" r in + check "has main extern" (contains nu "export extern \"docker\""); + check "has --debug" (contains nu "--debug(-D)"); + check "has run subcommand" (contains nu "export extern \"docker run\""); + check "has exec subcommand" (contains nu "export extern \"docker exec\"") + +let test_nushell_from_manpage () = + Printf.printf "\n== Nushell from manpage ==\n"; + let groff = {|.SH OPTIONS +.TP +\fB\-a\fR, \fB\-\-all\fR +do not ignore entries starting with . 
+.TP +\fB\-\-block\-size\fR=\fISIZE\fR +scale sizes by SIZE +.SH AUTHOR +|} in + let entries = parse_manpage_string groff in + let nu = generate_extern_from_entries "ls" entries in + check "has extern" (contains nu "export extern \"ls\""); + check "has --all(-a)" (contains nu "--all(-a)"); + check "has --block-size" (contains nu "--block-size: string") + +let test_nushell_module () = + Printf.printf "\n== Nushell module wrapper ==\n"; + let r = parse " -v, --verbose verbose output\n" in + let nu = generate_module "myapp" r in + check "has module" (contains nu "module myapp-completions"); + check "has extern inside" (contains nu "export extern \"myapp\""); + check "has flag" (contains nu "--verbose(-v)") + +let () = + Printf.printf "Running help parser tests...\n"; + test_gnu_basic (); + test_gnu_eq_param (); + test_gnu_opt_param (); + test_underscore_param (); + test_short_only (); + test_long_only (); + test_multiline_desc (); + test_multiple_entries (); + test_clap_short_sections (); + test_clap_long_style (); + test_clap_long_angle_param (); + test_space_upper_param (); + test_go_cobra_flags (); + test_go_cobra_subcommands (); + test_busybox_tab (); + test_no_debug_prints (); + + Printf.printf "\nRunning manpage parser tests...\n"; + test_manpage_tp_style (); + test_manpage_ip_style (); + test_manpage_groff_stripping (); + test_manpage_empty_options (); + + Printf.printf "\nRunning nushell generation tests...\n"; + test_nushell_basic (); + test_nushell_param_types (); + test_nushell_subcommands (); + test_nushell_from_manpage (); + test_nushell_module (); + + Printf.printf "\n=== Results: %d passed, %d failed ===\n" !passes !failures; + if !failures > 0 then exit 1 diff --git a/test/test_inshellah_parser.ml b/test/test_inshellah_parser.ml deleted file mode 100644 index e69de29..0000000