From 7f0ec8ab4d36ce9e421183da43c2b868af65f01f Mon Sep 17 00:00:00 2001 From: atagen Date: Sat, 21 Mar 2026 02:07:46 +1100 Subject: [PATCH] comprehensive completion generation: native, manpage, --help Three-strategy pipeline with priority: native completion generators (e.g. CMD completions nushell) > manpage parsing > --help fallback. Single `generate` command produces one module-wrapped .nu file per command. Parallel execution scaled to cores, 200ms timeouts, ELF string scanning to skip binaries without -h support, native gzip decompression via camlzip, SYNOPSIS-based subcommand detection, nix3 manpage strategy, deduplication, nushell builtin exclusion. --- bin/main.ml | 411 ++++++++++++++++++++++++++++++++++------- dune-project | 18 +- flake.nix | 36 ++++ lib/dune | 2 +- lib/manpage.ml | 207 +++++++++++++++++---- lib/nushell.ml | 198 +++++++++++++------- lib/parser.ml | 14 +- nix/module.nix | 122 +++++------- test/test_inshellah.ml | 194 +++++++++++++++++-- 9 files changed, 937 insertions(+), 265 deletions(-) diff --git a/bin/main.ml b/bin/main.ml index 4f07951..c1772db 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -3,107 +3,392 @@ open Inshellah.Manpage open Inshellah.Nushell let usage () = - Printf.eprintf {|inshellah — generate nushell completions from manpages and --help output + Printf.eprintf + {|inshellah - generate nushell completions Usage: - inshellah manpage FILE Parse a manpage (.1, .1.gz) and emit nushell extern - inshellah manpage-dir DIR Batch-process all manpages under DIR/man1/ - inshellah help CMD [ARGS...] Run CMD ARGS --help, parse output, emit nushell extern - inshellah parse-help CMD Read --help text from stdin, emit nushell extern for CMD - inshellah demo Run built-in demo + inshellah generate BINDIR MANDIR -o OUTDIR + Full generation: native completions, manpages, and --help fallback. + One .nu file per command. + inshellah manpage FILE Parse a manpage and emit nushell extern + inshellah manpage-dir DIR Batch-process manpages under DIR + inshellah help CMD [ARGS...] Run CMD ARGS --help, parse and emit extern + inshellah parse-help CMD Read --help text from stdin, emit extern + inshellah demo Run built-in demo |}; exit 1 -(* Extract command name from a manpage filename *) +let command_sections = [1; 8] + +let contains_str s sub = + try ignore (Str.search_forward (Str.regexp_string sub) s 0); true + with Not_found -> false + +let is_nushell_source text = + String.length text > 20 + && (contains_str text "export extern" + || contains_str text "export def" + || (contains_str text "module " && contains_str text "export")) + +let filename_of_cmd cmd = + String.map (function + | ('a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.') as c -> c | _ -> '-') cmd + +let write_file path contents = + let oc = open_out path in output_string oc contents; close_out oc + let cmd_name_of_manpage path = let base = Filename.basename path in - (* strip .gz if present *) let base = if Filename.check_suffix base ".gz" then Filename.chop_suffix base ".gz" - else base + else base in + try Filename.chop_extension base with Invalid_argument _ -> base + +let safe_env = lazy ( + Array.of_list ( + List.filter (fun s -> + not (String.starts_with ~prefix:"DISPLAY=" s + || String.starts_with ~prefix:"WAYLAND_DISPLAY=" s + || String.starts_with ~prefix:"DBUS_SESSION_BUS_ADDRESS=" s + || String.starts_with ~prefix:"XAUTHORITY=" s)) + (Array.to_list (Unix.environment ())))) + +let run_cmd args timeout_ms = + let (rd, wr) = Unix.pipe () in + let devnull = Unix.openfile "/dev/null" [Unix.O_RDONLY] 0 in + let argv = Array.of_list args in + let pid = + try Unix.create_process_env (List.hd args) argv + (Lazy.force safe_env) devnull wr wr + with Unix.Unix_error _ -> + Unix.close rd; Unix.close wr; Unix.close devnull; -1 in + Unix.close wr; Unix.close devnull; + if pid < 0 then (Unix.close rd; None) + else begin + let buf = Buffer.create 4096 in + let deadline = Unix.gettimeofday () +. (float_of_int timeout_ms /. 1000.0) in + let chunk = Bytes.create 8192 in + let alive = ref true in + (try while !alive do + let remaining = deadline -. Unix.gettimeofday () in + if remaining <= 0.0 then alive := false + else match Unix.select [rd] [] [] (min remaining 0.05) with + | (_ :: _, _, _) -> + let n = Unix.read rd chunk 0 8192 in + if n = 0 then raise Exit + else Buffer.add_subbytes buf chunk 0 n + | _ -> () + done with Exit -> ()); + Unix.close rd; + if not !alive then begin + (try Unix.kill pid Sys.sigkill with Unix.Unix_error _ -> ()); + ignore (Unix.waitpid [] pid) + end else + ignore (Unix.waitpid [] pid); + if Buffer.length buf > 0 then Some (Buffer.contents buf) else None + end + +let is_executable path = + try let st = Unix.stat path in + st.st_kind = Unix.S_REG && st.st_perm land 0o111 <> 0 + with Unix.Unix_error _ -> false + +let is_shell_script path = + try + let real = Unix.realpath path in + let ic = open_in_bin real in + let line = (try let b = Bytes.create 256 in + let n = input ic b 0 256 in + let s = Bytes.sub_string b 0 n in + (match String.index_opt s '\n' with Some i -> String.sub s 0 i | None -> s) + with End_of_file -> "") in + close_in ic; + String.length line >= 2 && line.[0] = '#' && line.[1] = '!' + && let shebang = String.lowercase_ascii line in + List.exists (fun s -> contains_str shebang s) + ["bash"; "/sh "; "/sh\n"; "zsh"; "fish"; "nushell"; "/nu "; "/nu\n"; + "dash"; "ksh"; "csh"] + with _ -> false + +let elf_scan path needles = + let found = Hashtbl.create 4 in + let remaining () = List.filter (fun n -> not (Hashtbl.mem found n)) needles in + (try + let real = Unix.realpath path in + let ic = open_in_bin real in + let magic = Bytes.create 4 in + really_input ic magic 0 4; + if Bytes.get magic 0 = '\x7f' && Bytes.get magic 1 = 'E' + && Bytes.get magic 2 = 'L' && Bytes.get magic 3 = 'F' then begin + let max_needle = List.fold_left (fun m n -> max m (String.length n)) 0 needles in + let chunk_size = 65536 in + let buf = Bytes.create (chunk_size + max_needle) in + let carry = ref 0 in + let eof = ref false in + while not !eof && remaining () <> [] do + let n = (try input ic buf !carry chunk_size with End_of_file -> 0) in + if n = 0 then eof := true + else begin + let total = !carry + n in + List.iter (fun needle -> + if not (Hashtbl.mem found needle) then begin + let nlen = String.length needle in + let i = ref 0 in + while !i <= total - nlen do + if Bytes.get buf !i = needle.[0] then begin + let ok = ref true in + for j = 1 to nlen - 1 do + if Bytes.get buf (!i + j) <> needle.[j] then ok := false + done; + if !ok then (Hashtbl.replace found needle true; i := total) + else incr i + end else incr i + done + end + ) (remaining ()); + let new_carry = min max_needle total in + Bytes.blit buf (total - new_carry) buf 0 new_carry; + carry := new_carry + end + done + end; + close_in ic + with _ -> + List.iter (fun n -> Hashtbl.replace found n true) needles); + found + +let skip_name name = + String.length name = 0 || name.[0] = '.' + || String.starts_with ~prefix:"lib" name + || String.ends_with ~suffix:"-daemon" name + || String.ends_with ~suffix:"-wrapped" name + || String.ends_with ~suffix:".so" name + +type bin_class = Skip | Try_help | Try_native_and_help + +let classify_binary bindir name = + if is_nushell_builtin name || skip_name name then Skip + else + let path = Filename.concat bindir name in + if not (is_executable path) then Skip + else if is_shell_script path then Try_help + else + let scan = elf_scan path ["-h"; "completion"] in + if not (Hashtbl.mem scan "-h") then Skip + else if Hashtbl.mem scan "completion" then Try_native_and_help + else Try_help + +let num_cores () = + try + let ic = open_in "/proc/cpuinfo" in + let n = ref 0 in + (try while true do + if String.starts_with ~prefix:"processor" (input_line ic) then incr n + done with End_of_file -> ()); + close_in ic; max 1 !n + with _ -> 4 + +let try_native_completion bin_path = + let patterns = [ + [bin_path; "completions"; "nushell"]; + [bin_path; "completion"; "nushell"]; + [bin_path; "--completions"; "nushell"]; + [bin_path; "--completion"; "nushell"]; + [bin_path; "generate-completion"; "nushell"]; + [bin_path; "--generate-completion"; "nushell"]; + [bin_path; "shell-completions"; "nushell"]; + ] in + let rec go = function + | [] -> None + | args :: rest -> + match run_cmd args 500 with + | Some text when is_nushell_source text -> Some text + | _ -> go rest in - (* strip .N section suffix *) - try Filename.chop_extension base - with Invalid_argument _ -> base + go patterns let cmd_manpage file = - let cmd = cmd_name_of_manpage file in - let entries = parse_manpage_file file in - if entries <> [] then - print_string (generate_extern_from_entries cmd entries) + let contents = read_manpage_file file in + let fallback = cmd_name_of_manpage file in + let cmd = match extract_synopsis_command contents with + | Some name -> name | None -> fallback in + if not (is_nushell_builtin cmd) then + let result = parse_manpage_string contents in + if result.entries <> [] then + print_string (generate_extern cmd result) let cmd_manpage_dir dir = - (* Walk man1/ through man9/ looking for manpages *) - for section = 1 to 9 do + List.iter (fun section -> let subdir = Filename.concat dir (Printf.sprintf "man%d" section) in - if Sys.file_exists subdir && Sys.is_directory subdir then begin - let files = Sys.readdir subdir in - Array.sort String.compare files; + if Sys.file_exists subdir && Sys.is_directory subdir then Array.iter (fun file -> - let path = Filename.concat subdir file in - try cmd_manpage path - with _ -> () (* skip unparseable manpages *) - ) files - end - done + (try cmd_manpage (Filename.concat subdir file) with _ -> ()) + ) (Sys.readdir subdir) + ) command_sections let cmd_help args = match args with - | [] -> Printf.eprintf "error: help mode requires a command name\n"; exit 1 + | [] -> Printf.eprintf "error: help requires a command name\n"; exit 1 | cmd :: rest -> - let full_cmd = - String.concat " " (List.map Filename.quote (cmd :: rest @ ["--help"])) - in - let ic = Unix.open_process_in (full_cmd ^ " 2>&1") in - let buf = Buffer.create 4096 in - (try while true do - let line = input_line ic in - Buffer.add_string buf line; - Buffer.add_char buf '\n' - done with End_of_file -> ()); - let _ = Unix.close_process_in ic in - let text = Buffer.contents buf in - let cmd_name = Filename.basename cmd in - (match parse_help text with - | Ok r -> print_string (generate_extern cmd_name r) - | Error msg -> Printf.eprintf "parse error for %s: %s\n" cmd_name msg; exit 1) + let name = Filename.basename cmd in + (match run_cmd (cmd :: rest @ ["--help"]) 10_000 with + | None -> Printf.eprintf "no output from %s --help\n" name; exit 1 + | Some text -> + (match parse_help text with + | Ok r -> print_string (generate_extern name r) + | Error msg -> Printf.eprintf "parse error for %s: %s\n" name msg; exit 1)) let cmd_parse_help cmd = let buf = Buffer.create 4096 in (try while true do - let line = input_line stdin in - Buffer.add_string buf line; - Buffer.add_char buf '\n' + Buffer.add_string buf (input_line stdin); Buffer.add_char buf '\n' done with End_of_file -> ()); - let text = Buffer.contents buf in - (match parse_help text with + (match parse_help (Buffer.contents buf) with | Ok r -> print_string (generate_extern cmd r) | Error msg -> Printf.eprintf "parse error for %s: %s\n" cmd msg; exit 1) -let cmd_demo () = - let ls_help = - {|Usage: ls [OPTION]... [FILE]... -List information about the FILEs (the current directory by default). +let process_manpage file = + try + let contents = read_manpage_file file in + let fallback = cmd_name_of_manpage file in + let cmd = match extract_synopsis_command contents with + | Some name -> name | None -> fallback in + if is_nushell_builtin cmd then None + else + let result = parse_manpage_string contents in + if result.entries <> [] then Some (cmd, result) else None + with _ -> None +let manpaged_commands mandir = + let cmds = Hashtbl.create 128 in + List.iter (fun section -> + let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in + if Sys.file_exists subdir && Sys.is_directory subdir then + Array.iter (fun f -> Hashtbl.replace cmds (cmd_name_of_manpage f) true) + (Sys.readdir subdir) + ) command_sections; + cmds + +let cmd_generate bindir mandir outdir = + let done_cmds = Hashtbl.create 256 in + let bins = Sys.readdir bindir in + Array.sort String.compare bins; + let manpaged = manpaged_commands mandir in + let max_jobs = num_cores () in + let classified = Array.map (fun name -> + if Hashtbl.mem manpaged name then (name, Skip) + else (name, classify_binary bindir name) + ) bins in + let pending = ref [] in + let reap () = + pending := List.filter (fun pid -> + match Unix.waitpid [Unix.WNOHANG] pid with + | (0, _) -> true | _ -> false + | exception Unix.Unix_error (Unix.ECHILD, _, _) -> false + ) !pending in + let wait_slot () = + while List.length !pending >= max_jobs do + reap (); + if List.length !pending >= max_jobs then + (try ignore (Unix.wait ()) with Unix.Unix_error _ -> ()); + reap () + done in + Array.iter (fun (name, cls) -> + match cls with + | Skip -> () + | Try_help | Try_native_and_help -> + wait_slot (); + let pid = Unix.fork () in + if pid = 0 then begin + (try + let path = Filename.concat bindir name in + let native_ok = match cls with + | Try_native_and_help -> + (match try_native_completion path with + | Some src -> + write_file (Filename.concat outdir (filename_of_cmd name ^ ".nu")) src; + true + | None -> false) + | _ -> false in + if not native_ok then begin + let text = match run_cmd [path; "--help"] 200 with + | Some _ as r -> r + | None -> run_cmd [path; "-h"] 200 in + match text with + | None -> () + | Some t -> + (match parse_help t with + | Ok r when r.entries <> [] -> + write_file (Filename.concat outdir (filename_of_cmd name ^ ".nu")) + (generate_module name r) + | _ -> ()) + end; + exit 0 + with _ -> exit 1) + end else begin + pending := pid :: !pending; + Hashtbl.replace done_cmds name true + end + ) classified; + while !pending <> [] do + (try ignore (Unix.wait ()) with Unix.Unix_error _ -> ()); + reap () + done; + List.iter (fun section -> + let subdir = Filename.concat mandir (Printf.sprintf "man%d" section) in + if Sys.file_exists subdir && Sys.is_directory subdir then begin + let files = Sys.readdir subdir in + Array.sort String.compare files; + Array.iter (fun file -> + match process_manpage (Filename.concat subdir file) with + | None -> () + | Some (cmd, result) -> + let base = List.hd (String.split_on_char ' ' cmd) in + if Hashtbl.mem done_cmds base then () + else begin + let outpath = Filename.concat outdir (filename_of_cmd base ^ ".nu") in + if Sys.file_exists outpath then begin + let existing = + let ic = open_in outpath in + let n = in_channel_length ic in + let s = Bytes.create n in + really_input ic s 0 n; close_in ic; + Bytes.to_string s in + let mod_name = module_name_of base in + let use_line = Printf.sprintf "\nuse %s *\n" mod_name in + let base_content = + if contains_str existing use_line then + String.sub existing 0 + (Str.search_forward (Str.regexp_string use_line) existing 0) + else existing in + write_file outpath + (String.concat "" [base_content; generate_extern cmd result; use_line]) + end else + write_file outpath (generate_module base result) + end + ) files + end + ) command_sections + +let cmd_demo () = + Printf.printf "# Generated by: inshellah demo\n\n"; + match parse_help + {|Usage: ls [OPTION]... [FILE]... -a, --all do not ignore entries starting with . -A, --almost-all do not list implied . and .. - --block-size=SIZE with -l, scale sizes by SIZE when printing them; - e.g., '--block-size=M'; see SIZE format below + --block-size=SIZE with -l, scale sizes by SIZE when printing --color[=WHEN] color the output WHEN -h, --human-readable with -l and -s, print sizes like 1K 234M 2G etc. --help display this help and exit --version output version information and exit -|} - in - Printf.printf "# Generated by: inshellah demo\n\n"; - (match parse_help ls_help with - | Ok r -> print_string (generate_extern "ls" r) - | Error msg -> Printf.eprintf "parse error: %s\n" msg) +|} with + | Ok r -> print_string (generate_extern "ls" r) + | Error msg -> Printf.eprintf "parse error: %s\n" msg let () = - let args = Array.to_list Sys.argv |> List.tl in - match args with + match Array.to_list Sys.argv |> List.tl with + | ["generate"; bindir; mandir; "-o"; outdir] -> cmd_generate bindir mandir outdir | ["manpage"; file] -> cmd_manpage file | ["manpage-dir"; dir] -> cmd_manpage_dir dir | "help" :: rest -> cmd_help rest diff --git a/dune-project b/dune-project index 21eaff0..514770f 100644 --- a/dune-project +++ b/dune-project @@ -7,25 +7,23 @@ (source (github username/reponame)) -(authors "Author Name ") +(authors "atagen ") -(maintainers "Maintainer Name ") +(maintainers "atagen ") -(license LICENSE) - -(documentation https://url/to/documentation) +(license GPL-3.0-or-later) (package (name inshellah) - (synopsis "A short synopsis") - (description "A longer description") + (synopsis "Nushell completions generator") + (description + "Inshellah parses manpages and --help switches to generate completions for nushell.") (depends ocaml dune angstrom angstrom-unix + camlzip (ppx_inline_test :with-test)) (tags - ("add topics" "to describe" your project))) - -; See the complete stanza docs at https://dune.readthedocs.io/en/stable/reference/dune-project/index.html + (shell completions nushell parser angstrom))) diff --git a/flake.nix b/flake.nix index e9b3374..90a94f9 100644 --- a/flake.nix +++ b/flake.nix @@ -19,6 +19,7 @@ ocaml angstrom angstrom-unix + camlzip ppx_inline_test ocaml-lsp ocamlformat @@ -41,6 +42,7 @@ ocaml angstrom angstrom-unix + camlzip ]; meta.mainProgram = "inshellah"; @@ -49,6 +51,40 @@ } ); + checks = forAllSystems ( + pkgs: sys: + let + # Evaluate a minimal NixOS config that enables the module. + # If the module has infinite recursion, this evaluation will fail. + mockSystem = nixpkgs.lib.nixosSystem { + system = sys; + modules = [ + self.nixosModules.default + { + # Minimal config to make NixOS evaluation happy + boot.loader.grub.device = "nodev"; + fileSystems."/" = { device = "/dev/sda1"; fsType = "ext4"; }; + programs.inshellah.enable = true; + environment.systemPackages = [ pkgs.hello ]; + } + ]; + }; + in + { + module-no-infinite-recursion = pkgs.runCommandLocal "inshellah-module-test" { + # Force evaluation of extraSetup and systemPackages at eval time. + # If the module has infinite recursion, this derivation can't even + # be instantiated. + extraSetupLen = builtins.stringLength mockSystem.config.environment.extraSetup; + syspkgCount = builtins.length mockSystem.config.environment.systemPackages; + } '' + echo "environment.extraSetup length: $extraSetupLen" + echo "environment.systemPackages count: $syspkgCount" + touch $out + ''; + } + ); + nixosModules.default = { pkgs, ... }: { diff --git a/lib/dune b/lib/dune index 9c8ae2e..38defe1 100644 --- a/lib/dune +++ b/lib/dune @@ -1,3 +1,3 @@ (library (name inshellah) - (libraries angstrom angstrom-unix str unix)) + (libraries angstrom angstrom-unix camlzip str unix)) diff --git a/lib/manpage.ml b/lib/manpage.ml index bd5eb0d..036a09a 100644 --- a/lib/manpage.ml +++ b/lib/manpage.ml @@ -6,6 +6,11 @@ let strip_groff_escapes s = let buf = Buffer.create (String.length s) in let len = String.length s in let i = ref 0 in + let last = ref '\000' in + let put c = Buffer.add_char buf c; last := c in + let is_alnum c = + (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') + in while !i < len do if s.[!i] = '\\' && !i + 1 < len then begin let next = s.[!i + 1] in @@ -13,10 +18,13 @@ let strip_groff_escapes s = | 'f' -> (* Font escape: \fB, \fI, \fP, \fR, \f(XX, \f[...] *) if !i + 2 < len then begin - if s.[!i + 2] = '(' then - i := !i + 4 (* \f(XX *) - else if s.[!i + 2] = '[' then begin - (* \f[...] - skip to ] *) + let fc = s.[!i + 2] in + (* Insert space before italic font to preserve word boundaries + e.g. \fB--max-results\fR\fIcount\fR → "--max-results count" *) + if fc = 'I' && is_alnum !last then put ' '; + if fc = '(' then + i := !i + 5 (* \f(XX *) + else if fc = '[' then begin i := !i + 3; while !i < len && s.[!i] <> ']' do incr i done; if !i < len then incr i @@ -25,7 +33,7 @@ let strip_groff_escapes s = end else i := !i + 2 | '-' -> - Buffer.add_char buf '-'; + put '-'; i := !i + 2 | '&' | '/' | ',' -> (* Zero-width characters *) @@ -35,10 +43,10 @@ let strip_groff_escapes s = if !i + 3 < len then begin let name = String.sub s (!i + 2) 2 in (match name with - | "aq" -> Buffer.add_char buf '\'' - | "lq" | "Lq" -> Buffer.add_char buf '"' - | "rq" | "Rq" -> Buffer.add_char buf '"' - | "em" | "en" -> Buffer.add_char buf '-' + | "aq" -> put '\'' + | "lq" | "Lq" -> put '"' + | "rq" | "Rq" -> put '"' + | "em" | "en" -> put '-' | _ -> ()); i := !i + 4 end else @@ -51,9 +59,9 @@ let strip_groff_escapes s = if !i < len then begin let name = String.sub s start (!i - start) in (match name with - | "aq" -> Buffer.add_char buf '\'' - | "lq" | "Lq" -> Buffer.add_char buf '"' - | "rq" | "Rq" -> Buffer.add_char buf '"' + | "aq" -> put '\'' + | "lq" | "Lq" -> put '"' + | "rq" | "Rq" -> put '"' | _ -> ()); incr i end @@ -106,19 +114,19 @@ let strip_groff_escapes s = incr i end | 'e' -> - Buffer.add_char buf '\\'; + put '\\'; i := !i + 2 | '\\' -> - Buffer.add_char buf '\\'; + put '\\'; i := !i + 2 | ' ' -> - Buffer.add_char buf ' '; + put ' '; i := !i + 2 | _ -> (* Unknown escape, skip *) i := !i + 2 end else begin - Buffer.add_char buf s.[!i]; + put s.[!i]; incr i end done; @@ -262,18 +270,29 @@ let parse_tag_to_entry tag desc = | Ok (switch, param) -> Some { switch; param; desc } | Error _ -> None +(* Extract tag text from a macro line (.B, .I preserve spaces; .BI/.BR/.IR alternate) *) +let tag_of_macro name args = + match name with + | "B" | "I" -> strip_groff_escapes args |> String.trim + | _ -> strip_inline_macro_args args |> strip_groff_escapes |> String.trim + (* Strategy A: .TP style (most common — GNU coreutils, help2man) *) let strategy_tp lines = let rec walk lines acc = match lines with | [] -> List.rev acc | Macro ("TP", _) :: rest -> - (* Next text line is the tag *) + (* Next line is the tag — could be Text or a formatting macro *) begin match rest with | Text tag :: rest2 -> let (desc, rest3) = collect_text_lines rest2 [] in let entry = parse_tag_to_entry tag desc in walk rest3 (match entry with Some e -> e :: acc | None -> acc) + | Macro (("B" | "I" | "BI" | "BR" | "IR") as m, args) :: rest2 -> + let tag = tag_of_macro m args in + let (desc, rest3) = collect_text_lines rest2 [] in + let entry = parse_tag_to_entry tag desc in + walk rest3 (match entry with Some e -> e :: acc | None -> acc) | _ -> walk rest acc end | _ :: rest -> walk rest acc @@ -352,6 +371,62 @@ let strategy_deroff_lines lines = | Ok result -> result.entries | Error _ -> [] +(* Strategy E: Nix3-style bullet .IP with .UR/.UE hyperlinks *) +let strategy_nix lines = + let is_bullet_ip args = + String.length (String.trim args) > 0 + in + let rec walk lines acc = + match lines with + | [] -> List.rev acc + | Macro ("IP", args) :: rest when is_bullet_ip args -> + (* Collect tag: skip UR/UE macros, collect Text lines *) + let rec collect_tag lines parts = + match lines with + | Macro ("UR", _) :: rest2 -> collect_tag rest2 parts + | Macro ("UE", _) :: rest2 -> collect_tag rest2 parts + | Text s :: rest2 -> collect_tag rest2 (s :: parts) + | _ -> (String.concat " " (List.rev parts), lines) + in + let (tag, rest2) = collect_tag rest [] in + (* Collect description after the description .IP marker *) + let rec collect_desc lines parts = + match lines with + | Macro ("IP", dargs) :: rest3 when not (is_bullet_ip dargs) -> + collect_desc_text rest3 parts + | _ -> (String.concat " " (List.rev parts), lines) + and collect_desc_text lines parts = + match lines with + | Text s :: rest3 -> collect_desc_text rest3 (s :: parts) + | Macro ("IP", args) :: _ when is_bullet_ip args -> + (String.concat " " (List.rev parts), lines) + | Macro (("SS" | "SH"), _) :: _ -> + (String.concat " " (List.rev parts), lines) + | Macro ("RS", _) :: rest3 -> + skip_rs rest3 parts 1 + | Macro ("IP", _) :: rest3 -> + (* Non-bullet IP = continuation paragraph *) + collect_desc_text rest3 parts + | Macro _ :: rest3 -> collect_desc_text rest3 parts + | Blank :: rest3 -> collect_desc_text rest3 parts + | Comment :: rest3 -> collect_desc_text rest3 parts + | [] -> (String.concat " " (List.rev parts), []) + and skip_rs lines parts depth = + match lines with + | Macro ("RE", _) :: rest3 -> + if depth <= 1 then collect_desc_text rest3 parts + else skip_rs rest3 parts (depth - 1) + | Macro ("RS", _) :: rest3 -> skip_rs rest3 parts (depth + 1) + | _ :: rest3 -> skip_rs rest3 parts depth + | [] -> (String.concat " " (List.rev parts), []) + in + let (desc, rest3) = collect_desc rest2 [] in + let entry = parse_tag_to_entry tag desc in + walk rest3 (match entry with Some e -> e :: acc | None -> acc) + | _ :: rest -> walk rest acc + in + walk lines [] + (* Count macros of a given type *) let count_macro name lines = List.fold_left (fun n line -> @@ -370,46 +445,106 @@ let extract_entries lines = (* Try PP+RS if both present *) if count_macro "PP" lines > 0 && count_macro "RS" lines > 0 then results := ("PP+RS", strategy_pp_rs lines) :: !results; + (* Try nix3 style if UR macros present *) + if count_macro "UR" lines > 0 && count_macro "IP" lines > 0 then + results := ("nix", strategy_nix lines) :: !results; (* Always try deroff as fallback *) results := ("deroff", strategy_deroff_lines lines) :: !results; - (* Pick the result with the most entries *) + (* Prefer specialized strategies over deroff fallback *) + let specialized = + List.filter (fun (name, entries) -> name <> "deroff" && entries <> []) !results + in + let candidates = if specialized <> [] then specialized else !results in let best = List.fold_left (fun (best_name, best_entries) (name, entries) -> if List.length entries >= List.length best_entries then (name, entries) else (best_name, best_entries) - ) ("none", []) !results + ) ("none", []) candidates in snd best +(* --- SYNOPSIS command name extraction --- *) + +let extract_synopsis_command_lines lines = + let classified = List.map classify_line lines in + let is_synopsis name = + let s = String.uppercase_ascii (String.trim name) in + s = "SYNOPSIS" + in + let extract_cmd line = + let words = String.split_on_char ' ' (String.trim line) in + let words = List.filter (fun w -> String.length w > 0) words in + let is_cmd_char = function + | 'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '.' -> true + | _ -> false + in + let rec take = function + | [] -> [] + | w :: rest -> + if String.length w > 0 + && (w.[0] = '[' || w.[0] = '-' || w.[0] = '<' + || w.[0] = '(' || w.[0] = '{') + then [] + else if String.for_all is_cmd_char w then + w :: take rest + else [] + in + match take words with + | [] -> None + | cmd -> Some (String.concat " " cmd) + in + let rec find = function + | [] -> None + | Macro ("SH", args) :: rest when is_synopsis args -> collect rest + | _ :: rest -> find rest + and collect = function + | [] -> None + | Macro ("SH", _) :: _ -> None + | Text s :: _ -> + let s = String.trim s in + if String.length s > 0 then extract_cmd s else None + | Macro (("B" | "BI" | "BR"), args) :: _ -> + let s = strip_inline_macro_args args |> strip_groff_escapes |> String.trim in + if String.length s > 0 then extract_cmd s else None + | _ :: rest -> collect rest + in + find classified + +let extract_synopsis_command contents = + let lines = String.split_on_char '\n' contents in + extract_synopsis_command_lines lines + (* --- Top-level API --- *) let parse_manpage_lines lines = let options_section = extract_options_section lines in - extract_entries options_section + let entries = extract_entries options_section in + { entries; subcommands = [] } let parse_manpage_string contents = let lines = String.split_on_char '\n' contents in parse_manpage_lines lines -let parse_manpage_gzipped_file path = - let ic = Unix.open_process_in (Printf.sprintf "gzip -dc %s" (Filename.quote path)) in - let buf = Buffer.create 4096 in - (try while true do - let line = input_line ic in - Buffer.add_string buf line; - Buffer.add_char buf '\n' - done with End_of_file -> ()); - let _ = Unix.close_process_in ic in - parse_manpage_string (Buffer.contents buf) - -let parse_manpage_file path = - if Filename.check_suffix path ".gz" then - parse_manpage_gzipped_file path - else begin +let read_manpage_file path = + if Filename.check_suffix path ".gz" then begin + let ic = Gzip.open_in path in + let buf = Buffer.create 8192 in + let chunk = Bytes.create 8192 in + (try while true do + let n = Gzip.input ic chunk 0 8192 in + if n = 0 then raise Exit + else Buffer.add_subbytes buf chunk 0 n + done with Exit | End_of_file -> ()); + Gzip.close_in ic; + Buffer.contents buf + end else begin let ic = open_in path in let n = in_channel_length ic in let s = Bytes.create n in really_input ic s 0 n; close_in ic; - parse_manpage_string (Bytes.to_string s) + Bytes.to_string s end + +let parse_manpage_file path = + read_manpage_file path |> parse_manpage_string diff --git a/lib/nushell.ml b/lib/nushell.ml index b6970f3..517090f 100644 --- a/lib/nushell.ml +++ b/lib/nushell.ml @@ -1,6 +1,90 @@ open Parser -(* Map a param name/type hint to a nushell type *) +(* Nushell built-in commands and keywords *) +let nushell_builtins = [ + "alias"; "all"; "ansi"; "any"; "append"; "ast"; "attr"; + "bits"; "break"; "bytes"; + "cal"; "cd"; "char"; "chunk-by"; "chunks"; "clear"; "collect"; + "columns"; "commandline"; "compact"; "complete"; "config"; "const"; + "continue"; "cp"; + "date"; "debug"; "decode"; "def"; "default"; "describe"; "detect"; + "do"; "drop"; "du"; + "each"; "echo"; "encode"; "enumerate"; "error"; "every"; "exec"; + "exit"; "explain"; "explore"; "export"; "export-env"; "extern"; + "fill"; "filter"; "find"; "first"; "flatten"; "for"; "format"; "from"; + "generate"; "get"; "glob"; "grid"; "group-by"; + "hash"; "headers"; "help"; "hide"; "hide-env"; "histogram"; + "history"; "http"; + "if"; "ignore"; "input"; "insert"; "inspect"; "interleave"; "into"; + "is-admin"; "is-empty"; "is-not-empty"; "is-terminal"; "items"; + "job"; "join"; + "keybindings"; "kill"; + "last"; "length"; "let"; "let-env"; "lines"; "load-env"; "loop"; "ls"; + "match"; "math"; "merge"; "metadata"; "mkdir"; "mktemp"; "module"; + "move"; "mut"; "mv"; + "nu-check"; "nu-highlight"; + "open"; "overlay"; + "panic"; "par-each"; "parse"; "path"; "plugin"; "port"; "prepend"; "print"; "ps"; + "query"; + "random"; "reduce"; "reject"; "rename"; "return"; "reverse"; "rm"; + "roll"; "rotate"; "run-external"; + "save"; "schema"; "scope"; "select"; "seq"; "shuffle"; "skip"; "sleep"; + "slice"; "sort"; "sort-by"; "source"; "source-env"; "split"; "start"; + "stor"; "str"; "sys"; + "table"; "take"; "tee"; "term"; "timeit"; "to"; "touch"; "transpose"; + "try"; "tutor"; + "ulimit"; "umask"; "uname"; "uniq"; "uniq-by"; "unlet"; "update"; + "upsert"; "url"; "use"; + "values"; "version"; "view"; + "watch"; "where"; "which"; "while"; "whoami"; "window"; "with-env"; "wrap"; + "zip"; +] + +let builtin_set = lazy ( + let tbl = Hashtbl.create (List.length nushell_builtins) in + List.iter (fun s -> Hashtbl.replace tbl s true) nushell_builtins; + tbl) + +let is_nushell_builtin cmd = + Hashtbl.mem (Lazy.force builtin_set) cmd + +let dedup_entries entries = + let key_of entry = + match entry.switch with + | Short c -> Printf.sprintf "-%c" c + | Long l | Both (_, l) -> Printf.sprintf "--%s" l + in + let score entry = + let sw = match entry.switch with Both _ -> 10 | _ -> 0 in + let p = match entry.param with Some _ -> 5 | None -> 0 in + let d = min 5 (String.length entry.desc / 10) in + sw + p + d + in + let best = Hashtbl.create 64 in + List.iter (fun e -> + let k = key_of e in + match Hashtbl.find_opt best k with + | Some prev when score prev >= score e -> () + | _ -> Hashtbl.replace best k e + ) entries; + let covered_shorts = Hashtbl.create 16 in + Hashtbl.iter (fun _ e -> + match e.switch with + | Both (c, _) -> Hashtbl.replace covered_shorts c true + | _ -> () + ) best; + let seen = Hashtbl.create 64 in + List.filter_map (fun e -> + let k = key_of e in + if Hashtbl.mem seen k then None + else + match e.switch with + | Short c when Hashtbl.mem covered_shorts c -> None + | _ -> + Hashtbl.add seen k true; + Hashtbl.find_opt best k + ) entries + let nushell_type_of_param = function | "FILE" | "file" | "PATH" | "path" | "DIR" | "dir" | "DIRECTORY" | "FILENAME" | "PATTERNFILE" -> "path" @@ -8,75 +92,63 @@ let nushell_type_of_param = function | "LINES" | "DEPTH" | "depth" -> "int" | _ -> "string" -(* Escape a nushell string: wrap in double quotes, escape inner quotes *) let escape_nu s = - let buf = Buffer.create (String.length s + 2) in - String.iter (fun c -> - match c with - | '"' -> Buffer.add_string buf "\\\"" - | '\\' -> Buffer.add_string buf "\\\\" - | _ -> Buffer.add_char buf c - ) s; - Buffer.contents buf + if not (String.contains s '"') && not (String.contains s '\\') then s + else begin + let buf = Buffer.create (String.length s + 4) in + String.iter (fun c -> match c with + | '"' -> Buffer.add_string buf "\\\"" + | '\\' -> Buffer.add_string buf "\\\\" + | _ -> Buffer.add_char buf c + ) s; + Buffer.contents buf + end -(* Format a single flag for nushell extern *) let format_flag entry = - let buf = Buffer.create 64 in - Buffer.add_string buf " "; - (* Flag name *) - (match entry.switch with - | Both (s, l) -> - Buffer.add_string buf (Printf.sprintf "--%s(-%c)" l s) - | Long l -> - Buffer.add_string buf (Printf.sprintf "--%s" l) - | Short s -> - Buffer.add_string buf (Printf.sprintf "-%c" s)); - (* Type annotation *) - (match entry.param with - | Some (Mandatory name) -> - Buffer.add_string buf ": "; - Buffer.add_string buf (nushell_type_of_param name) - | Some (Optional name) -> - Buffer.add_string buf ": "; - Buffer.add_string buf (nushell_type_of_param name) - | None -> ()); - (* Description as comment *) - if String.length entry.desc > 0 then begin - (* Pad to align comments *) - let current_len = Buffer.length buf in - let target = max (current_len + 1) 40 in - for _ = current_len to target - 1 do - Buffer.add_char buf ' ' - done; - Buffer.add_string buf "# "; - Buffer.add_string buf entry.desc - end; - Buffer.contents buf + let name = match entry.switch with + | Both (s, l) -> Printf.sprintf "--%s(-%c)" l s + | Long l -> Printf.sprintf "--%s" l + | Short s -> Printf.sprintf "-%c" s + in + let typed = match entry.param with + | Some (Mandatory p) | Some (Optional p) -> ": " ^ nushell_type_of_param p + | None -> "" + in + let flag = " " ^ name ^ typed in + if String.length entry.desc = 0 then flag + else + let pad_len = max 1 (40 - String.length flag) in + flag ^ String.make pad_len ' ' ^ "# " ^ entry.desc + +let write_extern buf cmd_name result = + let entries = dedup_entries result.entries in + Printf.bprintf buf "export extern \"%s\" [\n" (escape_nu cmd_name); + List.iter (fun e -> + Buffer.add_string buf (format_flag e); Buffer.add_char buf '\n' + ) entries; + Buffer.add_string buf "]\n"; + List.iter (fun (sc : subcommand) -> + Printf.bprintf buf "\nexport extern \"%s %s\" [ # %s\n]\n" + (escape_nu cmd_name) (escape_nu sc.name) (escape_nu sc.desc) + ) result.subcommands -(* Generate nushell extern definition for a command *) let generate_extern cmd_name result = let buf = Buffer.create 1024 in - (* Main extern with flags *) - Buffer.add_string buf (Printf.sprintf "export extern \"%s\" [\n" (escape_nu cmd_name)); - List.iter (fun entry -> - Buffer.add_string buf (format_flag entry); - Buffer.add_char buf '\n' - ) result.entries; - Buffer.add_string buf "]\n"; - (* Subcommand externs *) - List.iter (fun (sc : subcommand) -> - Buffer.add_string buf - (Printf.sprintf "\nexport extern \"%s %s\" [ # %s\n]\n" - (escape_nu cmd_name) (escape_nu sc.name) (escape_nu sc.desc)) - ) result.subcommands; + write_extern buf cmd_name result; Buffer.contents buf -(* Generate a complete nushell module *) -let generate_module cmd_name result = - Printf.sprintf "module %s-completions {\n%s}\n" - cmd_name (generate_extern cmd_name result) +let module_name_of cmd_name = + let s = String.map (function + | ('a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_') as c -> c | _ -> '-') cmd_name in + s ^ "-completions" + +let generate_module cmd_name result = + let m = module_name_of cmd_name in + let buf = Buffer.create 1024 in + Printf.bprintf buf "module %s {\n" m; + write_extern buf cmd_name result; + Printf.bprintf buf "}\n\nuse %s *\n" m; + Buffer.contents buf -(* Generate from manpage entries (no subcommands) *) let generate_extern_from_entries cmd_name entries = - let result = { entries; subcommands = [] } in - generate_extern cmd_name result + generate_extern cmd_name { entries; subcommands = [] } diff --git a/lib/parser.ml b/lib/parser.ml index a4eaae7..9aedaa2 100644 --- a/lib/parser.ml +++ b/lib/parser.ml @@ -128,12 +128,15 @@ let param_parser = space_upper_param; space_type_param ] >>| fun a -> Some a) -(* Switch parser: -a, --all | -a | --all *) +(* Switch parser: -a, --all | --all / -a | -a | --all *) let switch_parser = choice [ (short_switch >>= fun s -> comma *> long_switch >>| fun l -> Both (s, l)); + (long_switch >>= fun l -> + inline_ws *> char '/' *> inline_ws *> + short_switch >>| fun s -> Both (s, l)); (short_switch >>| fun s -> Short s); (long_switch >>| fun l -> Long l); ] @@ -219,10 +222,15 @@ let entry = (* --- Subcommand parsing --- *) (* A subcommand line: " name description" *) +let is_subcommand_char = function + | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' -> true + | _ -> false + let subcommand_entry = inline_ws *> - take_while1 (fun c -> c <> ' ' && c <> '\t' && c <> '\n') >>= fun name -> - (* Must have at least 2 spaces before description *) + take_while1 is_subcommand_char >>= fun name -> + if String.length name < 2 then fail "subcommand name too short" + else char ' ' *> char ' ' *> inline_ws *> rest_of_line <* eol >>| fun desc -> { name; desc = String.trim desc } diff --git a/nix/module.nix b/nix/module.nix index f3a920b..2778e7c 100644 --- a/nix/module.nix +++ b/nix/module.nix @@ -1,70 +1,37 @@ -# NixOS module: automatic nushell completion generation from manpages +# NixOS module: automatic nushell completion generation # -# Modeled on nixpkgs' programs/fish.nix completion generator. -# For each package in environment.systemPackages, a small derivation runs -# `inshellah manpage-dir` against its share/man directory. Results are merged -# into a single directory and placed in nushell's vendor autoload path. +# Generates completions using three strategies in priority order: +# 1. Native completion generators (e.g. CMD completions nushell) +# 2. Manpage parsing +# 3. --help output parsing # -# Usage in your NixOS configuration: +# Runs as a single pass during the system profile build. # +# Usage: # { pkgs, ... }: { # imports = [ ./path/to/inshellah/nix/module.nix ]; # programs.inshellah.enable = true; -# # Optionally add packages not in systemPackages: -# # programs.inshellah.extraPackages = [ pkgs.kubectl ]; # } -{ config, lib, pkgs, ... }: +{ + config, + lib, + pkgs, + ... +}: let cfg = config.programs.inshellah; - - # The inshellah binary — override this if you build from the local flake - inshellahPkg = cfg.package; - - # Per-package derivation: run inshellah manpage-dir against a package's manpages - generateCompletions = package: - pkgs.runCommandLocal - (let - inherit (lib.strings) stringLength substring storeDir; - storeLength = stringLength storeDir + 34; - pathName = substring storeLength (stringLength package - storeLength) package; - in - (package.name or pathName) + "_nu-completions" - ) - ({ - inherit package; - nativeBuildInputs = [ inshellahPkg ]; - } // lib.optionalAttrs (package ? meta.priority) { - meta.priority = package.meta.priority; - }) - '' - mkdir -p $out - if [ -d "$package/share/man" ]; then - inshellah manpage-dir "$package/share/man" > "$out/completions.nu" 2>/dev/null || true - # Remove empty files - find $out -empty -delete - fi - ''; - -in { +in +{ options.programs.inshellah = { - enable = lib.mkEnableOption "nushell completion generation from manpages via inshellah"; + enable = lib.mkEnableOption "nushell completion generation via inshellah"; package = lib.mkOption { type = lib.types.package; description = "The inshellah package to use for generating completions."; }; - extraPackages = lib.mkOption { - type = lib.types.listOf lib.types.package; - default = []; - description = '' - Additional packages to generate nushell completions from, beyond - those in {option}`environment.systemPackages`. - ''; - }; - generatedCompletionsPath = lib.mkOption { type = lib.types.str; default = "/share/nushell/vendor/autoload"; @@ -77,33 +44,38 @@ in { }; config = lib.mkIf cfg.enable { - # Merge all per-package completions into a single directory. - # This path ends up in the system profile, and nushell discovers it - # via XDG_DATA_DIRS -> $prefix/share/nushell/vendor/autoload/ environment.pathsToLink = [ cfg.generatedCompletionsPath ]; - environment.systemPackages = [ - (pkgs.buildEnv { - name = "nushell-generated-completions"; - ignoreCollisions = true; - paths = map generateCompletions ( - config.environment.systemPackages ++ cfg.extraPackages - ); - pathsToLink = [ "/" ]; - # Nest everything under the vendor autoload path - postBuild = '' - if [ -d "$out" ]; then - tmp=$(mktemp -d) - cp -r "$out/"* "$tmp/" 2>/dev/null || true - rm -rf "$out/"* - mkdir -p "$out${cfg.generatedCompletionsPath}" - for f in "$tmp"/*.nu; do - [ -f "$f" ] && cp "$f" "$out${cfg.generatedCompletionsPath}/" - done - rm -rf "$tmp" - fi - ''; - }) - ]; + environment.extraSetup = + let + inshellah = "${cfg.package}/bin/inshellah"; + destDir = "$out${cfg.generatedCompletionsPath}"; + segments = lib.filter (s: s != "") (lib.splitString "/" cfg.generatedCompletionsPath); + derefPath = lib.concatMapStringsSep "\n " (seg: '' + _cur="$_cur/${seg}" + if [ -L "$_cur" ]; then + _target=$(readlink "$_cur") + rm "$_cur" + mkdir -p "$_cur" + if [ -d "$_target" ]; then + cp -rT "$_target" "$_cur" + chmod -R u+w "$_cur" + fi + fi'') segments; + in + '' + _cur="$out" + ${derefPath} + mkdir -p ${destDir} + + # Generate all completions in one pass: + # native generators > manpages > --help fallback + if [ -d "$out/bin" ] && [ -d "$out/share/man" ]; then + ${inshellah} generate "$out/bin" "$out/share/man" -o ${destDir} \ + 2>/dev/null || true + fi + + find ${destDir} -maxdepth 1 -empty -delete + ''; }; } diff --git a/test/test_inshellah.ml b/test/test_inshellah.ml index c057a84..55567f3 100644 --- a/test/test_inshellah.ml +++ b/test/test_inshellah.ml @@ -177,15 +177,15 @@ with \fB\-l\fR, scale sizes by SIZE .SH AUTHOR Written by someone. |} in - let entries = parse_manpage_string groff in - check "three entries" (List.length entries = 3); - if List.length entries >= 1 then begin - let e = List.hd entries in + let result = parse_manpage_string groff in + check "three entries" (List.length result.entries = 3); + if List.length result.entries >= 1 then begin + let e = List.hd result.entries in check "first is -a/--all" (e.switch = Both ('a', "all")); check "first desc" (String.length e.desc > 0) end; - if List.length entries >= 3 then begin - let e = List.nth entries 2 in + if List.length result.entries >= 3 then begin + let e = List.nth result.entries 2 in check "block-size switch" (e.switch = Long "block-size"); check "block-size param" (e.param = Some (Mandatory "SIZE")) end @@ -199,10 +199,10 @@ Allow insecure connections. Write output to file. .SH SEE ALSO |} in - let entries = parse_manpage_string groff in - check "two entries" (List.length entries = 2); - if List.length entries >= 1 then begin - let e = List.hd entries in + let result = parse_manpage_string groff in + check "two entries" (List.length result.entries = 2); + if List.length result.entries >= 1 then begin + let e = List.hd result.entries in check "first is -k/--insecure" (e.switch = Both ('k', "insecure")) end @@ -221,8 +221,115 @@ foo \- does stuff .SH DESCRIPTION Does stuff. |} in - let entries = parse_manpage_string groff in - check "no entries" (List.length entries = 0) + let result = parse_manpage_string groff in + check "no entries" (List.length result.entries = 0) + +let test_slash_switch_separator () = + Printf.printf "\n== Slash switch separator (--long / -s) ==\n"; + let r = parse " --verbose / -v Increase verbosity\n" in + check "one entry" (List.length r.entries = 1); + let e = List.hd r.entries in + check "both switch" (e.switch = Both ('v', "verbose")); + check "no param" (e.param = None); + check "desc" (e.desc = "Increase verbosity") + +let test_manpage_nix3_style () = + Printf.printf "\n== Manpage nix3 style ==\n"; + let groff = {|.SH Options +.SS Logging-related options +.IP "\(bu" 3 +.UR #opt-verbose +\f(CR--verbose\fR +.UE +/ \f(CR-v\fR +.IP +Increase the logging verbosity level. +.IP "\(bu" 3 +.UR #opt-quiet +\f(CR--quiet\fR +.UE +.IP +Decrease the logging verbosity level. +.SH SEE ALSO +|} in + let result = parse_manpage_string groff in + check "two entries" (List.length result.entries = 2); + if List.length result.entries >= 1 then begin + let e = List.hd result.entries in + check "verbose is Both" (e.switch = Both ('v', "verbose")); + check "verbose desc" (String.length e.desc > 0) + end; + if List.length result.entries >= 2 then begin + let e = List.nth result.entries 1 in + check "quiet is Long" (e.switch = Long "quiet"); + check "quiet desc" (String.length e.desc > 0) + end + +let test_manpage_nix3_with_params () = + Printf.printf "\n== Manpage nix3 with params ==\n"; + let groff = {|.SH Options +.IP "\(bu" 3 +.UR #opt-arg +\f(CR--arg\fR +.UE +\fIname\fR \fIexpr\fR +.IP +Pass the value as the argument name to Nix functions. +.IP "\(bu" 3 +.UR #opt-include +\f(CR--include\fR +.UE +/ \f(CR-I\fR \fIpath\fR +.IP +Add path to search path entries. +.IP +This option may be given multiple times. +.SH SEE ALSO +|} in + let result = parse_manpage_string groff in + check "two entries" (List.length result.entries = 2); + if List.length result.entries >= 1 then begin + let e = List.hd result.entries in + check "arg is Long" (e.switch = Long "arg"); + check "arg has param" (e.param <> None) + end; + if List.length result.entries >= 2 then begin + let e = List.nth result.entries 1 in + check "include is Both" (e.switch = Both ('I', "include")); + check "include has path param" (e.param = Some (Mandatory "path")) + end + +let test_synopsis_subcommand () = + Printf.printf "\n== SYNOPSIS subcommand detection ==\n"; + let groff = {|.SH "SYNOPSIS" +.sp +.nf +\fBgit\fR \fBcommit\fR [\fB\-a\fR | \fB\-\-interactive\fR] +.fi +.SH "DESCRIPTION" +|} in + let cmd = extract_synopsis_command groff in + check "detected git commit" (cmd = Some "git commit") + +let test_synopsis_standalone () = + Printf.printf "\n== SYNOPSIS standalone command ==\n"; + let groff = {|.SH Synopsis +.LP +\f(CRnix-build\fR [\fIpaths\fR] +.SH Description +|} in + let cmd = extract_synopsis_command groff in + check "detected nix-build" (cmd = Some "nix-build") + +let test_synopsis_nix3 () = + Printf.printf "\n== SYNOPSIS nix3 subcommand ==\n"; + let groff = {|.SH Synopsis +.LP +\f(CRnix run\fR [\fIoption\fR] \fIinstallable\fR +.SH Description +|} in + let cmd = extract_synopsis_command groff in + check "detected nix run" (cmd = Some "nix run") (* --- Nushell generation tests --- *) @@ -276,8 +383,8 @@ do not ignore entries starting with . scale sizes by SIZE .SH AUTHOR |} in - let entries = parse_manpage_string groff in - let nu = generate_extern_from_entries "ls" entries in + let result = parse_manpage_string groff in + let nu = generate_extern "ls" result in check "has extern" (contains nu "export extern \"ls\""); check "has --all(-a)" (contains nu "--all(-a)"); check "has --block-size" (contains nu "--block-size: string") @@ -290,6 +397,54 @@ let test_nushell_module () = check "has extern inside" (contains nu "export extern \"myapp\""); check "has flag" (contains nu "--verbose(-v)") +let test_dedup_entries () = + Printf.printf "\n== Deduplication ==\n"; + let r = parse {| -v, --verbose verbose output + --verbose verbose mode + -v be verbose +|} in + let nu = generate_extern "test" r in + (* Count occurrences of --verbose *) + let count = + let re = Str.regexp_string "--verbose" in + let n = ref 0 in + let i = ref 0 in + (try while true do + let _ = Str.search_forward re nu !i in + incr n; i := Str.match_end () + done with Not_found -> ()); + !n + in + check "verbose appears once" (count = 1); + check "best version kept (Both)" (contains nu "--verbose(-v)") + +let test_dedup_manpage () = + Printf.printf "\n== Dedup from manpage ==\n"; + let groff = {|.SH OPTIONS +.TP +\fB\-v\fR, \fB\-\-verbose\fR +Be verbose. +.SH DESCRIPTION +Use \fB\-v\fR for verbose output. +Use \fB\-\-verbose\fR to see more. +|} in + let result = parse_manpage_string groff in + let nu = generate_extern "test" result in + check "has --verbose(-v)" (contains nu "--verbose(-v)"); + (* Should not have standalone -v or duplicate --verbose *) + let lines = String.split_on_char '\n' nu in + let verbose_lines = List.filter (fun l -> contains l "verbose") lines in + check "only one verbose line" (List.length verbose_lines = 1) + +let test_font_boundary_spacing () = + Printf.printf "\n== Font boundary spacing ==\n"; + (* \fB--max-results\fR\fIcount\fR should become "--max-results count" *) + let s = strip_groff_escapes {|\fB\-\-max\-results\fR\fIcount\fR|} in + check "has space before param" (contains s "--max-results count"); + (* \fB--color\fR[=\fIWHEN\fR] should NOT insert space before = *) + let s2 = strip_groff_escapes {|\fB\-\-color\fR[=\fIWHEN\fR]|} in + check "no space before =" (contains s2 "--color[=WHEN]") + let () = Printf.printf "Running help parser tests...\n"; test_gnu_basic (); @@ -314,6 +469,12 @@ let () = test_manpage_ip_style (); test_manpage_groff_stripping (); test_manpage_empty_options (); + test_slash_switch_separator (); + test_manpage_nix3_style (); + test_manpage_nix3_with_params (); + test_synopsis_subcommand (); + test_synopsis_standalone (); + test_synopsis_nix3 (); Printf.printf "\nRunning nushell generation tests...\n"; test_nushell_basic (); @@ -322,5 +483,10 @@ let () = test_nushell_from_manpage (); test_nushell_module (); + Printf.printf "\nRunning dedup and font tests...\n"; + test_dedup_entries (); + test_dedup_manpage (); + test_font_boundary_spacing (); + Printf.printf "\n=== Results: %d passed, %d failed ===\n" !passes !failures; if !failures > 0 then exit 1