This commit is contained in:
atagen 2026-03-16 22:23:10 +11:00
commit fd80fbab7e
48 changed files with 16775 additions and 0 deletions

View file

@ -0,0 +1,10 @@
[package]
name = "jupiter-api-types"
version.workspace = true
edition.workspace = true
[dependencies]
serde = { workspace = true }
serde_json = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true }

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,14 @@
[package]
name = "jupiter-cache"
version.workspace = true
edition.workspace = true
[dependencies]
jupiter-api-types = { workspace = true }
axum = { workspace = true }
tokio = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
base64 = { workspace = true }

View file

@ -0,0 +1,43 @@
//! Error types for the Nix binary cache.
//!
//! [`CacheError`] is the single error enum used across every layer of the
//! cache -- storage I/O, NARInfo parsing, and capacity limits. Axum route
//! handlers in [`crate::routes`] translate these variants into the appropriate
//! HTTP status codes (404, 400, 500, etc.).
use thiserror::Error;
/// Unified error type for all binary cache operations.
///
/// Each variant maps to a different failure mode that can occur when reading,
/// writing, or validating cache artefacts. The `#[error(...)]` attributes
/// define the `Display` text; the `PUT` narinfo route handler sends that text
/// back to the client as the body of its 400 / 500 responses.
#[derive(Debug, Error)]
pub enum CacheError {
/// The requested hash does not exist in the cache.
///
/// A store hash is the first 32 base-32 characters of a Nix store path
/// (e.g. the `aaaa...` part of `/nix/store/aaaa...-hello-2.12`).
/// The payload is the hash that was looked up. `LocalStore` returns this
/// variant both for a missing `.narinfo` file (payload: store hash) and
/// for a missing NAR archive (payload: NAR hash), so the "store hash"
/// wording of the message is only approximate in the second case.
#[error("store hash not found: {0}")]
NotFound(String),
/// A low-level filesystem I/O error occurred while reading or writing
/// cache data. This typically surfaces as an HTTP 500 to the client.
///
/// The `#[from]` attribute lets `?` convert a `std::io::Error` into this
/// variant automatically.
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
/// The NARInfo text failed to parse -- a required field is missing or a
/// value is malformed. When this comes from a `PUT` request it results
/// in an HTTP 400 (Bad Request) response. The payload is a short
/// human-readable reason such as `missing URL`.
#[error("invalid narinfo: {0}")]
InvalidNarInfo(String),
/// The on-disk cache has exceeded its configured maximum size.
///
/// This variant exists to support an optional size cap (`max_size_gb` in
/// [`crate::store::LocalStore::new`]).
///
/// NOTE(review): nothing in the visible part of this crate constructs this
/// variant yet -- `LocalStore` stores the cap but documents that quota
/// enforcement is not wired up -- so uploads are not currently rejected.
#[error("storage full")]
StorageFull,
}

View file

@ -0,0 +1,44 @@
//! # jupiter-cache -- Built-in Nix Binary Cache for Jupiter
//!
//! Jupiter is a self-hosted, wire-compatible replacement for
//! [hercules-ci.com](https://hercules-ci.com). This crate implements an
//! **optional** built-in Nix binary cache server that speaks the same HTTP
//! protocol used by `cache.nixos.org` and other standard Nix binary caches.
//!
//! ## Why an optional cache?
//!
//! In a typical Hercules CI deployment each agent already ships with its own
//! binary cache support. However, some organisations prefer a single, shared
//! cache server that every agent (and every developer workstation) can push to
//! and pull from. `jupiter-cache` fills that role: it can be enabled inside
//! the Jupiter server process so that no separate cache infrastructure (e.g.
//! an S3 bucket or a dedicated `nix-serve` instance) is required.
//!
//! ## The Nix binary cache HTTP protocol
//!
//! The protocol is intentionally simple and fully compatible with the one
//! implemented by `cache.nixos.org`:
//!
//! | Method | Path | Description |
//! |--------|-------------------------------|-------------------------------------------|
//! | `GET` | `/nix-cache-info` | Returns cache metadata (store dir, etc.) |
//! | `GET` | `/{storeHash}.narinfo` | Returns the NARInfo for a store path hash |
//! | `PUT` | `/{storeHash}.narinfo` | Uploads a NARInfo (agent -> cache) |
//! | `GET` | `/nar/{narHash}.nar[.xz|.zst]`| Serves the actual NAR archive |
//!
//! A **NARInfo** file is the metadata envelope for a Nix store path. It
//! describes the NAR archive's hash, compressed and uncompressed sizes,
//! references to other store paths, the derivation that produced the path,
//! and one or more cryptographic signatures that attest to its authenticity.
//!
//! ## Crate layout
//!
//! * [`error`] -- Error types for cache operations.
//! * [`narinfo`] -- Parser and serialiser for the NARInfo text format.
//! * [`store`] -- [`LocalStore`](store::LocalStore) -- on-disk storage backend.
//! * [`routes`] -- Axum route handlers that expose the cache over HTTP.
pub mod error;
pub mod narinfo;
pub mod routes;
pub mod store;

View file

@ -0,0 +1,233 @@
//! NARInfo parser and serialiser.
//!
//! Every path in a Nix binary cache is described by a **NARInfo** file. When
//! a Nix client wants to know whether a particular store path is available in
//! the cache it fetches `https://<cache>/<storeHash>.narinfo`. The response
//! is a simple, line-oriented, key-value text format that looks like this:
//!
//! ```text
//! StorePath: /nix/store/aaaa...-hello-2.12
//! URL: nar/1b2m2y0h...nar.xz
//! Compression: xz
//! FileHash: sha256:1b2m2y0h...
//! FileSize: 54321
//! NarHash: sha256:0abcdef...
//! NarSize: 123456
//! References: bbbb...-glibc-2.37 cccc...-gcc-12.3.0
//! Deriver: dddd...-hello-2.12.drv
//! Sig: cache.example.com:AAAA...==
//! ```
//!
//! ## Fields
//!
//! | Field | Required | Description |
//! |---------------|----------|-----------------------------------------------------|
//! | `StorePath` | yes | Full Nix store path |
//! | `URL` | yes | Relative URL to the (possibly compressed) NAR file |
//! | `Compression` | no | `xz`, `zstd`, `bzip2`, or `none` (default: `none`) |
//! | `FileHash` | yes | Hash of the compressed file on disk |
//! | `FileSize` | yes | Size (bytes) of the compressed file |
//! | `NarHash` | yes | Hash of the uncompressed NAR archive |
//! | `NarSize` | yes | Size (bytes) of the uncompressed NAR archive |
//! | `References` | no | Space-separated list of store-path basenames this path depends on |
//! | `Deriver` | no | The `.drv` file that produced this output |
//! | `Sig` | no | Cryptographic signature(s); may appear multiple times|
//!
//! This module provides [`NarInfo::parse`] to deserialise the text format and
//! a [`fmt::Display`] implementation to serialise it back.
use std::fmt;
/// A parsed NARInfo record.
///
/// Represents all the metadata Nix needs to fetch and verify a single store
/// path from a binary cache. Instances are created by parsing the text
/// format received from HTTP requests ([`NarInfo::parse`]) and serialised
/// back to text via the [`Display`](fmt::Display) implementation when serving
/// `GET /{storeHash}.narinfo` responses.
///
/// All hash and path fields are kept as the raw strings found in the input;
/// this type performs no validation of their internal format.
#[derive(Debug, Clone)]
pub struct NarInfo {
/// Full Nix store path, e.g. `/nix/store/aaaa...-hello-2.12`.
/// Taken from the required `StorePath` line.
pub store_path: String,
/// Relative URL pointing to the (possibly compressed) NAR archive,
/// e.g. `nar/1b2m2y0h...nar.xz`. The client appends this to the
/// cache base URL to download the archive.
/// Taken from the required `URL` line.
pub url: String,
/// Compression algorithm applied to the NAR archive on disk.
/// Nix uses this to decide how to decompress after downloading.
/// The parser falls back to [`Compression::None`] when the input has no
/// `Compression` line.
pub compression: Compression,
/// Content-addressed hash of the compressed file (the file referenced
/// by [`url`](Self::url)), usually in the form `sha256:<base32>`.
pub file_hash: String,
/// Size in bytes of the compressed file on disk.
pub file_size: u64,
/// Content-addressed hash of the *uncompressed* NAR archive.
/// Nix uses this to verify integrity after decompression.
pub nar_hash: String,
/// Size in bytes of the uncompressed NAR archive.
pub nar_size: u64,
/// Other store paths that this path depends on at runtime, listed as
/// basenames (e.g. `bbbb...-glibc-2.37`). An empty `Vec` means the
/// path is self-contained; in that case the serialiser omits the
/// `References` line entirely.
pub references: Vec<String>,
/// The `.drv` basename that built this output, if known.
/// `None` when the input had no `Deriver` line.
pub deriver: Option<String>,
/// Zero or more cryptographic signatures that attest to the
/// authenticity of this store path. Each signature is of the form
/// `<key-name>:<base64-sig>`. Multiple `Sig` lines are allowed in
/// the on-wire format; they are kept in input order.
pub sig: Vec<String>,
}
/// The compression algorithm used for a NAR archive on disk.
///
/// When a NAR file is stored in the cache it may be compressed to save space
/// and bandwidth. The value is read from the `Compression:` line of a
/// NARInfo document (it is *not* derived from the file extension) and is
/// written back out on serialisation so that clients know how to decompress
/// the download.
///
/// The type is a plain field-less enum, so equality is total; it therefore
/// implements `Eq` in addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Compression {
    /// No compression -- the file is a raw `.nar`. Spelled `none` on the wire.
    None,
    /// LZMA2 compression (`.nar.xz`). Spelled `xz` on the wire.
    Xz,
    /// Zstandard compression (`.nar.zst`). Spelled `zstd` on the wire.
    Zstd,
    /// Bzip2 compression (`.nar.bz2`). Spelled `bzip2` on the wire.
    Bzip2,
}
impl NarInfo {
/// Parse the line-oriented NARInfo text format into a [`NarInfo`] struct.
///
/// The format is a series of `Key: Value` lines. Required fields are
/// `StorePath`, `URL`, `FileHash`, `FileSize`, `NarHash`, and `NarSize`.
/// If any required field is missing, a
/// [`CacheError::InvalidNarInfo`](crate::error::CacheError::InvalidNarInfo)
/// error is returned.
///
/// Unknown keys are silently ignored so that forward-compatibility with
/// future Nix versions is preserved.
pub fn parse(input: &str) -> Result<Self, crate::error::CacheError> {
let mut store_path = None;
let mut url = None;
let mut compression = Compression::None;
let mut file_hash = None;
let mut file_size = None;
let mut nar_hash = None;
let mut nar_size = None;
let mut references = Vec::new();
let mut deriver = None;
let mut sig = Vec::new();
for line in input.lines() {
let line = line.trim();
if line.is_empty() {
continue;
}
if let Some((key, value)) = line.split_once(": ") {
match key {
"StorePath" => store_path = Some(value.to_string()),
"URL" => url = Some(value.to_string()),
"Compression" => {
compression = match value {
"xz" => Compression::Xz,
"zstd" => Compression::Zstd,
"bzip2" => Compression::Bzip2,
"none" => Compression::None,
_ => Compression::None,
}
}
"FileHash" => file_hash = Some(value.to_string()),
"FileSize" => file_size = value.parse().ok(),
"NarHash" => nar_hash = Some(value.to_string()),
"NarSize" => nar_size = value.parse().ok(),
"References" => {
if !value.is_empty() {
references =
value.split_whitespace().map(String::from).collect();
}
}
"Deriver" => deriver = Some(value.to_string()),
"Sig" => sig.push(value.to_string()),
_ => {} // ignore unknown fields for forward-compatibility
}
}
}
Ok(NarInfo {
store_path: store_path.ok_or_else(|| {
crate::error::CacheError::InvalidNarInfo("missing StorePath".into())
})?,
url: url.ok_or_else(|| {
crate::error::CacheError::InvalidNarInfo("missing URL".into())
})?,
compression,
file_hash: file_hash.ok_or_else(|| {
crate::error::CacheError::InvalidNarInfo("missing FileHash".into())
})?,
file_size: file_size.ok_or_else(|| {
crate::error::CacheError::InvalidNarInfo("missing FileSize".into())
})?,
nar_hash: nar_hash.ok_or_else(|| {
crate::error::CacheError::InvalidNarInfo("missing NarHash".into())
})?,
nar_size: nar_size.ok_or_else(|| {
crate::error::CacheError::InvalidNarInfo("missing NarSize".into())
})?,
references,
deriver,
sig,
})
}
}
/// Renders a [`NarInfo`] in the line-oriented `Key: Value` text format.
///
/// The seven mandatory lines (`StorePath`, `URL`, `Compression`, `FileHash`,
/// `FileSize`, `NarHash`, `NarSize`) are always written, in that order and
/// each terminated by `\n`. They are followed by `References` (only when the
/// list is non-empty, joined by single spaces), `Deriver` (only when set) and
/// one `Sig` line per signature. The result can be used directly as the body
/// of a `GET /{storeHash}.narinfo` response.
impl fmt::Display for NarInfo {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Wire spelling of the compression algorithm.
        let compression_label = match self.compression {
            Compression::Xz => "xz",
            Compression::Zstd => "zstd",
            Compression::Bzip2 => "bzip2",
            Compression::None => "none",
        };

        writeln!(f, "StorePath: {}", self.store_path)?;
        writeln!(f, "URL: {}", self.url)?;
        writeln!(f, "Compression: {}", compression_label)?;
        writeln!(f, "FileHash: {}", self.file_hash)?;
        writeln!(f, "FileSize: {}", self.file_size)?;
        writeln!(f, "NarHash: {}", self.nar_hash)?;
        writeln!(f, "NarSize: {}", self.nar_size)?;

        if !self.references.is_empty() {
            let joined = self.references.join(" ");
            writeln!(f, "References: {}", joined)?;
        }

        if let Some(deriver) = self.deriver.as_deref() {
            writeln!(f, "Deriver: {}", deriver)?;
        }

        self.sig
            .iter()
            .try_for_each(|signature| writeln!(f, "Sig: {}", signature))
    }
}

View file

@ -0,0 +1,158 @@
//! Axum HTTP route handlers for the Nix binary cache protocol.
//!
//! This module exposes the standard Nix binary cache endpoints so that any
//! Nix client (or Hercules CI agent) can interact with Jupiter's built-in
//! cache exactly as it would with `cache.nixos.org` or any other
//! Nix-compatible binary cache.
//!
//! ## Endpoints
//!
//! | Method | Path | Handler | Purpose |
//! |--------|--------------------------------|--------------------|--------------------------------------------------|
//! | `GET` | `/nix-cache-info` | [`nix_cache_info`] | Return cache metadata (store dir, priority, etc.)|
//! | `GET` | `/{storeHash}.narinfo` | [`get_narinfo`] | Fetch NARInfo metadata for a store path hash |
//! | `PUT` | `/{storeHash}.narinfo` | [`put_narinfo`] | Upload NARInfo metadata (agent -> cache) |
//! | `GET` | `/nar/{filename}` | [`get_nar`] | Download a (possibly compressed) NAR archive |
//!
//! All handlers receive an `Arc<LocalStore>` via Axum's shared state
//! mechanism. Use [`cache_routes`] to obtain a configured [`Router`] that
//! can be merged into the main Jupiter server.
use axum::{
extract::{Path, State},
http::StatusCode,
response::IntoResponse,
routing::get,
Router,
};
use std::sync::Arc;
use crate::store::LocalStore;
/// Assemble the [`Router`] for the Nix binary cache endpoints.
///
/// Three paths are registered:
///
/// * `/nix-cache-info` -- `GET` only.
/// * `/{store_hash}.narinfo` -- `GET` and `PUT`.
/// * `/nar/{filename}` -- `GET` only.
///
/// The router is generic over `Arc<LocalStore>` state, which the caller must
/// provide before serving:
///
/// ```ignore
/// let store = Arc::new(LocalStore::new("/var/cache/jupiter", None).await?);
/// let app = cache_routes().with_state(store);
/// ```
///
/// The result can be nested under a sub-path or merged into the top-level
/// Jupiter application router.
pub fn cache_routes() -> Router<Arc<LocalStore>> {
    // Both verbs on the narinfo path share one method router.
    let narinfo_methods = get(get_narinfo).put(put_narinfo);

    Router::new()
        .route("/nix-cache-info", get(nix_cache_info))
        .route("/{store_hash}.narinfo", narinfo_methods)
        .route("/nar/{filename}", get(get_nar))
}
/// `GET /nix-cache-info` -- serve the cache's static metadata document.
///
/// Always answers `200 OK` with content-type `text/x-nix-cache-info` and a
/// fixed three-line body:
///
/// * `StoreDir: /nix/store` -- the store prefix this cache serves.
/// * `WantMassQuery: 1` -- signals that querying many paths at once is
///   acceptable.
/// * `Priority: 30` -- substitution-ordering hint; Nix prefers caches with
///   lower numbers.
async fn nix_cache_info() -> impl IntoResponse {
    let headers = [("Content-Type", "text/x-nix-cache-info")];
    let body = "StoreDir: /nix/store\nWantMassQuery: 1\nPriority: 30\n";
    (StatusCode::OK, headers, body)
}
/// `GET /{store_hash}.narinfo` -- fetch the NARInfo for a store-path hash.
///
/// `store_hash` is the captured path parameter: the 32-character base-32
/// hash found between `/nix/store/` and the first `-` of a store path.
///
/// * On success: `200 OK`, content-type `text/x-nix-narinfo`, body is the
///   record re-serialised through its `Display` implementation.
/// * On *any* error from the store (missing file, unparsable file): a bare
///   `404 Not Found`, so the Nix client can move on to its next substituter.
async fn get_narinfo(
    State(store): State<Arc<LocalStore>>,
    Path(store_hash): Path<String>,
) -> impl IntoResponse {
    store
        .get_narinfo(&store_hash)
        .await
        .map(|narinfo| {
            let headers = [("Content-Type", "text/x-nix-narinfo")];
            (StatusCode::OK, headers, narinfo.to_string()).into_response()
        })
        .unwrap_or_else(|_| StatusCode::NOT_FOUND.into_response())
}
/// `PUT /{store_hash}.narinfo` -- accept a NARInfo upload.
///
/// The request body is read as a string and parsed with
/// [`NarInfo::parse`](crate::narinfo::NarInfo::parse) before anything touches
/// the disk; only a successfully parsed record is handed to
/// [`LocalStore::put_narinfo`](crate::store::LocalStore::put_narinfo), which
/// stores it under the hash taken from the URL.
///
/// ## Response codes
///
/// * `200 OK` -- the record was written.
/// * `400 Bad Request` -- parsing failed; the body carries the error text.
/// * `500 Internal Server Error` -- the store reported an error while
///   writing; the body carries the error text.
async fn put_narinfo(
    State(store): State<Arc<LocalStore>>,
    Path(store_hash): Path<String>,
    body: String,
) -> impl IntoResponse {
    // Validate first so malformed uploads never reach the filesystem.
    let narinfo = match crate::narinfo::NarInfo::parse(&body) {
        Ok(parsed) => parsed,
        Err(parse_err) => {
            return (StatusCode::BAD_REQUEST, parse_err.to_string()).into_response();
        }
    };

    if let Err(write_err) = store.put_narinfo(&store_hash, &narinfo).await {
        return (StatusCode::INTERNAL_SERVER_ERROR, write_err.to_string()).into_response();
    }

    StatusCode::OK.into_response()
}
/// `GET /nar/{filename}` -- download a NAR archive.
///
/// `filename` has the form `<narHash>.nar[.xz|.zst]`. The handler takes the
/// text before the first `.` as the NAR hash (the whole name if it contains
/// no `.`), asks
/// [`LocalStore::get_nar`](crate::store::LocalStore::get_nar) for the bytes,
/// and answers with a `Content-Type` chosen from the *requested* name:
///
/// | Name ends with | Content-Type             |
/// |----------------|--------------------------|
/// | `.xz`          | `application/x-xz`       |
/// | `.zst`         | `application/zstd`       |
/// | anything else  | `application/x-nix-nar`  |
///
/// Any store error becomes a bare `404 Not Found`. The bytes are returned
/// exactly as stored; no decompression happens server-side.
///
/// NOTE(review): only the hash is forwarded to the store, which picks the
/// file by its own probing order, so the served file's compression is not
/// guaranteed to match the extension that was requested.
async fn get_nar(
    State(store): State<Arc<LocalStore>>,
    Path(filename): Path<String>,
) -> impl IntoResponse {
    // "abc123.nar.xz" -> "abc123"; a name without '.' is used unchanged.
    let nar_hash = filename
        .split_once('.')
        .map_or(filename.as_str(), |(stem, _)| stem);

    let data = match store.get_nar(nar_hash).await {
        Ok(bytes) => bytes,
        Err(_) => return StatusCode::NOT_FOUND.into_response(),
    };

    // The text after the last '.' decides the media type.
    let content_type = match filename.rsplit_once('.').map(|(_, ext)| ext) {
        Some("xz") => "application/x-xz",
        Some("zst") => "application/zstd",
        _ => "application/x-nix-nar",
    };

    (StatusCode::OK, [("Content-Type", content_type)], data).into_response()
}

View file

@ -0,0 +1,178 @@
//! On-disk storage backend for the Nix binary cache.
//!
//! [`LocalStore`] persists NARInfo files and NAR archives to a local
//! directory. The on-disk layout mirrors the URL scheme of the binary cache
//! HTTP protocol:
//!
//! ```text
//! <root>/
//! aaaa....narinfo # NARInfo for store hash "aaaa..."
//! bbbb....narinfo # NARInfo for store hash "bbbb..."
//! nar/
//! 1b2m2y0h....nar.xz # Compressed NAR archive
//! cdef5678....nar.zst # Another archive, zstd-compressed
//! abcd1234....nar # Uncompressed NAR archive
//! ```
//!
//! This layout means the cache directory can also be served by any static
//! file HTTP server (e.g. nginx) if desired, without any application logic.
//!
//! ## Concurrency
//!
//! All public methods are `async` and use `tokio::fs` for non-blocking I/O.
//! The struct is designed to be wrapped in an `Arc` and shared across Axum
//! handler tasks.
use std::path::PathBuf;
use tokio::fs;
use crate::error::CacheError;
use crate::narinfo::NarInfo;
/// A filesystem-backed Nix binary cache store.
///
/// Holds NARInfo metadata files and NAR archives in a directory tree whose
/// layout is compatible with the standard Nix binary cache HTTP protocol:
/// `<root>/<store_hash>.narinfo` for metadata and `<root>/nar/<file>` for
/// archives. An optional maximum size (in gigabytes) can be specified at
/// construction time; it is recorded but, as noted on the field below, not
/// enforced yet.
///
/// # Usage within Jupiter
///
/// When the built-in cache feature is enabled, a `LocalStore` is created at
/// server startup, wrapped in an `Arc`, and passed as Axum shared state to
/// the route handlers in [`crate::routes`]. Hercules CI agents can then
/// `PUT` NARInfo files and NAR archives into the cache and `GET` them back
/// when they (or other agents / developer workstations) need to fetch build
/// artefacts.
///
/// Both fields are private; instances are obtained through
/// [`LocalStore::new`].
pub struct LocalStore {
/// Root directory of the cache on disk.
path: PathBuf,
/// Optional disk-usage cap, in bytes. Derived from the `max_size_gb`
/// constructor parameter. Currently stored for future enforcement;
/// the quota-checking logic is not yet wired up.
// `dead_code` is silenced because no method reads this field yet.
#[allow(dead_code)]
max_size_bytes: Option<u64>,
}
impl LocalStore {
/// Create or open a [`LocalStore`] rooted at `path`.
///
/// The constructor ensures that both the root directory and the `nar/`
/// subdirectory exist (creating them if necessary). The optional
/// `max_size_gb` parameter sets an upper bound on total disk usage; pass
/// `None` for an unbounded cache.
///
/// # Errors
///
/// Returns [`CacheError::Io`] if the directories cannot be created.
pub async fn new(
path: impl Into<PathBuf>,
max_size_gb: Option<u64>,
) -> Result<Self, CacheError> {
let path = path.into();
fs::create_dir_all(&path).await?;
fs::create_dir_all(path.join("nar")).await?;
Ok(Self {
path,
max_size_bytes: max_size_gb.map(|gb| gb * 1024 * 1024 * 1024),
})
}
/// Retrieve and parse the NARInfo for a given store hash.
///
/// `store_hash` is the first 32 base-32 characters of a Nix store path
/// (everything between `/nix/store/` and the first `-`). The method
/// reads `<root>/<store_hash>.narinfo` from disk and parses it into a
/// [`NarInfo`] struct.
///
/// # Errors
///
/// * [`CacheError::NotFound`] -- the `.narinfo` file does not exist.
/// * [`CacheError::InvalidNarInfo`] -- the file exists but cannot be parsed.
pub async fn get_narinfo(&self, store_hash: &str) -> Result<NarInfo, CacheError> {
let path = self.path.join(format!("{}.narinfo", store_hash));
let content = fs::read_to_string(&path)
.await
.map_err(|_| CacheError::NotFound(store_hash.to_string()))?;
NarInfo::parse(&content)
}
/// Write a NARInfo file for the given store hash.
///
/// Serialises `narinfo` using its [`Display`](std::fmt::Display)
/// implementation and writes it to `<root>/<store_hash>.narinfo`. If a
/// narinfo for this hash already exists it is silently overwritten.
///
/// This is the server-side handler for `PUT /{storeHash}.narinfo` --
/// Hercules CI agents call this after building a derivation to publish
/// the artefact metadata to the shared cache.
///
/// # Errors
///
/// Returns [`CacheError::Io`] if the file cannot be written.
pub async fn put_narinfo(
&self,
store_hash: &str,
narinfo: &NarInfo,
) -> Result<(), CacheError> {
let path = self.path.join(format!("{}.narinfo", store_hash));
fs::write(&path, narinfo.to_string()).await?;
Ok(())
}
/// Read a NAR archive from disk.
///
/// Because the archive may be stored with any compression extension, this
/// method probes for the file with no extension, `.xz`, and `.zst` in
/// that order. The first match wins. The raw bytes are returned without
/// any decompression -- the HTTP response will carry the appropriate
/// `Content-Type` header so the Nix client knows how to handle it.
///
/// `nar_hash` is the content-address portion of the filename (the part
/// before `.nar`).
///
/// # Errors
///
/// * [`CacheError::NotFound`] -- no file matches any of the probed extensions.
/// * [`CacheError::Io`] -- the file exists but could not be read.
pub async fn get_nar(&self, nar_hash: &str) -> Result<Vec<u8>, CacheError> {
// Try multiple extensions to support all compression variants that
// may have been uploaded.
for ext in &["", ".xz", ".zst"] {
let path = self
.path
.join("nar")
.join(format!("{}.nar{}", nar_hash, ext));
if path.exists() {
return fs::read(&path).await.map_err(CacheError::Io);
}
}
Err(CacheError::NotFound(nar_hash.to_string()))
}
/// Write a NAR archive to the `nar/` subdirectory.
///
/// `filename` should include the full name with extension, e.g.
/// `1b2m2y0h...nar.xz`. The compression variant is implicit in the
/// extension; no server-side (de)compression is performed.
///
/// # Errors
///
/// Returns [`CacheError::Io`] if the file cannot be written.
pub async fn put_nar(&self, filename: &str, data: &[u8]) -> Result<(), CacheError> {
let path = self.path.join("nar").join(filename);
fs::write(&path, data).await?;
Ok(())
}
/// Check whether a NARInfo file exists for the given store hash.
///
/// This is a lightweight existence check (no parsing) useful for
/// short-circuiting duplicate uploads. Returns `true` if
/// `<root>/<store_hash>.narinfo` is present on disk.
pub async fn has_narinfo(&self, store_hash: &str) -> bool {
self.path
.join(format!("{}.narinfo", store_hash))
.exists()
}
}

View file

@ -0,0 +1,18 @@
[package]
name = "jupiter-cli"
version.workspace = true
edition.workspace = true
[[bin]]
name = "jupiter-ctl"
path = "src/main.rs"
[dependencies]
jupiter-api-types = { workspace = true }
reqwest = { workspace = true }
clap = { workspace = true }
tokio = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
anyhow = { workspace = true }
uuid = { workspace = true }

View file

@ -0,0 +1,761 @@
//! # jupiter-ctl -- Admin CLI for the Jupiter CI Server
//!
//! `jupiter-ctl` is the administrative command-line interface for
//! [Jupiter](https://github.com/example/jupiter), a self-hosted,
//! wire-compatible replacement for [hercules-ci.com](https://hercules-ci.com).
//! It is analogous to the upstream `hci` CLI provided by Hercules CI, but
//! targets the Jupiter server's REST API instead.
//!
//! ## Architecture
//!
//! The CLI is built with [clap 4](https://docs.rs/clap/4) using derive macros.
//! Every top-level subcommand maps directly to a resource in the Jupiter REST
//! API (`/api/v1/...`):
//!
//! | Subcommand | API resource | Purpose |
//! |-------------|---------------------------------------|------------------------------------------|
//! | `account` | `/api/v1/accounts` | Create, list, inspect accounts |
//! | `agent` | `/api/v1/agents` | List and inspect connected build agents |
//! | `project` | `/api/v1/projects` | CRUD and enable/disable projects |
//! | `job` | `/api/v1/projects/{id}/jobs`, `/jobs` | List, inspect, rerun, cancel CI jobs |
//! | `state` | `/api/v1/projects/{id}/state` | Binary upload/download of state files |
//! | `token` | `/api/v1/.../clusterJoinTokens` | Manage cluster join tokens for agents |
//! | `health` | `/api/v1/health` | Quick server liveness/readiness check |
//!
//! ## Authentication
//!
//! All requests are authenticated with a bearer token. The token can be
//! supplied via the `--token` flag or the `JUPITER_TOKEN` environment
//! variable. Tokens are issued by the Jupiter server through
//! `POST /api/v1/auth/token` (or stored from a previous session).
//!
//! ## Intended audience
//!
//! This tool is designed for **server administrators**, not end users. It
//! provides unrestricted access to every server management operation
//! exposed by the Jupiter REST API.
use anyhow::Result;
use clap::{Parser, Subcommand};
use reqwest::Client;
use std::io::Write;
/// Top-level CLI definition for `jupiter-ctl`.
///
/// Parsed by clap from command-line arguments. The two global options --
/// `--server` and `--token` -- configure the [`ApiClient`] that every
/// subcommand uses to talk to the Jupiter server.
///
/// # Examples
///
/// ```bash
/// # Check server health (uses defaults: localhost:3000, no token)
/// jupiter-ctl health
///
/// # List all projects with an explicit server and token
/// jupiter-ctl --server https://ci.example.com --token $TOK project list
/// ```
// NOTE: clap's derive API uses the `///` doc comments on this struct and its
// fields as the generated `--help` text, so editing them changes user-visible
// output. Maintainer-only remarks belong in `//` comments like this one.
#[derive(Parser)]
#[command(name = "jupiter-ctl", about = "Jupiter CI admin CLI")]
struct Cli {
/// Base URL of the Jupiter server (scheme + host + port).
///
/// Defaults to `http://localhost:3000`. Can also be set via the
/// `JUPITER_URL` environment variable.
// `--server` flag; falls back to $JUPITER_URL, then to the default value.
#[arg(long, env = "JUPITER_URL", default_value = "http://localhost:3000")]
server: String,
/// Bearer token used to authenticate API requests.
///
/// Obtained from `POST /api/v1/auth/token` or stored from a previous
/// session. Can also be set via the `JUPITER_TOKEN` environment
/// variable. If omitted, requests are sent without authentication
/// (only useful for unauthenticated endpoints such as `health`).
// `--token` flag; falls back to $JUPITER_TOKEN. `Option` because it has no
// default and may legitimately be absent.
#[arg(long, env = "JUPITER_TOKEN")]
token: Option<String>,
// The selected subcommand; clap requires one to be given.
#[command(subcommand)]
command: Commands,
}
/// Top-level subcommands, each corresponding to a major REST API resource.
///
/// The structure mirrors the Jupiter server's REST API so that administrators
/// can perform any server-side operation from the command line.
// NOTE: the `///` comments on the variants below are rendered by clap as the
// help text of each subcommand; keep maintainer notes in `//` comments.
// Every variant except `Health` only wraps a nested `*Action` enum that holds
// the actual operations for that resource.
#[derive(Subcommand)]
enum Commands {
/// Account management -- create, list, and inspect accounts.
///
/// Accounts are the top-level organizational unit in Hercules CI (and
/// therefore Jupiter). Projects, agents, and cluster join tokens all
/// belong to an account. Maps to `GET/POST /api/v1/accounts`.
Account {
#[command(subcommand)]
action: AccountAction,
},
/// Agent management -- list and inspect connected build agents.
///
/// Agents are the hercules-ci-agent processes that connect to the
/// Jupiter server to pick up and execute CI jobs. This subcommand is
/// read-only; agent lifecycle is managed by the agents themselves.
/// Maps to `GET /api/v1/agents`.
Agent {
#[command(subcommand)]
action: AgentAction,
},
/// Project management -- full CRUD plus enable/disable toggle.
///
/// A project links an account to a source repository. When enabled,
/// pushes to the repository trigger evaluation and build jobs.
/// Maps to `GET/POST /api/v1/projects`.
Project {
#[command(subcommand)]
action: ProjectAction,
},
/// Job management -- list, inspect, rerun, and cancel CI jobs.
///
/// Jobs represent individual evaluation or build tasks dispatched to
/// agents. They belong to a project and are created automatically on
/// push events. Maps to `GET/POST /api/v1/jobs` and
/// `GET /api/v1/projects/{id}/jobs`.
Job {
#[command(subcommand)]
action: JobAction,
},
/// State file management -- list, download, and upload binary state.
///
/// Hercules CI effects can persist arbitrary binary data between runs
/// using "state files". This subcommand exposes the upload/download
/// endpoints so administrators can inspect or seed state data.
/// Maps to `GET/PUT /api/v1/projects/{id}/state/{name}/data`.
State {
#[command(subcommand)]
action: StateAction,
},
/// Cluster join token management -- create, list, and revoke tokens.
///
/// Cluster join tokens authorize new hercules-ci-agent instances to
/// connect to the Jupiter server under a specific account. They are
/// analogous to the tokens generated in the Hercules CI dashboard.
/// Maps to `GET/POST /api/v1/accounts/{id}/clusterJoinTokens` and
/// `DELETE /api/v1/cluster-join-tokens/{id}`.
Token {
#[command(subcommand)]
action: TokenAction,
},
/// Server health check.
///
/// Performs a simple `GET /api/v1/health` request and prints the JSON
/// response. Useful for verifying that the Jupiter server is running
/// and reachable. Does not require authentication.
// Unit variant: this subcommand takes no arguments of its own.
Health,
}
// ---------------------------------------------------------------------------
// Account subcommands
// ---------------------------------------------------------------------------
/// Actions available under `jupiter-ctl account`.
///
/// Maps to the `/api/v1/accounts` REST resource.
// NOTE: the `///` comments below are rendered by clap as subcommand help text.
// Fields without an `#[arg(...)]` attribute (`name`, `id`) are positional
// arguments, e.g. `jupiter-ctl account get <ID>`.
#[derive(Subcommand)]
enum AccountAction {
/// Create a new account.
///
/// Sends `POST /api/v1/accounts` with `{ "name": "<name>" }`.
/// Prints the created account object (including its server-assigned ID).
Create { name: String },
/// List all accounts.
///
/// Sends `GET /api/v1/accounts` and prints the JSON array of accounts.
List,
/// Get details for a single account by ID.
///
/// Sends `GET /api/v1/accounts/{id}` and prints the account object.
// The ID is kept as an opaque string; it is not validated here.
Get { id: String },
}
// ---------------------------------------------------------------------------
// Agent subcommands
// ---------------------------------------------------------------------------
/// Actions available under `jupiter-ctl agent`.
///
/// Agents are read-only from the CLI's perspective. Their lifecycle is
/// controlled by the hercules-ci-agent processes themselves; the server
/// merely tracks their state. Maps to `/api/v1/agents`.
// NOTE: the `///` comments below are rendered by clap as subcommand help text.
// Only read operations are defined here: there is no create/delete variant.
#[derive(Subcommand)]
enum AgentAction {
/// List all connected agents.
///
/// Sends `GET /api/v1/agents` and prints the JSON array of agents.
List,
/// Get details for a single agent by ID.
///
/// Sends `GET /api/v1/agents/{id}` and prints the agent object,
/// including hostname, platform capabilities, and connection status.
// `id` has no `#[arg(...)]` attribute, so it is a positional argument.
Get { id: String },
}
// ---------------------------------------------------------------------------
// Project subcommands
// ---------------------------------------------------------------------------
/// Actions available under `jupiter-ctl project`.
///
/// Projects tie an account to a source repository and control whether CI
/// jobs are created on push events. Maps to `/api/v1/projects`.
// The `///` comments below double as clap's generated `--help` text.
#[derive(Subcommand)]
enum ProjectAction {
    /// Create a new project.
    ///
    /// Sends `POST /api/v1/projects` with the account ID, repository ID,
    /// and display name. The repository ID is the forge-specific
    /// identifier (e.g. GitHub repo ID).
    Create {
        /// Account that owns this project.
        #[arg(long)]
        account_id: String,
        /// Forge-specific repository identifier.
        #[arg(long)]
        repo_id: String,
        /// Human-readable project name.
        #[arg(long)]
        name: String,
    },
    /// List all projects.
    ///
    /// Sends `GET /api/v1/projects` and prints the JSON array.
    List,
    /// Get details for a single project by ID.
    ///
    /// Sends `GET /api/v1/projects/{id}`.
    Get {
        /// ID of the project to fetch (positional).
        id: String,
    },
    /// Enable a project so that pushes trigger CI jobs.
    ///
    /// Sends `POST /api/v1/projects/{id}` with `{ "enabled": true }`.
    Enable {
        /// ID of the project to enable (positional).
        id: String,
    },
    /// Disable a project so that pushes no longer trigger CI jobs.
    ///
    /// Sends `POST /api/v1/projects/{id}` with `{ "enabled": false }`.
    Disable {
        /// ID of the project to disable (positional).
        id: String,
    },
}
// ---------------------------------------------------------------------------
// Job subcommands
// ---------------------------------------------------------------------------
/// Actions available under `jupiter-ctl job`.
///
/// Jobs represent evaluation or build work dispatched to agents. Maps to
/// `/api/v1/jobs` and `/api/v1/projects/{id}/jobs`.
// The `///` comments below double as clap's generated `--help` text.
#[derive(Subcommand)]
enum JobAction {
    /// List jobs for a specific project (paginated).
    ///
    /// Sends `GET /api/v1/projects/{project_id}/jobs?page={page}`.
    List {
        /// Project whose jobs to list.
        #[arg(long)]
        project_id: String,
        /// Page number (1-indexed). Defaults to 1.
        // `default_value_t` takes a typed literal, so the default is
        // checked by the compiler instead of being parsed from a string.
        #[arg(long, default_value_t = 1)]
        page: u64,
    },
    /// Get details for a single job by ID.
    ///
    /// Sends `GET /api/v1/jobs/{id}` and prints the job object, including
    /// status, timestamps, and associated evaluation results.
    Get {
        /// ID of the job to fetch (positional).
        id: String,
    },
    /// Re-run a previously completed (or failed) job.
    ///
    /// Sends `POST /api/v1/jobs/{id}/rerun`. The server will create a new
    /// job execution with the same parameters.
    Rerun {
        /// ID of the job to re-run (positional).
        id: String,
    },
    /// Cancel a currently running job.
    ///
    /// Sends `POST /api/v1/jobs/{id}/cancel`. The agent executing the
    /// job will be notified to abort.
    Cancel {
        /// ID of the job to cancel (positional).
        id: String,
    },
}
// ---------------------------------------------------------------------------
// State subcommands
// ---------------------------------------------------------------------------
/// Actions available under `jupiter-ctl state`.
///
/// State files are opaque binary blobs that Hercules CI effects can
/// persist between runs. For example, a deployment effect might store a
/// Terraform state file. The state API uses `application/octet-stream`
/// for upload and download rather than JSON.
///
/// Maps to `/api/v1/projects/{id}/states` (listing) and
/// `/api/v1/projects/{id}/state/{name}/data` (get/put).
// Implementation note: the `///` comments on the variants and fields below
// double as the `--help` text generated by clap's derive macro, so any
// wording change here is user-visible.
#[derive(Subcommand)]
enum StateAction {
/// List all state files for a project.
///
/// Sends `GET /api/v1/projects/{project_id}/states` and prints the
/// JSON array of state file metadata.
List {
/// Project whose state files to list.
#[arg(long)]
project_id: String,
},
/// Download a state file (binary).
///
/// Sends `GET /api/v1/projects/{project_id}/state/{name}/data`.
/// The raw bytes are written to `--output` if specified, otherwise
/// they are written directly to stdout. This allows piping into
/// other tools (e.g. `jupiter-ctl state get ... | tar xz`).
Get {
/// Project that owns the state file.
#[arg(long)]
project_id: String,
/// Logical name of the state file (as used in the Hercules CI effect).
// Interpolated into the request path by `main`.
#[arg(long)]
name: String,
/// Output file path. If omitted, raw bytes are written to stdout.
// `main` downloads the whole payload into memory (`get_bytes` returns a
// `Vec<u8>`) and then hands this path to `tokio::fs::write`.
#[arg(long)]
output: Option<String>,
},
/// Upload (create or replace) a state file (binary).
///
/// Reads the file at `--input` and sends its contents as
/// `PUT /api/v1/projects/{project_id}/state/{name}/data` with
/// `Content-Type: application/octet-stream`.
Put {
/// Project that owns the state file.
#[arg(long)]
project_id: String,
/// Logical name of the state file.
#[arg(long)]
name: String,
/// Path to the local file whose contents will be uploaded.
// `main` reads the file fully into memory with `tokio::fs::read` before
// the upload starts.
#[arg(long)]
input: String,
},
}
// ---------------------------------------------------------------------------
// Token subcommands
// ---------------------------------------------------------------------------
/// Actions available under `jupiter-ctl token`.
///
/// Cluster join tokens authorize hercules-ci-agent instances to register
/// with the Jupiter server under a specific account. An agent presents
/// this token during its initial handshake; the server then associates
/// the agent with the account.
///
/// Maps to `/api/v1/accounts/{id}/clusterJoinTokens` and
/// `/api/v1/cluster-join-tokens/{id}`.
// The `///` comments below double as clap's generated `--help` text.
#[derive(Subcommand)]
enum TokenAction {
    /// Create a new cluster join token for an account.
    ///
    /// Sends `POST /api/v1/accounts/{account_id}/clusterJoinTokens`
    /// with `{ "name": "<name>" }`. The response includes the raw token
    /// value -- this is the only time it is returned in cleartext.
    Create {
        /// Account the token belongs to.
        #[arg(long)]
        account_id: String,
        /// Human-readable label for the token.
        #[arg(long)]
        name: String,
    },
    /// List all cluster join tokens for an account.
    ///
    /// Sends `GET /api/v1/accounts/{account_id}/clusterJoinTokens`.
    /// Note: the raw token values are **not** included in the listing for
    /// security reasons.
    List {
        /// Account whose tokens to list.
        #[arg(long)]
        account_id: String,
    },
    /// Revoke (delete) a cluster join token by ID.
    ///
    /// Sends `DELETE /api/v1/cluster-join-tokens/{id}`. Any agent that
    /// was using this token will be unable to re-authenticate after its
    /// current session expires.
    Revoke {
        /// ID of the token to revoke (positional).
        id: String,
    },
}
// ---------------------------------------------------------------------------
// API client
// ---------------------------------------------------------------------------
/// HTTP client wrapper for the Jupiter REST API.
///
/// `ApiClient` encapsulates a [`reqwest::Client`], the server base URL, and
/// an optional bearer token. All subcommand handlers use this struct to
/// issue HTTP requests against the Jupiter server.
///
/// The client provides convenience methods for common request patterns:
///
/// - [`get_json`](Self::get_json) / [`post_json`](Self::post_json) --
///   JSON request/response for most CRUD operations.
/// - [`get_bytes`](Self::get_bytes) / [`put_bytes`](Self::put_bytes) --
///   raw binary transfer for state file operations.
/// - [`delete`](Self::delete) -- resource deletion (e.g. token revocation).
///
/// Every method funnels its response through
/// [`ensure_success`](Self::ensure_success), which returns an
/// [`anyhow::Error`] carrying the status and response body on non-2xx
/// responses.
struct ApiClient {
    /// Underlying HTTP client (connection pool, TLS, etc.).
    client: Client,
    /// Base URL of the Jupiter server, e.g. `http://localhost:3000`.
    base_url: String,
    /// Optional bearer token for authentication. When `Some`, it is
    /// attached to every outgoing request as an `Authorization: Bearer`
    /// header.
    token: Option<String>,
}

impl ApiClient {
    /// Create a new `ApiClient` targeting the given server URL.
    ///
    /// If `token` is `Some`, all requests will include an
    /// `Authorization: Bearer <token>` header.
    fn new(base_url: String, token: Option<String>) -> Self {
        Self {
            client: Client::new(),
            base_url,
            token,
        }
    }

    /// Build an absolute URL for the given API path.
    ///
    /// Joins the base URL with `/api/v1` and the provided `path`.
    /// Trailing slashes on the base URL are stripped to avoid double
    /// slashes. `path` is appended verbatim, so it must start with `/`
    /// and any dynamic segment must already be URL-safe.
    ///
    /// # Example
    ///
    /// ```text
    /// base_url = "http://localhost:3000/"
    /// path     = "/accounts"
    /// result   = "http://localhost:3000/api/v1/accounts"
    /// ```
    fn url(&self, path: &str) -> String {
        format!("{}/api/v1{}", self.base_url.trim_end_matches('/'), path)
    }

    /// Start building an HTTP request with the given method and API path.
    ///
    /// The bearer token (if present) is automatically attached. Callers
    /// can further customize the [`reqwest::RequestBuilder`] before
    /// sending (e.g. adding a JSON body or custom headers).
    fn request(&self, method: reqwest::Method, path: &str) -> reqwest::RequestBuilder {
        let req = self.client.request(method, self.url(path));
        match &self.token {
            Some(token) => req.bearer_auth(token),
            None => req,
        }
    }

    /// Pass a 2xx response through unchanged; turn anything else into an
    /// error of the form `HTTP <status>: <body>`.
    ///
    /// This is the single place where the status check lives, so all
    /// request helpers report failures identically. If the error body
    /// itself cannot be read, an empty string is used (best effort) so
    /// the status code is still reported.
    async fn ensure_success(resp: reqwest::Response) -> Result<reqwest::Response> {
        let status = resp.status();
        if status.is_success() {
            return Ok(resp);
        }
        let body = resp.text().await.unwrap_or_default();
        Err(anyhow::anyhow!("HTTP {}: {}", status, body))
    }

    /// Send a `GET` request and deserialize the response as JSON.
    ///
    /// Returns `Err` if the request fails, if the server responds with a
    /// non-2xx status code (the error message includes both the status
    /// and the response body), or if the body is not valid JSON.
    async fn get_json(&self, path: &str) -> Result<serde_json::Value> {
        let resp = self.request(reqwest::Method::GET, path).send().await?;
        Ok(Self::ensure_success(resp).await?.json().await?)
    }

    /// Send a `POST` request with a JSON body and deserialize the JSON
    /// response.
    ///
    /// Used for creating resources (accounts, projects, tokens) and for
    /// triggering actions (rerun, cancel, enable/disable).
    ///
    /// Returns `Err` on non-2xx status codes, and also when a successful
    /// response does not carry a JSON body.
    async fn post_json(&self, path: &str, body: &serde_json::Value) -> Result<serde_json::Value> {
        let resp = self
            .request(reqwest::Method::POST, path)
            .json(body)
            .send()
            .await?;
        Ok(Self::ensure_success(resp).await?.json().await?)
    }

    /// Send a `DELETE` request. The response body is ignored.
    ///
    /// Currently used only for revoking cluster join tokens
    /// (`DELETE /api/v1/cluster-join-tokens/{id}`).
    ///
    /// Returns `Err` on non-2xx status codes.
    async fn delete(&self, path: &str) -> Result<()> {
        let resp = self
            .request(reqwest::Method::DELETE, path)
            .send()
            .await?;
        Self::ensure_success(resp).await?;
        Ok(())
    }

    /// Send a `PUT` request with a raw binary body
    /// (`Content-Type: application/octet-stream`).
    ///
    /// Used to upload state file data. The response body is ignored.
    ///
    /// Returns `Err` on non-2xx status codes.
    async fn put_bytes(&self, path: &str, data: Vec<u8>) -> Result<()> {
        let resp = self
            .request(reqwest::Method::PUT, path)
            .header(reqwest::header::CONTENT_TYPE, "application/octet-stream")
            .body(data)
            .send()
            .await?;
        Self::ensure_success(resp).await?;
        Ok(())
    }

    /// Send a `GET` request and return the response as raw bytes.
    ///
    /// Used to download state file data. The whole body is buffered in
    /// memory and returned as an owned `Vec<u8>` without any
    /// deserialization.
    ///
    /// Returns `Err` on non-2xx status codes.
    async fn get_bytes(&self, path: &str) -> Result<Vec<u8>> {
        let resp = self.request(reqwest::Method::GET, path).send().await?;
        Ok(Self::ensure_success(resp).await?.bytes().await?.to_vec())
    }
}
// ---------------------------------------------------------------------------
// Entry point
// ---------------------------------------------------------------------------
/// Application entry point.
///
/// Parses CLI arguments via clap, constructs an [`ApiClient`] from the
/// global `--server` and `--token` options, then dispatches to the
/// appropriate handler based on the selected subcommand.
///
/// All subcommand handlers follow the same pattern:
/// 1. Build the API path from the subcommand arguments.
/// 2. Call the matching [`ApiClient`] method (`get_json`, `post_json`,
/// `delete`, `get_bytes`, or `put_bytes`).
/// 3. Pretty-print the JSON response to stdout (or write raw bytes for
/// state file downloads).
///
/// Errors from the HTTP layer or JSON serialization are propagated via
/// `anyhow` and printed to stderr by the tokio runtime.
#[tokio::main]
async fn main() -> Result<()> {
let cli = Cli::parse();
let api = ApiClient::new(cli.server, cli.token);
match cli.command {
Commands::Health => {
let resp = api.get_json("/health").await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
Commands::Account { action } => match action {
AccountAction::Create { name } => {
let resp = api
.post_json("/accounts", &serde_json::json!({ "name": name }))
.await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
AccountAction::List => {
let resp = api.get_json("/accounts").await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
AccountAction::Get { id } => {
let resp = api.get_json(&format!("/accounts/{}", id)).await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
},
Commands::Agent { action } => match action {
AgentAction::List => {
let resp = api.get_json("/agents").await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
AgentAction::Get { id } => {
let resp = api.get_json(&format!("/agents/{}", id)).await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
},
Commands::Project { action } => match action {
ProjectAction::Create {
account_id,
repo_id,
name,
} => {
let resp = api
.post_json(
"/projects",
&serde_json::json!({
"accountId": account_id,
"repoId": repo_id,
"name": name,
}),
)
.await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
ProjectAction::List => {
let resp = api.get_json("/projects").await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
ProjectAction::Get { id } => {
let resp = api.get_json(&format!("/projects/{}", id)).await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
ProjectAction::Enable { id } => {
let resp = api
.post_json(
&format!("/projects/{}", id),
&serde_json::json!({ "enabled": true }),
)
.await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
ProjectAction::Disable { id } => {
let resp = api
.post_json(
&format!("/projects/{}", id),
&serde_json::json!({ "enabled": false }),
)
.await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
},
Commands::Job { action } => match action {
JobAction::List { project_id, page } => {
let resp = api
.get_json(&format!("/projects/{}/jobs?page={}", project_id, page))
.await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
JobAction::Get { id } => {
let resp = api.get_json(&format!("/jobs/{}", id)).await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
JobAction::Rerun { id } => {
let resp = api
.post_json(&format!("/jobs/{}/rerun", id), &serde_json::json!({}))
.await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
JobAction::Cancel { id } => {
let resp = api
.post_json(&format!("/jobs/{}/cancel", id), &serde_json::json!({}))
.await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
},
Commands::State { action } => match action {
StateAction::List { project_id } => {
let resp = api
.get_json(&format!("/projects/{}/states", project_id))
.await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
StateAction::Get {
project_id,
name,
output,
} => {
let data = api
.get_bytes(&format!(
"/projects/{}/state/{}/data",
project_id, name
))
.await?;
if let Some(path) = output {
tokio::fs::write(&path, &data).await?;
println!("Written {} bytes to {}", data.len(), path);
} else {
std::io::stdout().write_all(&data)?;
}
}
StateAction::Put {
project_id,
name,
input,
} => {
let data = tokio::fs::read(&input).await?;
api.put_bytes(
&format!("/projects/{}/state/{}/data", project_id, name),
data,
)
.await?;
println!("State '{}' updated", name);
}
},
Commands::Token { action } => match action {
TokenAction::Create { account_id, name } => {
let resp = api
.post_json(
&format!("/accounts/{}/clusterJoinTokens", account_id),
&serde_json::json!({ "name": name }),
)
.await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
TokenAction::List { account_id } => {
let resp = api
.get_json(&format!("/accounts/{}/clusterJoinTokens", account_id))
.await?;
println!("{}", serde_json::to_string_pretty(&resp)?);
}
TokenAction::Revoke { id } => {
api.delete(&format!("/cluster-join-tokens/{}", id)).await?;
println!("Token revoked");
}
},
}
Ok(())
}

View file

@ -0,0 +1,16 @@
# Manifest for the `jupiter-db` crate.
# `version` and `edition` are inherited from the workspace root manifest.
[package]
name = "jupiter-db"
version.workspace = true
edition.workspace = true
# Every dependency below uses `workspace = true`, i.e. its version is
# declared once under `[workspace.dependencies]` in the root manifest.
[dependencies]
jupiter-api-types = { workspace = true }
sqlx = { workspace = true }
tokio = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true }
thiserror = { workspace = true }
async-trait = { workspace = true }
tracing = { workspace = true }

View file

@ -0,0 +1,337 @@
-- =======================================================================
-- Jupiter initial schema
-- =======================================================================
--
-- This migration creates the complete data model for Jupiter, a
-- self-hosted, wire-compatible replacement for hercules-ci.com.
--
-- The schema mirrors the Hercules CI object hierarchy:
--
-- Account -> Project -> Job -> [Attributes, Builds, Effects]
--
-- Key design choices:
-- - All IDs are UUIDv4 stored as TEXT (SQLite has no native UUID type),
-- except `log_entries.id`, which is an INTEGER AUTOINCREMENT surrogate key.
-- - All timestamps are TEXT in UTC "YYYY-MM-DD HH:MM:SS" format.
-- - Booleans are INTEGER 0/1 (SQLite convention).
-- - Structured data (JSON arrays/objects) are stored as TEXT and
-- serialized/deserialized at the application layer.
-- - Foreign keys enforce referential integrity (requires PRAGMA
-- foreign_keys=ON at connection time).
-- =======================================================================
-- ── Accounts ─────────────────────────────────────────────────────────
-- Top-level ownership entity. Every project, join token, and agent
-- session belongs to exactly one account. In Hercules CI an account
-- can be a "user" or an "organization".
--
-- NOTE(review): `account_type` has no CHECK constraint, so the database
-- accepts any string; the 'user' | 'organization' domain is presumably
-- enforced at the application layer -- confirm.
-- The follow-up migration adds a nullable `password_hash` column here.
CREATE TABLE IF NOT EXISTS accounts (
id TEXT PRIMARY KEY NOT NULL,
name TEXT NOT NULL UNIQUE, -- Human-readable display name; also used for login.
account_type TEXT NOT NULL DEFAULT 'user', -- 'user' | 'organization'
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- ── Cluster Join Tokens ──────────────────────────────────────────────
-- Bearer tokens that hercules-ci-agent presents during the WebSocket
-- handshake. Only the bcrypt hash is stored; the raw token is shown
-- to the admin once at creation time and never persisted.
--
-- The `account_id` foreign key declares no ON DELETE action, so (with
-- foreign keys enforced) an account cannot be deleted while it still
-- owns tokens. This migration creates no index on `account_id`.
CREATE TABLE IF NOT EXISTS cluster_join_tokens (
id TEXT PRIMARY KEY NOT NULL,
account_id TEXT NOT NULL REFERENCES accounts(id), -- Owning account; agent inherits this identity.
name TEXT NOT NULL, -- Admin-friendly label (e.g., "prod-agent-1").
token_hash TEXT NOT NULL, -- bcrypt hash of the raw bearer token.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- ── Forges ───────────────────────────────────────────────────────────
-- A forge is an external code-hosting platform (GitHub, Gitea, etc.).
-- Webhook secrets and API credentials are stored in `config` (JSON).
--
-- NOTE(review): as far as this schema shows, `config` is plain TEXT, so
-- the webhook secret and tokens are only as protected as the database
-- file itself -- confirm whether the application encrypts them.
CREATE TABLE IF NOT EXISTS forges (
id TEXT PRIMARY KEY NOT NULL,
forge_type TEXT NOT NULL, -- 'github' | 'gitea' | etc.
config TEXT NOT NULL, -- JSON blob with API URL, webhook secret, tokens, etc.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- ── Repos ────────────────────────────────────────────────────────────
-- Mirror of a repository on a forge. Stores the clone URL and default
-- branch so agents know where to fetch code.
-- UNIQUE(forge_id, owner, name) prevents duplicate registrations of
-- the same repo from different webhook deliveries.
-- SQLite backs that UNIQUE constraint with an implicit index, so lookups
-- by (forge_id, owner, name) need no separate CREATE INDEX.
CREATE TABLE IF NOT EXISTS repos (
id TEXT PRIMARY KEY NOT NULL,
forge_id TEXT NOT NULL REFERENCES forges(id), -- Which forge this repo lives on.
owner TEXT NOT NULL, -- GitHub/Gitea user or org owning the repo.
name TEXT NOT NULL, -- Repository name (without owner prefix).
clone_url TEXT NOT NULL, -- HTTPS or SSH clone URL.
default_branch TEXT NOT NULL DEFAULT 'main', -- Used to decide if a push triggers effects.
created_at TEXT NOT NULL DEFAULT (datetime('now')),
UNIQUE(forge_id, owner, name)
);
-- ── Projects ─────────────────────────────────────────────────────────
-- A project binds an account to a repo. It is the primary grouping
-- entity for jobs, secrets, state files, and schedules.
-- `enabled` controls whether incoming webhooks create jobs.
--
-- NOTE(review): `name` is UNIQUE across the whole table, so two
-- different accounts cannot both own a project with the same name.
-- Confirm this is intended rather than UNIQUE(account_id, name).
-- Nothing here prevents two projects from pointing at the same repo.
CREATE TABLE IF NOT EXISTS projects (
id TEXT PRIMARY KEY NOT NULL,
account_id TEXT NOT NULL REFERENCES accounts(id), -- Owning account.
repo_id TEXT NOT NULL REFERENCES repos(id), -- Backing repository.
name TEXT NOT NULL UNIQUE, -- Human-readable project name.
enabled INTEGER NOT NULL DEFAULT 1, -- 1 = active, 0 = paused.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- ── Agent Sessions ───────────────────────────────────────────────────
-- Each connected hercules-ci-agent has one row here. The session
-- records the agent's self-reported capabilities so the scheduler can
-- match tasks to capable agents.
--
-- `platforms` is a JSON array of Nix system strings, e.g.,
-- ["x86_64-linux", "aarch64-linux"].
-- `system_features` is a JSON array of the system features the agent
-- reports, e.g., ["kvm", "big-parallel"].
--
-- `task_queue.agent_session_id` and `builds.agent_session_id` reference
-- this table without an ON DELETE action, so (with foreign keys
-- enforced) a session row cannot be deleted while a task or build still
-- points at it.
CREATE TABLE IF NOT EXISTS agent_sessions (
id TEXT PRIMARY KEY NOT NULL,
account_id TEXT NOT NULL REFERENCES accounts(id), -- Account the agent authenticated as.
hostname TEXT NOT NULL, -- Self-reported hostname.
platforms TEXT NOT NULL, -- JSON array of Nix system strings.
system_features TEXT NOT NULL DEFAULT '[]', -- JSON array of system feature strings.
concurrency INTEGER NOT NULL DEFAULT 2, -- Max parallel builds this agent supports.
agent_version TEXT, -- Agent software version (informational).
nix_version TEXT, -- Nix version (informational).
connected_at TEXT NOT NULL DEFAULT (datetime('now')), -- When the WebSocket session started.
last_heartbeat TEXT NOT NULL DEFAULT (datetime('now')) -- Updated on each keepalive ping.
);
-- ── Jobs ─────────────────────────────────────────────────────────────
-- A job is a single CI run triggered by a push or PR event. It
-- progresses through:
-- pending -> evaluating -> building -> running_effects -> succeeded / failed
--
-- `sequence_number` is per-(project, ref) and monotonically increases.
-- Effects use it to ensure ordering: effects for sequence N cannot
-- start until all effects for sequence < N on the same ref are done.
--
-- Forge/repo metadata is denormalized for convenient display without
-- extra joins.
--
-- NOTE(review): the schema has no UNIQUE(project_id, ref_name,
-- sequence_number) and no CHECK on `status`, so both the monotonic
-- counter and the lifecycle states rely entirely on the writer.
-- `updated_at` only has an insert-time default; this migration defines
-- no trigger, so writers must set it on every update.
CREATE TABLE IF NOT EXISTS jobs (
id TEXT PRIMARY KEY NOT NULL,
project_id TEXT NOT NULL REFERENCES projects(id),
forge_type TEXT NOT NULL, -- Denormalized from forges.forge_type.
repo_owner TEXT NOT NULL, -- Denormalized from repos.owner.
repo_name TEXT NOT NULL, -- Denormalized from repos.name.
ref_name TEXT NOT NULL, -- Git ref (e.g., "refs/heads/main").
commit_sha TEXT NOT NULL, -- Full 40-char SHA.
status TEXT NOT NULL DEFAULT 'pending', -- Job lifecycle state.
sequence_number INTEGER NOT NULL DEFAULT 0, -- Per-(project, ref) ordering counter.
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Speeds up lookups by (project, ref) for the "latest job on branch"
-- query and for sequence-number computation.
CREATE INDEX IF NOT EXISTS idx_jobs_project_ref ON jobs(project_id, ref_name);
-- ── Task Queue ───────────────────────────────────────────────────────
-- Unified dispatch queue for all agent work: evaluation, build, and
-- effect tasks. Each task optionally specifies a required `platform`
-- so the scheduler can route it to a capable agent.
--
-- Lifecycle: pending -> running -> succeeded / failed
--
-- If an agent disconnects, its running tasks are reset to pending
-- (see `requeue_agent_tasks`).
--
-- `updated_at` only has an insert-time default; this migration defines
-- no trigger, so writers must set it on every status change.
CREATE TABLE IF NOT EXISTS task_queue (
id TEXT PRIMARY KEY NOT NULL,
job_id TEXT NOT NULL REFERENCES jobs(id), -- Owning job.
task_type TEXT NOT NULL, -- 'evaluation' | 'build' | 'effect'
status TEXT NOT NULL DEFAULT 'pending', -- 'pending' | 'running' | 'succeeded' | 'failed'
platform TEXT, -- Required Nix system (NULL = any agent).
required_features TEXT NOT NULL DEFAULT '[]', -- JSON array of required system features (future use).
payload TEXT NOT NULL, -- JSON blob; schema depends on task_type.
agent_session_id TEXT REFERENCES agent_sessions(id), -- Agent that claimed this task (NULL while pending).
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Speeds up `dequeue_task`: find the oldest pending task matching a platform.
-- NOTE(review): the index covers (status, platform) only; "oldest" implies
-- an ORDER BY on `created_at`, which this index cannot satisfy by itself.
-- Confirm against the actual dequeue query. `agent_session_id` and `job_id`
-- are not indexed by this migration.
CREATE INDEX IF NOT EXISTS idx_task_queue_status ON task_queue(status, platform);
-- ── Attributes (evaluation results) ─────────────────────────────────
-- During evaluation the agent walks the flake's `herculesCI` output
-- attribute tree and reports each attribute back. Each row records
-- the attribute path (JSON array), its type, an optional derivation
-- path, and any evaluation error.
--
-- Nothing in the schema prevents the same (job_id, path) pair from being
-- inserted twice; `derivation_path` and `error` are both nullable.
CREATE TABLE IF NOT EXISTS attributes (
id TEXT PRIMARY KEY NOT NULL,
job_id TEXT NOT NULL REFERENCES jobs(id),
path TEXT NOT NULL, -- JSON array of path segments, e.g. '["onPush","default"]'.
derivation_path TEXT, -- /nix/store/…drv path, if this attr produces a derivation.
attribute_type TEXT NOT NULL DEFAULT 'regular', -- 'regular' | 'effect' | etc.
error TEXT, -- Evaluation error message, if any.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Speeds up "get all attributes for a job" queries.
CREATE INDEX IF NOT EXISTS idx_attributes_job ON attributes(job_id);
-- ── Derivation Info ──────────────────────────────────────────────────
-- Stores Nix-level metadata from `nix show-derivation` so the
-- scheduler knows which platform a build targets without
-- re-evaluating.
--
-- `required_system_features` (JSON array) and `platform` are used to
-- match builds to agents. `input_derivations` (JSON array) lists
-- transitive build inputs. `outputs` (JSON object) maps output names
-- to store paths.
--
-- Rows are keyed per job: the same `derivation_path` may appear once for
-- every job that references it, and the schema does not enforce
-- uniqueness of (job_id, derivation_path).
CREATE TABLE IF NOT EXISTS derivation_info (
id TEXT PRIMARY KEY NOT NULL,
job_id TEXT NOT NULL REFERENCES jobs(id),
derivation_path TEXT NOT NULL, -- /nix/store/…drv path.
platform TEXT NOT NULL, -- Nix system string, e.g. "x86_64-linux".
required_system_features TEXT NOT NULL DEFAULT '[]', -- JSON array, e.g. '["kvm"]'.
input_derivations TEXT NOT NULL DEFAULT '[]', -- JSON array of input .drv paths.
outputs TEXT NOT NULL DEFAULT '{}', -- JSON object: {"out": "/nix/store/…", …}.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Speeds up "get all derivation info for a job" queries.
CREATE INDEX IF NOT EXISTS idx_derivation_info_job ON derivation_info(job_id);
-- ── Builds ───────────────────────────────────────────────────────────
-- Builds are **deduplicated by derivation path**. If two different
-- jobs require the same /nix/store/…drv, only one build record is
-- created. The many-to-many `build_jobs` table below tracks which
-- jobs share a build.
--
-- `INSERT OR IGNORE` on the UNIQUE derivation_path column implements
-- the deduplication (see `create_or_get_build`).
--
-- Lifecycle: pending -> building -> succeeded / failed / cancelled
--
-- The UNIQUE constraint also provides the (implicit) index used to look
-- a build up by derivation path. `status` has no CHECK constraint, so
-- the lifecycle above is enforced only by the application.
CREATE TABLE IF NOT EXISTS builds (
id TEXT PRIMARY KEY NOT NULL,
derivation_path TEXT NOT NULL UNIQUE, -- Deduplication key.
status TEXT NOT NULL DEFAULT 'pending', -- Build lifecycle state.
agent_session_id TEXT REFERENCES agent_sessions(id), -- Agent that is building (NULL while pending).
started_at TEXT, -- Set when status becomes 'building'.
completed_at TEXT, -- Set when status reaches a terminal state.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- ── Build-Jobs join table ────────────────────────────────────────────
-- Many-to-many relationship between builds and jobs. Because builds
-- are deduplicated, a single build can be shared across multiple jobs
-- (and even projects). This table lets the job controller query
-- "are all builds for job X done?".
--
-- NOTE(review): the primary-key index leads with `build_id`, so a query
-- that filters on `job_id` alone (as in the question above) cannot seek
-- in it. Consider an index on `job_id` if that query shows up in
-- profiles -- confirm against the actual query.
CREATE TABLE IF NOT EXISTS build_jobs (
build_id TEXT NOT NULL REFERENCES builds(id),
job_id TEXT NOT NULL REFERENCES jobs(id),
PRIMARY KEY (build_id, job_id) -- Composite PK prevents duplicate links.
);
-- ── Effects ──────────────────────────────────────────────────────────
-- Effects are post-build side-effects (deploys, notifications, state
-- file updates) defined in the `herculesCI.onPush` output. They run
-- after all builds for a job complete.
--
-- Effects are serialised per (project, ref): effects for sequence
-- number N do not start until all effects for sequence < N on the
-- same ref have completed. This prevents overlapping deploys.
--
-- Lifecycle: pending -> running -> succeeded / failed / cancelled
--
-- This table stores no sequence number of its own; the ordering key is
-- `jobs.sequence_number`, reached through `job_id`.
CREATE TABLE IF NOT EXISTS effects (
id TEXT PRIMARY KEY NOT NULL,
job_id TEXT NOT NULL REFERENCES jobs(id),
attribute_path TEXT NOT NULL, -- JSON array of the Nix attribute path.
derivation_path TEXT NOT NULL, -- /nix/store/…drv path of the effect derivation.
status TEXT NOT NULL DEFAULT 'pending',
started_at TEXT, -- Set when status becomes 'running'.
completed_at TEXT, -- Set on terminal status.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Speeds up "get all effects for a job" queries.
CREATE INDEX IF NOT EXISTS idx_effects_job ON effects(job_id);
-- ── State Files ──────────────────────────────────────────────────────
-- Implements the Hercules CI `hci state` feature: a key-value store
-- of versioned binary blobs scoped per project. Effects can read and
-- write state files to persist data across CI runs (e.g., Terraform
-- state, deployment manifests).
--
-- Each write bumps the `version` counter and replaces the `data` BLOB.
-- The composite primary key (project_id, name) enforces uniqueness.
--
-- Only the latest payload is kept (one row per name). This migration
-- defines no trigger, so `version`, `size_bytes` and `updated_at` must
-- all be maintained by the writing code.
CREATE TABLE IF NOT EXISTS state_files (
project_id TEXT NOT NULL REFERENCES projects(id),
name TEXT NOT NULL, -- User-defined state file name.
data BLOB NOT NULL, -- Raw binary payload.
version INTEGER NOT NULL DEFAULT 1, -- Monotonically increasing on each write.
size_bytes INTEGER NOT NULL DEFAULT 0, -- Cached size for listing without loading data.
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
PRIMARY KEY (project_id, name)
);
-- ── State Locks ──────────────────────────────────────────────────────
-- Distributed advisory locks with automatic lease expiry. Effects
-- acquire a lock before reading/writing a state file to prevent
-- concurrent modifications from parallel jobs.
--
-- The UNIQUE(project_id, name) constraint enforces mutual exclusion:
-- only one lock per (project, name) can exist at a time. Expired
-- locks are cleaned up lazily on acquire and periodically by a
-- background janitor.
--
-- The schema itself knows nothing about expiry: a row whose `expires_at`
-- is in the past still occupies the UNIQUE slot until it is deleted.
-- There is no foreign key to `state_files`; the link is by name only.
CREATE TABLE IF NOT EXISTS state_locks (
id TEXT PRIMARY KEY NOT NULL,
project_id TEXT NOT NULL REFERENCES projects(id),
name TEXT NOT NULL, -- Lock name (typically matches the state file name).
owner TEXT NOT NULL, -- Free-form identifier of the lock holder.
expires_at TEXT NOT NULL, -- Lease expiry; after this time the lock is stale.
created_at TEXT NOT NULL DEFAULT (datetime('now')),
UNIQUE(project_id, name) -- At most one active lock per (project, name).
);
-- ── Secrets ──────────────────────────────────────────────────────────
-- Encrypted JSON blobs scoped to a project. Secrets are delivered to
-- the agent during effect execution when the `condition` matches
-- (e.g., "always", or only for pushes to the default branch).
--
-- The `data` column stores the secret payload as JSON text. At the
-- Rust layer it is wrapped in `Sensitive<_>` to prevent accidental
-- logging.
--
-- NOTE(review): the first sentence says "Encrypted" while the column is
-- described as plain JSON text; nothing in this schema performs or
-- records encryption. Confirm whether the application encrypts `data`
-- before insert and align the wording accordingly.
CREATE TABLE IF NOT EXISTS secrets (
id TEXT PRIMARY KEY NOT NULL,
project_id TEXT NOT NULL REFERENCES projects(id),
name TEXT NOT NULL, -- User-defined secret name.
data TEXT NOT NULL, -- JSON blob with the secret payload.
condition TEXT NOT NULL DEFAULT '"always"', -- JSON-serialized SecretCondition enum.
created_at TEXT NOT NULL DEFAULT (datetime('now')),
UNIQUE(project_id, name) -- One secret per name per project.
);
-- ── Log Entries ──────────────────────────────────────────────────────
-- Agents stream structured log lines while executing tasks (evaluation,
-- build, or effect). Each line has a zero-based index, a millisecond
-- timestamp, a message string, and a severity level.
--
-- Uses INTEGER PRIMARY KEY AUTOINCREMENT as a surrogate key (not UUID)
-- for insert performance on high-volume log streams.
--
-- `task_id` has no REFERENCES clause, so the database does not verify
-- that it names an existing task and does not block deleting a task that
-- still has log lines.
CREATE TABLE IF NOT EXISTS log_entries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
task_id TEXT NOT NULL, -- The task producing these logs.
line_index INTEGER NOT NULL, -- Zero-based line number within the task.
timestamp_ms INTEGER NOT NULL, -- Milliseconds since epoch for the log line.
message TEXT NOT NULL, -- Log message content.
level TEXT NOT NULL DEFAULT 'info', -- 'debug' | 'info' | 'warn' | 'error'
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Speeds up paginated log retrieval: "get lines N..N+limit for task X".
-- The index is not UNIQUE, so a repeated (task_id, line_index) pair is
-- accepted by the database.
CREATE INDEX IF NOT EXISTS idx_log_entries_task ON log_entries(task_id, line_index);
-- ── Schedules ────────────────────────────────────────────────────────
-- Cron-based job triggers. When enabled, the scheduler creates a new
-- job at the configured interval on the specified ref.
-- (Future feature -- not yet wired into the scheduler.)
--
-- NOTE(review): `ref_name` defaults to the short form 'main', whereas
-- `jobs.ref_name` is documented with the full form "refs/heads/main".
-- Confirm which form the scheduler is expected to use before wiring
-- this table in.
CREATE TABLE IF NOT EXISTS schedules (
id TEXT PRIMARY KEY NOT NULL,
project_id TEXT NOT NULL REFERENCES projects(id),
cron_expression TEXT NOT NULL, -- Standard 5-field cron expression.
ref_name TEXT NOT NULL DEFAULT 'main', -- Git ref to evaluate.
enabled INTEGER NOT NULL DEFAULT 1, -- 1 = active, 0 = paused.
last_triggered_at TEXT, -- When the cron last fired.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);

View file

@ -0,0 +1,14 @@
-- =======================================================================
-- Add password-based authentication for accounts
-- =======================================================================
--
-- The initial schema only supported agent authentication via cluster
-- join tokens (bcrypt-hashed bearer tokens). This migration adds a
-- `password_hash` column to the `accounts` table so that human users
-- can also authenticate with a username + password (bcrypt-hashed).
--
-- The column is nullable: accounts that authenticate exclusively via
-- forge OAuth (GitHub, Gitea, etc.) will leave it NULL. The auth
-- layer checks for NULL before attempting bcrypt verification.
ALTER TABLE accounts ADD COLUMN password_hash TEXT;

View file

@ -0,0 +1,693 @@
//! # StorageBackend -- the database abstraction trait
//!
//! Every server component that needs to persist or query data depends on
//! this trait rather than on a concrete database implementation. This
//! inversion allows:
//!
//! 1. Swapping SQLite for PostgreSQL via a feature flag.
//! 2. Using an in-memory SQLite database in integration tests.
//! 3. Eventually mocking the trait in unit tests.
//!
//! The trait surface is organised into sections that match the Hercules CI
//! data model. Each section corresponds to one or more SQL tables (see
//! the migration files for the full schema).
//!
//! ## Hercules CI pipeline overview
//!
//! ```text
//! webhook / push event
//! --> create Job (status: pending)
//! --> enqueue evaluation Task
//! --> agent dequeues & evaluates the flake
//! --> store Attributes + DerivationInfo
//! --> create or deduplicate Builds
//! --> enqueue build Tasks (one per unique derivation)
//! --> agents build; when all builds complete:
//! --> create & run Effects (side-effects like deploys)
//! --> job marked succeeded / failed
//! ```
use async_trait::async_trait;
use jupiter_api_types::{
Account, AccountType, AgentHello, AgentSession, AttributeResult, AttributeType, Build,
BuildStatus, ClusterJoinToken, Effect, EffectStatus, ForgeType, Job, JobStatus, JobSummary,
LogEntry, Project, Repo, Secret, SecretCondition, StateFile, StateLock, TaskStatus, TaskType,
};
use uuid::Uuid;
use crate::error::Result;
/// Async trait that abstracts all database operations for the Jupiter
/// server.
///
/// Implementations must be `Send + Sync + 'static` so they can be shared
/// across Tokio tasks behind an `Arc`.
///
/// All IDs are passed as raw [`Uuid`] values. The API layer is
/// responsible for wrapping/unwrapping the phantom-typed `Id<T>` from
/// `jupiter-api-types`.
#[async_trait]
pub trait StorageBackend: Send + Sync + 'static {
// ── Initialization ───────────────────────────────────────────────
/// Run all pending sqlx migrations against the connected database.
///
/// Called once at server startup. Migration files live in
/// `crates/jupiter-db/migrations/` and are embedded at compile time
/// by the `sqlx::migrate!` macro.
async fn run_migrations(&self) -> Result<()>;
// ── Accounts ─────────────────────────────────────────────────────
//
// An Account is the top-level ownership entity. In Hercules CI an
// account can be a user or an organisation. Projects, join tokens,
// and agent sessions all belong to an account.
/// Create a new account with the given display name and type.
///
/// Returns `DbError::Sqlx` if the name violates the UNIQUE constraint.
async fn create_account(&self, name: &str, typ: AccountType) -> Result<Account>;
/// Fetch a single account by its primary-key UUID.
async fn get_account(&self, id: Uuid) -> Result<Account>;
/// Fetch a single account by its unique display name.
///
/// Used during login and API-key resolution where the caller only
/// knows the account name.
async fn get_account_by_name(&self, name: &str) -> Result<Account>;
/// Return the bcrypt-hashed password for the account, if one has been
/// set. Returns `Ok(None)` for accounts that authenticate exclusively
/// via forge OAuth or that have not yet set a password.
///
/// The account is looked up by its unique display `name` (the value a
/// user types at login), not by UUID -- unlike
/// [`set_account_password_hash`](Self::set_account_password_hash),
/// which takes the account's primary key.
///
/// NOTE(review): this contract does not state what happens when no
/// account with the given name exists (`Ok(None)` vs. a not-found
/// error) -- confirm against the backend implementation, since the
/// login path should not reveal which of the two cases occurred.
async fn get_account_password_hash(&self, name: &str) -> Result<Option<String>>;
/// Set (or replace) the bcrypt-hashed password for an account.
///
/// The hash is stored as an opaque string; the caller is responsible
/// for hashing with an appropriate cost factor before calling this
/// method.
async fn set_account_password_hash(&self, id: Uuid, password_hash: &str) -> Result<()>;
/// List every account, ordered by creation time.
async fn list_accounts(&self) -> Result<Vec<Account>>;
// ── Cluster Join Tokens ──────────────────────────────────────────
//
// When an `hercules-ci-agent` first connects it presents a bearer
// token. The server looks up the matching bcrypt hash in this table
// to authenticate the agent and associate it with an account.
//
// Tokens are one-way hashed (bcrypt) so a database leak does not
// expose credentials.
/// Persist a new join token.
///
/// `token_hash` is the bcrypt hash of the raw bearer token that was
/// shown to the admin at creation time. The raw token is never
/// stored.
async fn create_cluster_join_token(
&self,
account_id: Uuid,
name: &str,
token_hash: &str,
) -> Result<ClusterJoinToken>;
/// List all join tokens belonging to an account (hash excluded).
async fn list_cluster_join_tokens(&self, account_id: Uuid) -> Result<Vec<ClusterJoinToken>>;
/// Retrieve the bcrypt hash for a specific token by its UUID.
///
/// Used during agent authentication when the token ID is already
/// known.
async fn get_cluster_join_token_hash(&self, token_id: Uuid) -> Result<String>;
/// Delete (revoke) a cluster join token.
///
/// Active agent sessions authenticated with this token are **not**
/// automatically terminated -- they remain valid until their next
/// re-authentication attempt.
async fn delete_cluster_join_token(&self, token_id: Uuid) -> Result<()>;
/// Return all `(token_id, bcrypt_hash)` pairs for an account so the
/// authentication layer can try each hash against the presented bearer
/// token.
///
/// Despite its name, this method neither accepts a hash nor performs
/// any matching itself: it only lists the candidate hashes for
/// `account_id`. The caller is responsible for running the bcrypt
/// verification against each returned hash and picking the matching
/// `token_id`.
///
/// This linear scan is acceptable because each account typically has
/// only a handful of join tokens.
async fn find_cluster_join_token_by_hash(
&self,
account_id: Uuid,
) -> Result<Vec<(Uuid, String)>>;
// ── Agent Sessions ───────────────────────────────────────────────
//
// Each connected `hercules-ci-agent` has exactly one session row.
// The session records the agent's platform capabilities (e.g.,
// `x86_64-linux`, `aarch64-darwin`), system features, and
// concurrency limit. The scheduler uses this information to match
// tasks to capable agents.
/// Register a newly-connected agent.
///
/// The [`AgentHello`] payload contains the agent's self-reported
/// capabilities (platforms, system features, concurrency, versions).
async fn create_agent_session(
&self,
agent_hello: &AgentHello,
account_id: Uuid,
) -> Result<AgentSession>;
/// Fetch a single agent session by UUID.
async fn get_agent_session(&self, id: Uuid) -> Result<AgentSession>;
/// List all currently-registered agent sessions.
async fn list_agent_sessions(&self) -> Result<Vec<AgentSession>>;
/// Bump the `last_heartbeat` timestamp for a connected agent.
///
/// The server uses heartbeat age to detect stale sessions (agents
/// that disconnected without a clean goodbye).
async fn update_agent_heartbeat(&self, id: Uuid) -> Result<()>;
/// Remove an agent session (agent disconnected or timed out).
///
/// Any tasks still assigned to this agent should be requeued
/// separately via [`requeue_agent_tasks`](Self::requeue_agent_tasks).
async fn delete_agent_session(&self, id: Uuid) -> Result<()>;
/// Find all agent sessions whose `platforms` JSON array contains the
/// given platform string (e.g. `"x86_64-linux"`).
///
/// Used by the scheduler to determine which agents can run a task
/// that requires a specific platform.
async fn get_active_agent_sessions_for_platform(
&self,
platform: &str,
) -> Result<Vec<AgentSession>>;
// ── Repos ────────────────────────────────────────────────────────
//
// A Repo is a mirror of a repository on an external forge (GitHub,
// Gitea, etc.). It stores the clone URL and default branch so the
// agent knows where to fetch code. Repos are unique per
// (forge, owner, name) triple.
/// Register a repository from a forge.
async fn create_repo(
&self,
forge_id: Uuid,
owner: &str,
name: &str,
clone_url: &str,
default_branch: &str,
) -> Result<Repo>;
/// Fetch a repository by its primary-key UUID.
async fn get_repo(&self, id: Uuid) -> Result<Repo>;
/// Look up a repository by its forge-side identity (forge + owner + name).
///
/// Returns `None` if no matching repo has been registered yet.
/// Used during webhook processing to find or create the repo.
async fn find_repo(
&self,
forge_id: Uuid,
owner: &str,
name: &str,
) -> Result<Option<Repo>>;
/// List all repositories associated with a given forge.
async fn list_repos(&self, forge_id: Uuid) -> Result<Vec<Repo>>;
// ── Projects ─────────────────────────────────────────────────────
//
// A Project binds an Account to a Repo and serves as the grouping
// entity for jobs, state files, secrets, and schedules. This is
// the primary unit the user interacts with in the Hercules CI
// dashboard.
/// Create a new project owned by `account_id` and backed by `repo_id`.
///
/// Projects are enabled by default. Disabled projects ignore
/// incoming webhooks.
async fn create_project(
&self,
account_id: Uuid,
repo_id: Uuid,
name: &str,
) -> Result<Project>;
/// Fetch a project by primary key.
async fn get_project(&self, id: Uuid) -> Result<Project>;
/// Fetch a project by its unique display name.
async fn get_project_by_name(&self, name: &str) -> Result<Project>;
/// Toggle the `enabled` flag on a project and return the updated row.
///
/// Disabled projects will not create new jobs when webhooks arrive.
async fn update_project(&self, id: Uuid, enabled: bool) -> Result<Project>;
/// List all projects, ordered by creation time.
async fn list_projects(&self) -> Result<Vec<Project>>;
/// Find the project (if any) that is linked to the given repo.
///
/// At most one project can point to each repo. Used during webhook
/// processing to route an event to the correct project.
async fn find_project_by_repo(&self, repo_id: Uuid) -> Result<Option<Project>>;
// ── Jobs ─────────────────────────────────────────────────────────
//
// A Job represents a single CI run triggered by a push or pull
// request event. It progresses through:
//
// pending -> evaluating -> building -> running_effects -> succeeded / failed
//
// Each job belongs to exactly one project and is identified by a
// per-(project, ref) monotonically-increasing sequence number.
/// Create a new job in `pending` status.
///
/// Automatically assigns the next sequence number for the given
/// (project, ref) pair. The `forge_type`, `repo_owner`, and
/// `repo_name` are denormalized from the project's repo for
/// convenient display and webhook status reporting.
async fn create_job(
&self,
project_id: Uuid,
forge_type: ForgeType,
repo_owner: &str,
repo_name: &str,
ref_name: &str,
commit_sha: &str,
) -> Result<Job>;
/// Fetch a job by primary key.
async fn get_job(&self, id: Uuid) -> Result<Job>;
/// Transition a job to the given status and bump `updated_at`.
async fn update_job_status(&self, id: Uuid, status: JobStatus) -> Result<()>;
/// Paginated listing of jobs for a project, newest first.
///
/// Returns `(summaries, total_count)` so the API can set pagination
/// headers.
async fn list_jobs_for_project(
&self,
project_id: Uuid,
page: u64,
per_page: u64,
) -> Result<(Vec<JobSummary>, u64)>;
/// Return the most recent job for a (project, ref) pair, by sequence
/// number.
///
/// Used to determine whether a new push supersedes an in-progress
/// job on the same branch.
async fn get_latest_job_for_ref(
&self,
project_id: Uuid,
ref_name: &str,
) -> Result<Option<Job>>;
/// Compute the next sequence number for a (project, ref) pair.
///
/// Sequence numbers start at 1 and monotonically increase. They are
/// used to order effects: an effect for sequence N will not run until
/// all effects for sequences < N on the same ref have completed.
async fn get_next_sequence_number(
&self,
project_id: Uuid,
ref_name: &str,
) -> Result<i64>;
// ── Task Queue ───────────────────────────────────────────────────
//
// The task queue is a unified dispatch mechanism. Evaluation, build,
// and effect tasks all live in the same `task_queue` table. Each
// task optionally specifies a required `platform` (e.g.,
// `x86_64-linux`) so the scheduler can route it to a capable agent.
//
// Tasks flow through: pending -> running -> succeeded / failed
//
// If an agent disconnects, its running tasks are requeued to pending
// so another agent can pick them up.
/// Insert a new task into the queue in `pending` status.
///
/// `platform` may be `None` for tasks that can run on any agent
/// (e.g., evaluation of platform-independent expressions).
/// `payload` is an opaque JSON blob whose schema depends on
/// `task_type`.
async fn enqueue_task(
&self,
job_id: Uuid,
task_type: TaskType,
platform: Option<&str>,
payload: &serde_json::Value,
) -> Result<Uuid>;
/// Atomically dequeue the oldest pending task that matches the given
/// platform.
///
/// The task is moved to `running` status inside a transaction so that
/// concurrent agents cannot claim the same task. Returns `None` if
/// no matching task is available. On success the returned tuple
/// carries the task's UUID, its type, and its JSON payload.
///
/// `system_features` is accepted for future feature-matching but is
/// not yet used in the query.
///
/// NOTE(review): `enqueue_task` allows `platform = None` for tasks that
/// can run on any agent; this contract does not say whether such
/// platform-less tasks are returned here -- confirm against the
/// backend implementation.
async fn dequeue_task(
&self,
platform: &str,
system_features: &[String],
) -> Result<Option<(Uuid, TaskType, serde_json::Value)>>;
/// Update the status of a task and optionally record which agent
/// session is handling it.
async fn update_task_status(
&self,
task_id: Uuid,
status: TaskStatus,
agent_session_id: Option<Uuid>,
) -> Result<()>;
/// Retrieve a task's full metadata (id, type, status, payload).
async fn get_task(
&self,
task_id: Uuid,
) -> Result<(Uuid, TaskType, TaskStatus, serde_json::Value)>;
/// Reset all `running` tasks owned by the given agent session back
/// to `pending`.
///
/// Called when an agent disconnects unexpectedly so that its
/// in-flight work is retried by another agent. Returns the list
/// of task IDs that were requeued.
async fn requeue_agent_tasks(&self, agent_session_id: Uuid) -> Result<Vec<Uuid>>;
/// Look up which job a task belongs to.
///
/// Used by the agent protocol handler to route task results back to
/// the originating job.
async fn get_task_job_id(&self, task_id: Uuid) -> Result<Uuid>;
// ── Evaluations / Attributes ─────────────────────────────────────
//
// During evaluation the agent walks the flake's `herculesCI` output
// attribute tree. Each discovered attribute is recorded here,
// along with its derivation path (if it produces one) and type.
//
// DerivationInfo stores Nix-level metadata from `nix show-derivation`
// so the scheduler knows which platform a build needs without
// re-evaluating.
/// Record a single attribute discovered during evaluation.
///
/// `path` is the Nix attribute path as a list of segments (e.g.,
/// `["herculesCI", "ciSystems", "x86_64-linux", "default"]`).
/// `derivation_path` is the `/nix/store/...drv` path, if this
/// attribute produces a derivation.
async fn store_attribute(
&self,
job_id: Uuid,
path: &[String],
derivation_path: Option<&str>,
typ: AttributeType,
error: Option<&str>,
) -> Result<()>;
/// Store Nix derivation metadata obtained from `nix show-derivation`
/// (named `nix derivation show` in newer Nix releases).
///
/// `platform` (e.g. `"x86_64-linux"`) and `required_system_features`
/// (e.g. `["kvm"]`) are used by the scheduler to match builds to
/// agents. `input_derivations` lists the build dependencies of the
/// derivation. `outputs` is the JSON map of output names to store
/// paths.
///
/// NOTE(review): this was previously documented as the *transitive*
/// dependency set. Nix's own `inputDrvs` field contains only the
/// direct inputs of a derivation -- confirm which of the two the agent
/// actually sends before relying on it for closure computations.
async fn store_derivation_info(
&self,
job_id: Uuid,
derivation_path: &str,
platform: &str,
required_system_features: &[String],
input_derivations: &[String],
outputs: &serde_json::Value,
) -> Result<()>;
/// Retrieve all attributes recorded for a job's evaluation.
async fn get_evaluation_attributes(&self, job_id: Uuid) -> Result<Vec<AttributeResult>>;
/// Return every unique derivation path discovered during a job's
/// evaluation.
///
/// Used after evaluation completes to create the corresponding
/// build records.
async fn get_derivation_paths_for_job(&self, job_id: Uuid) -> Result<Vec<String>>;
/// Look up the target platform for a given derivation path.
///
/// Returns `None` if the derivation has not been recorded (e.g., it
/// was a dependency that was not evaluated in this job). The
/// scheduler calls this to decide which agent platform can build
/// the derivation.
async fn get_derivation_platform(
&self,
derivation_path: &str,
) -> Result<Option<String>>;
// ── Builds ───────────────────────────────────────────────────────
//
// Builds are **deduplicated by derivation path**. If two different
// jobs (or even two different projects) need the same
// `/nix/store/...drv`, only one build record is created. The
// `build_jobs` join table tracks which jobs share a build so their
// statuses can all be updated when the build completes.
//
// Build lifecycle: pending -> building -> succeeded / failed / cancelled
/// Insert a new build for `derivation_path`, or return the existing
/// build if one already exists (deduplication).
///
/// Returns `(build_id, was_created)`. `was_created` is `false` when
/// the derivation was already known, meaning no new work needs to be
/// scheduled.
async fn create_or_get_build(&self, derivation_path: &str) -> Result<(Uuid, bool)>;
/// Fetch a build by primary key.
async fn get_build(&self, id: Uuid) -> Result<Build>;
/// Look up a build by its derivation path.
async fn get_build_by_drv_path(&self, derivation_path: &str) -> Result<Option<Build>>;
/// Transition a build's status and optionally record the building
/// agent.
///
/// Automatically sets `started_at` when entering `Building` and
/// `completed_at` when entering a terminal status.
async fn update_build_status(
&self,
id: Uuid,
status: BuildStatus,
agent_session_id: Option<Uuid>,
) -> Result<()>;
/// Associate a build with a job (many-to-many).
///
/// Silently succeeds if the link already exists (`INSERT OR IGNORE`).
async fn link_build_to_job(&self, build_id: Uuid, job_id: Uuid) -> Result<()>;
/// Check whether every build linked to a job has reached a terminal
/// status (`succeeded`, `failed`, or `cancelled`).
///
/// The job controller calls this after each build status update to
/// decide whether to advance the job to the effects phase.
async fn are_all_builds_complete(&self, job_id: Uuid) -> Result<bool>;
// ── Effects ──────────────────────────────────────────────────────
//
// Effects are post-build side-effects (deploys, notifications,
// state-file updates, etc.) defined in the `herculesCI.onPush`
// output. They are serialised: for a given (project, ref), effects
// for sequence number N do not start until all effects for
// sequence < N have completed. This prevents overlapping deploys.
//
// Effect lifecycle: pending -> running -> succeeded / failed / cancelled
/// Create a new effect record for a job.
async fn create_effect(
&self,
job_id: Uuid,
attribute_path: &[String],
derivation_path: &str,
) -> Result<Uuid>;
/// Fetch an effect by primary key.
async fn get_effect(&self, id: Uuid) -> Result<Effect>;
/// Look up an effect by its (job, attribute_path) pair.
///
/// `attribute_path` is the JSON-serialized path string.
async fn get_effect_by_job_and_attr(&self, job_id: Uuid, attribute_path: &str) -> Result<Effect>;
/// List all effects associated with a job, ordered by creation time.
async fn get_effects_for_job(&self, job_id: Uuid) -> Result<Vec<Effect>>;
/// Transition an effect's status.
///
/// Automatically sets `started_at` when entering `Running` and
/// `completed_at` when entering a terminal status.
async fn update_effect_status(&self, id: Uuid, status: EffectStatus) -> Result<()>;
/// Check whether every effect for a job has reached a terminal status.
async fn are_all_effects_complete(&self, job_id: Uuid) -> Result<bool>;
/// Check whether all effects from earlier sequence numbers on the
/// same (project, ref) have completed.
///
/// Used to enforce the serialisation invariant: effects for a newer
/// push must wait until previous pushes' effects have finished.
/// This prevents concurrent deploys from the same branch.
async fn are_preceding_effects_done(
&self,
project_id: Uuid,
ref_name: &str,
sequence_number: i64,
) -> Result<bool>;
// ── State Files ──────────────────────────────────────────────────
//
// State files implement the Hercules CI `hci state` feature: a
// key-value store of versioned binary blobs scoped per project.
// Effects can read/write these files to persist data across CI runs
// (e.g., Terraform state, deployment manifests).
//
// Each write bumps the version counter and replaces the data.
// The version number enables optimistic-concurrency checks in
// higher-level code.
/// Insert or update a state file.
///
/// Uses `INSERT ... ON CONFLICT DO UPDATE` so that the first write
/// creates the row at version 1, and subsequent writes atomically
/// increment the version.
async fn put_state_file(
&self,
project_id: Uuid,
name: &str,
data: &[u8],
) -> Result<()>;
/// Retrieve the raw bytes of a state file.
///
/// Returns `None` if the file has never been written.
async fn get_state_file(
&self,
project_id: Uuid,
name: &str,
) -> Result<Option<Vec<u8>>>;
/// List all state files for a project (metadata only, no data blobs).
async fn list_state_files(&self, project_id: Uuid) -> Result<Vec<StateFile>>;
// ── State Locks ──────────────────────────────────────────────────
//
// Distributed advisory locks with automatic lease expiry. Effects
// acquire a lock before reading/writing a state file to prevent
// concurrent modifications from parallel jobs.
//
// The UNIQUE(project_id, name) constraint on the `state_locks`
// table ensures mutual exclusion at the database level. Expired
// locks are cleaned up lazily (on acquire) and periodically via
// `cleanup_expired_locks`.
/// Attempt to acquire a named lock for a project.
///
/// First deletes any expired lock for the same (project, name) pair,
/// then tries `INSERT OR IGNORE`. Returns `DbError::Conflict` if
/// the lock is held by another owner and has not expired.
///
/// `owner` is a free-form string identifying the holder (typically
/// the agent session ID or effect ID). `ttl_seconds` controls the
/// lease duration.
async fn acquire_lock(
&self,
project_id: Uuid,
name: &str,
owner: &str,
ttl_seconds: u64,
) -> Result<StateLock>;
/// Extend the lease of an existing lock.
///
/// Useful for long-running effects that need to hold a lock beyond
/// the initial TTL without releasing and re-acquiring.
async fn renew_lock(&self, lock_id: Uuid, ttl_seconds: u64) -> Result<StateLock>;
/// Explicitly release a lock before it expires.
async fn release_lock(&self, lock_id: Uuid) -> Result<()>;
/// Delete all locks whose `expires_at` is in the past.
///
/// Returns the number of expired locks removed. Called periodically
/// by a background janitor task.
async fn cleanup_expired_locks(&self) -> Result<u64>;
// ── Secrets ──────────────────────────────────────────────────────
//
// Secrets are JSON blobs scoped to a project. They are delivered
// to the agent during effect execution when the `condition` matches
// (e.g., only on the default branch).
//
// The `data` column stores the secret payload as JSON text.
// At the Rust level it is wrapped in `Sensitive<_>` to prevent
// accidental logging.
/// Create a new project secret.
///
/// `data` is an opaque JSON value (typically `{"key": "value"}`
/// pairs). `condition` controls when the secret is available --
/// e.g., only for pushes to the default branch.
async fn create_secret(
&self,
project_id: Uuid,
name: &str,
data: &serde_json::Value,
condition: &SecretCondition,
) -> Result<Uuid>;
/// List all secrets for a project (including their data).
///
/// The caller is responsible for filtering based on `condition`
/// before sending secrets to an agent.
async fn get_secrets_for_project(&self, project_id: Uuid) -> Result<Vec<Secret>>;
/// Delete a secret by its UUID.
async fn delete_secret(&self, id: Uuid) -> Result<()>;
// ── Log Entries ──────────────────────────────────────────────────
//
// Agents stream structured log lines while executing tasks. Each
// line has a zero-based index, a millisecond timestamp, a message,
// and a severity level. The dashboard uses these to display
// real-time build/effect logs.
/// Batch-insert log lines for a task.
///
/// Runs inside a transaction for atomicity.
///
/// **Not idempotent:** the `log_entries` table currently has no unique
/// constraint on `(task_id, line_index)`, so inserting lines with the
/// same pair again (e.g. when an agent retries a batch) stores
/// duplicate rows rather than being ignored. Callers that need
/// exactly-once semantics must de-duplicate before calling.
async fn store_log_entries(
&self,
task_id: Uuid,
entries: &[LogEntry],
) -> Result<()>;
/// Retrieve a page of log entries for a task, ordered by line index.
async fn get_log_entries(
&self,
task_id: Uuid,
offset: u64,
limit: u64,
) -> Result<Vec<LogEntry>>;
}

View file

@ -0,0 +1,58 @@
//! # Database error types for jupiter-db
//!
//! Provides a unified [`DbError`] enum that every [`crate::backend::StorageBackend`]
//! method returns. The variants cover the five failure modes that callers
//! need to distinguish:
//!
//! - **Sqlx** -- low-level driver or connection-pool errors (timeouts,
//! constraint violations not otherwise mapped, etc.).
//! - **NotFound** -- the requested entity does not exist. The HTTP layer
//! typically maps this to `404 Not Found`.
//! - **Conflict** -- a uniqueness or locking constraint was violated
//! (e.g., trying to acquire a state lock that is already held). Maps
//! to `409 Conflict`.
//! - **Migration** -- schema migration failed on startup. Fatal.
//! - **Serialization** -- a JSON column could not be serialized or
//! deserialized (e.g., the `platforms` JSON array in `agent_sessions`).
use thiserror::Error;
/// Crate-level error type returned by every [`crate::backend::StorageBackend`] method.
///
/// The variants carry enough context for the API layer to choose an
/// appropriate HTTP status code without inspecting error messages.
#[derive(Debug, Error)]
pub enum DbError {
/// A low-level sqlx driver error (connection failure, unexpected SQL
/// error, protocol parse issue, etc.).
///
/// Constraint violations that are not explicitly mapped to another
/// variant also surface here -- for example, the documentation of
/// [`crate::backend::StorageBackend::create_account`] states that a
/// duplicate account name is reported as `DbError::Sqlx`, not as
/// [`DbError::Conflict`].
#[error("database error: {0}")]
Sqlx(#[from] sqlx::Error),
/// The requested entity was not found.
///
/// `entity` is a human-readable table/concept name (e.g. `"account"`,
/// `"build"`). `id` is whatever key was used for the lookup; the
/// rendered message always labels it "id", even when the lookup key
/// was something else (such as a name).
#[error("not found: {entity} with id {id}")]
NotFound { entity: String, id: String },
/// A uniqueness or mutual-exclusion constraint was violated.
///
/// Currently used by [`crate::backend::StorageBackend::acquire_lock`]
/// when the lock is already held by another owner. The payload is a
/// free-form description that is embedded in the error message.
#[error("conflict: {0}")]
Conflict(String),
/// A sqlx migration failed. This is treated as fatal at startup.
#[error("migration error: {0}")]
Migration(#[from] sqlx::migrate::MigrateError),
/// JSON serialization or deserialization failed for a column that
/// stores structured data (e.g., `platforms`, `system_features`,
/// `attribute_path`, `condition`).
#[error("serialization error: {0}")]
Serialization(#[from] serde_json::Error),
}
/// Convenience alias used throughout the crate so that every function
/// signature can simply return `Result<T>`.
pub type Result<T> = std::result::Result<T, DbError>;

View file

@ -0,0 +1,46 @@
//! # jupiter-db -- Persistence layer for Jupiter
//!
//! Jupiter is a self-hosted, wire-compatible replacement for
//! [hercules-ci.com](https://hercules-ci.com). This crate owns every
//! database interaction: schema migrations, CRUD operations, and the
//! task-queue that drives the eval-build-effects pipeline.
//!
//! ## Architecture
//!
//! All server components depend on the [`backend::StorageBackend`] async
//! trait rather than on a concrete database driver. Today the only
//! implementation is [`sqlite::SqliteBackend`] (the default), but the
//! trait is designed so that a PostgreSQL backend can be added behind a
//! feature flag without touching any calling code.
//!
//! ## Modules
//!
//! | Module | Purpose |
//! |-------------|---------|
//! | [`backend`] | Defines the `StorageBackend` trait -- the public contract. |
//! | [`error`] | Crate-level error and `Result` types. |
//! | [`sqlite`] | SQLite implementation of `StorageBackend` via sqlx. |
//!
//! ## Data model overview
//!
//! The schema mirrors the Hercules CI object model:
//!
//! ```text
//! Account
//! +-- ClusterJoinToken (agent authentication)
//! +-- Project
//! +-- Repo (forge-side repository reference)
//! +-- Job (one per push / PR event)
//! | +-- Attribute (evaluation output)
//! | +-- DerivationInfo (platform & inputs metadata)
//! | +-- Build (deduplicated by drv path)
//! | +-- Effect (post-build side-effects)
//! | +-- TaskQueue (unified dispatch to agents)
//! +-- StateFile (versioned binary blobs for `hci state`)
//! +-- StateLock (distributed lock with lease expiry)
//! +-- Secret (per-project JSON secrets, delivered to agents for effects)
//! ```
pub mod backend;
pub mod error;
pub mod sqlite;

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,19 @@
[package]
name = "jupiter-forge"
version.workspace = true
edition.workspace = true
[dependencies]
jupiter-api-types = { workspace = true }
reqwest = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
thiserror = { workspace = true }
async-trait = { workspace = true }
tracing = { workspace = true }
hmac = { workspace = true }
sha2 = { workspace = true }
hex = { workspace = true }
chrono = { workspace = true }
uuid = { workspace = true }

View file

@ -0,0 +1,65 @@
//! Error types for the forge integration layer.
//!
//! [`ForgeError`] is the single error enum shared by all forge providers
//! (GitHub, Gitea, Radicle). It covers the full range of failure modes that
//! can occur during webhook verification, payload parsing, and outbound API
//! calls.
//!
//! The enum uses [`thiserror`] for ergonomic `Display` / `Error` derivation
//! and provides automatic `From` conversions for the two most common
//! underlying error types: [`reqwest::Error`] (HTTP client failures) and
//! [`serde_json::Error`] (JSON deserialization failures).
use thiserror::Error;
/// Unified error type for all forge operations.
///
/// Each variant maps to a distinct failure category so that callers in the
/// Jupiter server can decide how to respond (e.g. return HTTP 401 for
/// `InvalidSignature`, HTTP 400 for `ParseError`, HTTP 502 for `ApiError`).
#[derive(Debug, Error)]
pub enum ForgeError {
/// Webhook signature verification failed in a way that is reported as
/// an error rather than as a plain "no match" result. The server
/// should reject the request with HTTP 401.
///
/// A signature that is simply absent or does not match the expected
/// HMAC is reported by `verify_webhook` as `Ok(false)`, not through
/// this variant, and malformed signature headers are reported as
/// [`ForgeError::ParseError`]. `InvalidSignature` is reserved for
/// problems hit while performing the verification itself (e.g. the
/// HMAC could not be initialized).
///
/// NOTE(review): the previous wording also described this variant as
/// "structurally valid but did not match the expected HMAC", which
/// contradicts the `Ok(false)` contract above -- confirm against the
/// providers' `verify_webhook` implementations.
#[error("invalid webhook signature")]
InvalidSignature,
/// The webhook carried an event type that this provider does not handle
/// and considers an error (as opposed to returning `Ok(None)` for
/// silently ignored events).
#[error("unsupported event type: {0}")]
UnsupportedEvent(String),
/// The webhook payload could not be deserialized or was missing required
/// fields. Also used for malformed signature headers (e.g. a GitHub
/// signature without the `sha256=` prefix).
#[error("parse error: {0}")]
ParseError(String),
/// An outbound API call to the forge succeeded at the HTTP level but
/// returned a non-success status code (4xx / 5xx). The string contains
/// the status code and response body for diagnostics.
#[error("API error: {0}")]
ApiError(String),
/// The underlying HTTP client (reqwest) encountered a transport-level
/// error (DNS failure, timeout, TLS error, etc.). Automatically
/// converted from `reqwest::Error` via `?`.
#[error("HTTP error: {0}")]
HttpError(#[from] reqwest::Error),
/// JSON serialization or deserialization failed. Automatically converted
/// from `serde_json::Error`.
#[error("JSON error: {0}")]
JsonError(#[from] serde_json::Error),
/// The provider was asked to perform an operation that requires
/// configuration it does not have. For example, calling
/// `poll_changes` on a Radicle provider that is in webhook mode.
#[error("not configured: {0}")]
NotConfigured(String),
}

View file

@ -0,0 +1,441 @@
//! Gitea / Forgejo forge provider for Jupiter CI.
//!
//! This module implements the [`ForgeProvider`] trait for Gitea (and its
//! community fork Forgejo), handling:
//!
//! - **Webhook verification** using HMAC-SHA256 with the `X-Gitea-Signature`
//! header. Unlike GitHub, Gitea sends the raw hex digest **without** a
//! `sha256=` prefix. The constant-time comparison logic is otherwise
//! identical to the GitHub provider.
//!
//! - **Webhook parsing** for `push` and `pull_request` events (via the
//! `X-Gitea-Event` header). Unrecognized event types are silently ignored.
//!
//! - **Commit status reporting** via `POST /api/v1/repos/{owner}/{repo}/statuses/{sha}`.
//!
//! - **Repository listing** via `GET /api/v1/repos/search`.
//!
//! ## Authentication Model
//!
//! Gitea uses **personal access tokens** (or OAuth2 tokens) for API
//! authentication. These are passed in the `Authorization: token <value>`
//! header -- note that Gitea uses the literal word `token` rather than
//! `Bearer` as the scheme.
//!
//! ## Differences from GitHub
//!
//! | Aspect | GitHub | Gitea |
//! |-----------------------|---------------------------------|----------------------------------|
//! | Signature header | `X-Hub-Signature-256` | `X-Gitea-Signature` |
//! | Signature format | `sha256=<hex>` | `<hex>` (no prefix) |
//! | Auth header | `Authorization: Bearer <tok>` | `Authorization: token <tok>` |
//! | API path prefix | `/repos/...` | `/api/v1/repos/...` |
//! | PR sync action name | `"synchronize"` | `"synchronized"` (extra "d") |
//! | User field name | Always `login` | `login` or `username` (varies) |
//! | PR ref fields | Always present | Optional (may be `null`) |
//!
//! The `"synchronize"` vs `"synchronized"` difference is a notable pitfall:
//! GitHub uses `"synchronize"` (no trailing "d") while Gitea uses
//! `"synchronized"` (with trailing "d"). Both mean "new commits were pushed
//! to the PR branch."
use async_trait::async_trait;
use hmac::{Hmac, Mac};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use sha2::Sha256;
use tracing::{debug, warn};
use crate::error::ForgeError;
use crate::{ForgeProvider, RawForgeEvent};
use jupiter_api_types::{CommitStatus, CommitStatusUpdate, ForgeType, PullRequestAction};
/// Type alias for HMAC-SHA256, used for webhook signature verification.
///
/// `verify_webhook` keys it with the provider's `webhook_secret` and feeds it
/// the raw request body to recompute the expected digest.
type HmacSha256 = Hmac<Sha256>;
// ---------------------------------------------------------------------------
// Internal serde types for Gitea webhook payloads
// ---------------------------------------------------------------------------
//
// Gitea's webhook payloads are similar to GitHub's but differ in several
// structural details (see module-level docs). These structs capture only the
// fields Jupiter needs.
/// Payload for Gitea `push` events.
///
/// Every field declared here is copied into [`RawForgeEvent::Push`] by
/// `parse_webhook`; all other JSON keys are ignored.
#[derive(Debug, Deserialize)]
struct GiteaPushPayload {
/// Git ref taken from the JSON `ref` key (`ref` is a Rust keyword, hence
/// the rename).
#[serde(rename = "ref")]
git_ref: String,
/// Forwarded unchanged as the `before` field of the push event.
before: String,
/// Forwarded unchanged as the `after` field of the push event.
after: String,
/// Repository the push belongs to; supplies `repo_owner` and `repo_name`.
repository: GiteaRepo,
/// User that triggered the push; supplies the event's `sender`.
sender: GiteaUser,
}
/// Repository object embedded in Gitea webhook payloads.
#[derive(Debug, Deserialize)]
struct GiteaRepo {
/// Owning user; `parse_webhook` turns it into `repo_owner` via
/// [`GiteaUser::name`].
owner: GiteaUser,
/// Repository name, forwarded as `repo_name`.
name: String,
/// Clone URL from the payload. Deserialized but not read anywhere in this
/// module, hence the `dead_code` allowance.
#[allow(dead_code)]
clone_url: Option<String>,
}
/// User object in Gitea payloads.
///
/// Gitea is inconsistent about which field it populates: some payloads use
/// `login`, others use `username`. The [`name()`](GiteaUser::name) helper
/// tries both, falling back to `"unknown"`.
///
/// Both fields are optional so that a payload carrying only one of them
/// (or neither) still deserializes.
#[derive(Debug, Deserialize)]
struct GiteaUser {
/// Preferred source for the display name.
login: Option<String>,
/// Used only when `login` is absent.
username: Option<String>,
}
impl GiteaUser {
    /// Pick a display name for this user.
    ///
    /// `login` wins when it is present, `username` is the second choice, and
    /// the literal `"unknown"` is returned when both are `None`. The result
    /// is always an owned `String`.
    fn name(&self) -> String {
        let chosen = match (self.login.as_deref(), self.username.as_deref()) {
            (Some(login), _) => login,
            (None, Some(username)) => username,
            (None, None) => "unknown",
        };
        chosen.to_owned()
    }
}
/// Payload for Gitea `pull_request` events.
#[derive(Debug, Deserialize)]
struct GiteaPRPayload {
/// Action string from the payload; mapped by `GiteaProvider::parse_pr_action`.
/// Events whose action is not recognized there are dropped.
action: String,
/// Pull request number, forwarded as `pr_number`.
number: u64,
/// Head/base refs of the pull request.
pull_request: GiteaPR,
/// Repository the pull request targets; supplies `repo_owner` and `repo_name`.
repository: GiteaRepo,
}
/// The `pull_request` object inside a Gitea PR event.
#[derive(Debug, Deserialize)]
struct GiteaPR {
/// Source side of the pull request; only its `sha` is read (as `head_sha`).
head: GiteaPRRef,
/// Target side of the pull request; its `ref_name` (or `label`) becomes
/// `base_ref`.
base: GiteaPRRef,
}
/// A ref endpoint (head or base) of a Gitea pull request.
///
/// Unlike GitHub where `sha` and `ref` are always present, Gitea may return
/// `null` for these fields in some edge cases (e.g. deleted branches), so
/// they are `Option<String>`. The `label` field serves as a fallback for
/// `ref_name` in the base ref.
#[derive(Debug, Deserialize)]
struct GiteaPRRef {
/// Commit SHA at this endpoint; `parse_webhook` substitutes an empty string
/// when the head's value is missing.
sha: Option<String>,
/// Ref name from the JSON `ref` key (`ref` is a Rust keyword, hence the
/// rename).
#[serde(rename = "ref")]
ref_name: Option<String>,
/// Alternative label for the ref; consulted only when `ref_name` is `None`.
label: Option<String>,
}
/// Request body for `POST /api/v1/repos/{owner}/{repo}/statuses/{sha}`.
///
/// The field names and semantics mirror GitHub's status API, which Gitea
/// intentionally replicates for compatibility.
#[derive(Debug, Serialize)]
struct GiteaStatusRequest {
/// One of the strings produced by `GiteaProvider::gitea_status_string`:
/// `"pending"`, `"success"`, `"failure"` or `"error"`.
state: String,
/// Label identifying which check this status belongs to.
context: String,
/// Human-readable description; `set_commit_status` sends an empty string
/// when the update carries none.
description: String,
/// Optional link for the status; the key is left out of the JSON entirely
/// when this is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
target_url: Option<String>,
}
/// Minimal repo item from the Gitea search API response.
///
/// `list_repos` converts each item into an `(owner, name, clone_url)` tuple.
#[derive(Debug, Deserialize)]
struct GiteaRepoListItem {
/// Owning user; rendered through [`GiteaUser::name`].
owner: GiteaUser,
/// Repository name.
name: String,
/// Clone URL exactly as reported by the API (required here, unlike in
/// webhook payloads).
clone_url: String,
}
// ---------------------------------------------------------------------------
// Provider
// ---------------------------------------------------------------------------
/// Gitea / Forgejo forge provider.
///
/// Implements [`ForgeProvider`] for self-hosted Gitea and Forgejo instances.
/// Construct it with [`GiteaProvider::new`], which normalizes `base_url`.
///
/// # Fields
///
/// - `base_url` -- The instance URL (e.g. `https://gitea.example.com`), used
/// as the prefix for all API calls (`/api/v1/...`) and for clone URLs.
/// - `api_token` -- A personal access token or OAuth2 token, sent via
/// `Authorization: token <value>`.
/// - `webhook_secret` -- Shared HMAC-SHA256 secret for verifying incoming
/// webhooks.
/// - `client` -- A reusable `reqwest::Client` for connection pooling.
pub struct GiteaProvider {
/// Instance base URL (no trailing slash; `new` strips any that are passed in).
base_url: String,
/// Personal access token for API authentication.
api_token: String,
/// Shared HMAC secret for webhook verification.
webhook_secret: String,
/// Reusable HTTP client.
client: Client,
}
impl GiteaProvider {
    /// Build a provider for one Gitea / Forgejo instance.
    ///
    /// # Parameters
    ///
    /// * `base_url` -- Instance URL such as `https://gitea.example.com`. Any
    ///   trailing `/` characters are removed before the value is stored.
    /// * `api_token` -- Token sent with outbound API requests.
    /// * `webhook_secret` -- Shared secret used to verify webhook signatures.
    ///
    /// A fresh `reqwest::Client` is created for the provider.
    pub fn new(base_url: String, api_token: String, webhook_secret: String) -> Self {
        let base_url = base_url.trim_end_matches('/').to_string();
        let client = Client::new();
        Self {
            base_url,
            api_token,
            webhook_secret,
            client,
        }
    }

    /// Translate a [`CommitStatus`] into the `state` string sent to the
    /// Gitea commit status API.
    fn gitea_status_string(status: CommitStatus) -> &'static str {
        match status {
            CommitStatus::Error => "error",
            CommitStatus::Failure => "failure",
            CommitStatus::Success => "success",
            CommitStatus::Pending => "pending",
        }
    }

    /// Translate a Gitea pull request action into a [`PullRequestAction`].
    ///
    /// Gitea spells the "new commits pushed" action `"synchronized"` (with a
    /// trailing "d"); it maps to [`PullRequestAction::Synchronize`]. Any
    /// action other than `opened`, `synchronized`, `reopened` or `closed`
    /// yields `None`.
    fn parse_pr_action(action: &str) -> Option<PullRequestAction> {
        let parsed = match action {
            "opened" => PullRequestAction::Opened,
            "synchronized" => PullRequestAction::Synchronize,
            "reopened" => PullRequestAction::Reopened,
            "closed" => PullRequestAction::Closed,
            _ => return None,
        };
        Some(parsed)
    }
}
#[async_trait]
impl ForgeProvider for GiteaProvider {
/// Identify this provider as [`ForgeType::Gitea`].
fn forge_type(&self) -> ForgeType {
ForgeType::Gitea
}
/// Check the HMAC-SHA256 signature of a Gitea webhook.
///
/// The `X-Gitea-Signature` header is expected to hold the bare hex digest
/// of the request body (there is no `sha256=` prefix to strip).
///
/// # Returns
///
/// * `Ok(false)` when the header is absent, when its length differs from
///   the computed hex digest, or when the digests do not match.
/// * `Ok(true)` when the header equals the computed hex digest.
///
/// # Errors
///
/// [`ForgeError::ParseError`] if the HMAC cannot be initialized from the
/// webhook secret.
fn verify_webhook(
    &self,
    signature_header: Option<&str>,
    body: &[u8],
) -> Result<bool, ForgeError> {
    // Without a header there is nothing to compare against.
    let Some(received) = signature_header else {
        return Ok(false);
    };

    let mut hmac = HmacSha256::new_from_slice(self.webhook_secret.as_bytes())
        .map_err(|e| ForgeError::ParseError(format!("HMAC init error: {e}")))?;
    hmac.update(body);
    let expected = hex::encode(hmac.finalize().into_bytes());

    let expected = expected.as_bytes();
    let received = received.as_bytes();
    if expected.len() != received.len() {
        return Ok(false);
    }

    // Accumulate the XOR of every byte pair instead of returning at the
    // first mismatch, so the loop always visits every byte.
    let mut diff = 0u8;
    for (lhs, rhs) in expected.iter().zip(received) {
        diff |= lhs ^ rhs;
    }
    Ok(diff == 0)
}
/// Parse a Gitea webhook payload into a [`RawForgeEvent`].
///
/// Recognized `X-Gitea-Event` values:
///
/// - `"push"` -- produces [`RawForgeEvent::Push`].
/// - `"pull_request"` -- produces [`RawForgeEvent::PullRequest`] for
/// `opened`, `synchronized`, `reopened`, and `closed` actions.
///
/// Gitea PR payloads have optional `sha` and `ref` fields (they can be
/// `null` for deleted branches), so this method handles `None` values
/// gracefully by defaulting to empty strings. The `base_ref` falls back
/// to the `label` field if `ref_name` is absent.
fn parse_webhook(
&self,
event_type: &str,
body: &[u8],
) -> Result<Option<RawForgeEvent>, ForgeError> {
match event_type {
"push" => {
let payload: GiteaPushPayload = serde_json::from_slice(body)?;
debug!(
repo = %payload.repository.name,
git_ref = %payload.git_ref,
"parsed Gitea push event"
);
Ok(Some(RawForgeEvent::Push {
repo_owner: payload.repository.owner.name(),
repo_name: payload.repository.name,
git_ref: payload.git_ref,
before: payload.before,
after: payload.after,
sender: payload.sender.name(),
}))
}
"pull_request" => {
let payload: GiteaPRPayload = serde_json::from_slice(body)?;
let action = match Self::parse_pr_action(&payload.action) {
Some(a) => a,
None => {
debug!(action = %payload.action, "ignoring Gitea PR action");
return Ok(None);
}
};
let head_sha = payload
.pull_request
.head
.sha
.unwrap_or_default();
let base_ref = payload
.pull_request
.base
.ref_name
.or(payload.pull_request.base.label)
.unwrap_or_default();
Ok(Some(RawForgeEvent::PullRequest {
repo_owner: payload.repository.owner.name(),
repo_name: payload.repository.name,
action,
pr_number: payload.number,
head_sha,
base_ref,
}))
}
other => {
debug!(event = %other, "ignoring unhandled Gitea event type");
Ok(None)
}
}
}
/// Post a commit status to `POST /api/v1/repos/{owner}/{repo}/statuses/{sha}`.
///
/// The request is authenticated with `Authorization: token <api_token>`
/// and carries a JSON [`GiteaStatusRequest`] body. A missing description
/// is sent as an empty string.
///
/// # Errors
///
/// * [`ForgeError::HttpError`] when the request cannot be sent.
/// * [`ForgeError::ApiError`] when Gitea answers with a non-success status;
///   the message contains the status code and the response body, and the
///   failure is also logged at `warn` level.
async fn set_commit_status(
    &self,
    repo_owner: &str,
    repo_name: &str,
    commit_sha: &str,
    status: &CommitStatusUpdate,
) -> Result<(), ForgeError> {
    let endpoint = format!(
        "{}/api/v1/repos/{}/{}/statuses/{}",
        self.base_url, repo_owner, repo_name, commit_sha,
    );
    let payload = GiteaStatusRequest {
        state: Self::gitea_status_string(status.status).to_string(),
        context: status.context.clone(),
        description: status.description.clone().unwrap_or_default(),
        target_url: status.target_url.clone(),
    };
    let authorization = format!("token {}", self.api_token);

    let response = self
        .client
        .post(&endpoint)
        .header("Authorization", authorization)
        .header("Content-Type", "application/json")
        .json(&payload)
        .send()
        .await?;

    let status_code = response.status();
    if status_code.is_success() {
        return Ok(());
    }

    // An unreadable error body degrades to an empty string rather than
    // masking the API failure with a transport error.
    let text = response.text().await.unwrap_or_default();
    warn!(%status_code, body = %text, "Gitea status API error");
    Err(ForgeError::ApiError(format!(
        "Gitea API returned {status_code}: {text}"
    )))
}
/// Build the clone URL for a repository on this Gitea instance.
///
/// The result is `<base_url>/<repo_owner>/<repo_name>.git`. No credentials
/// are embedded in the URL, so authentication for git operations has to be
/// arranged separately by whoever uses it (e.g. via `.netrc`,
/// `credential.helper`, or an `http.extraheader` git config entry).
///
/// This implementation never returns an error.
async fn clone_url(
    &self,
    repo_owner: &str,
    repo_name: &str,
) -> Result<String, ForgeError> {
    let url = format!(
        "{}/{}/{}.git",
        self.base_url, repo_owner, repo_name,
    );
    Ok(url)
}
/// List repositories accessible to the authenticated Gitea user.
///
/// Uses `GET /api/v1/repos/search?limit=50` to fetch repositories.
/// The `limit=50` parameter increases the page size from the default
/// (typically 20).
///
/// Note: This does not yet handle pagination; users with more than 50
/// accessible repositories will only see the first page.
async fn list_repos(&self) -> Result<Vec<(String, String, String)>, ForgeError> {
let url = format!("{}/api/v1/repos/search?limit=50", self.base_url);
let resp = self
.client
.get(&url)
.header("Authorization", format!("token {}", self.api_token))
.send()
.await?;
if !resp.status().is_success() {
let status_code = resp.status();
let text = resp.text().await.unwrap_or_default();
return Err(ForgeError::ApiError(format!(
"Gitea API returned {status_code}: {text}"
)));
}
let repos: Vec<GiteaRepoListItem> = resp.json().await?;
Ok(repos
.into_iter()
.map(|r| (r.owner.name(), r.name, r.clone_url))
.collect())
}
}

View file

@ -0,0 +1,496 @@
//! GitHub / GitHub Enterprise forge provider for Jupiter CI.
//!
//! This module implements the [`ForgeProvider`] trait for GitHub, handling:
//!
//! - **Webhook verification** using HMAC-SHA256 with the `X-Hub-Signature-256`
//! header. GitHub sends signatures in `sha256=<hex>` format; the provider
//! strips the prefix, computes the expected HMAC, and performs constant-time
//! comparison to prevent timing attacks.
//!
//! - **Webhook parsing** for `push` and `pull_request` events (via the
//! `X-GitHub-Event` header). Other event types (e.g. `star`, `fork`,
//! `issue_comment`) are silently ignored by returning `Ok(None)`.
//!
//! - **Commit status reporting** via `POST /repos/{owner}/{repo}/statuses/{sha}`.
//! This causes GitHub to show Jupiter CI results as status checks on PRs and
//! commits.
//!
//! - **Authenticated clone URLs** using the `x-access-token` scheme that GitHub
//! App installation tokens require.
//!
//! - **Repository listing** via the GitHub App installation API
//! (`GET /installation/repositories`).
//!
//! ## Authentication Model
//!
//! GitHub Apps authenticate in two stages:
//!
//! 1. The App signs a JWT with its RSA private key to identify itself.
//! 2. The JWT is exchanged for a short-lived **installation token** scoped to
//! the repositories the App has been installed on.
//!
//! Currently, the provider accepts a pre-minted installation token directly
//! (the `api_token` field). The `app_id` and `private_key_pem` fields are
//! stored for future automatic token rotation.
//!
//! ## GitHub Enterprise Support
//!
//! The [`GitHubProvider::with_api_base`] builder method allows pointing the
//! provider at a GitHub Enterprise instance by overriding the default
//! `https://api.github.com` base URL.
use async_trait::async_trait;
use hmac::{Hmac, Mac};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use sha2::Sha256;
use tracing::{debug, warn};
use crate::error::ForgeError;
use crate::{ForgeProvider, RawForgeEvent};
use jupiter_api_types::{CommitStatus, CommitStatusUpdate, ForgeType, PullRequestAction};
/// Type alias for HMAC-SHA256, used for webhook signature verification.
///
/// `verify_webhook` keys it with the provider's `webhook_secret` and feeds it
/// the raw request body to recompute the expected digest.
type HmacSha256 = Hmac<Sha256>;
// ---------------------------------------------------------------------------
// Internal serde types for GitHub webhook payloads
// ---------------------------------------------------------------------------
//
// These structs mirror the relevant subset of GitHub's webhook JSON schemas.
// Only the fields that Jupiter needs are deserialized; everything else is
// silently ignored by serde.
/// Payload for `push` events.
///
/// Every field declared here is copied into [`RawForgeEvent::Push`] by
/// `parse_webhook`.
#[derive(Debug, Deserialize)]
struct GitHubPushPayload {
/// Full git ref, e.g. `refs/heads/main` or `refs/tags/v1.0`.
#[serde(rename = "ref")]
git_ref: String,
/// SHA before the push (all-zeros for newly created refs).
before: String,
/// SHA after the push.
after: String,
/// Repository the push belongs to; supplies `repo_owner` and `repo_name`.
repository: GitHubRepo,
/// User that triggered the push; its `login` becomes the event's `sender`.
sender: GitHubUser,
}
/// Repository object embedded in webhook payloads.
#[derive(Debug, Deserialize)]
struct GitHubRepo {
/// Owner of the repository; its `login` is forwarded as `repo_owner`.
owner: GitHubRepoOwner,
/// Repository name, forwarded as `repo_name`.
name: String,
/// Clone URL from the payload. Deserialized but not read anywhere in this
/// module, hence the `dead_code` allowance.
#[allow(dead_code)]
clone_url: Option<String>,
}
/// The `owner` sub-object inside a repository payload.
#[derive(Debug, Deserialize)]
struct GitHubRepoOwner {
/// Account login of the owner; required, so payloads without it fail to
/// deserialize.
login: String,
}
/// User object (sender) embedded in webhook payloads.
#[derive(Debug, Deserialize)]
struct GitHubUser {
/// Account login of the user; forwarded as the push event's `sender`.
login: String,
}
/// Payload for `pull_request` events.
#[derive(Debug, Deserialize)]
struct GitHubPRPayload {
/// The action that triggered this event (e.g. "opened", "synchronize").
/// Mapped by `GitHubProvider::parse_pr_action`; unrecognized actions cause
/// the event to be dropped.
action: String,
/// Pull request number.
number: u64,
/// Head/base refs of the pull request.
pull_request: GitHubPR,
/// Repository the pull request targets; supplies `repo_owner` and `repo_name`.
repository: GitHubRepo,
}
/// The `pull_request` object inside the PR event payload.
#[derive(Debug, Deserialize)]
struct GitHubPR {
/// Source side of the pull request; its `sha` is forwarded as `head_sha`.
head: GitHubPRRef,
/// Target side of the pull request; its `ref_name` is forwarded as `base_ref`.
base: GitHubPRRef,
}
/// A ref endpoint (head or base) of a pull request.
///
/// Both fields are required: a payload lacking either fails to deserialize.
#[derive(Debug, Deserialize)]
struct GitHubPRRef {
/// The commit SHA at this ref.
sha: String,
/// Branch name, read from the JSON `ref` key (`ref` is a Rust keyword,
/// hence the rename).
#[serde(rename = "ref")]
ref_name: String,
}
/// Request body for `POST /repos/{owner}/{repo}/statuses/{sha}`.
///
/// Maps to the GitHub REST API "Create a commit status" endpoint.
/// The `target_url` field is optional and links back to the Jupiter
/// build page for the evaluation.
#[derive(Debug, Serialize)]
struct GitHubStatusRequest {
/// One of: `"pending"`, `"success"`, `"failure"`, `"error"`
/// (see `GitHubProvider::github_status_string`).
state: String,
/// A label that identifies this status (e.g. `"jupiter-ci/eval"`).
context: String,
/// Human-readable description of the status.
description: String,
/// Optional URL linking to the Jupiter build details page. The key is left
/// out of the JSON entirely when this is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
target_url: Option<String>,
}
/// Minimal repo item returned by `GET /installation/repositories`.
///
/// `list_repos` converts each item into an `(owner, name, clone_url)` tuple.
#[derive(Debug, Deserialize)]
struct GitHubRepoListItem {
/// Owner of the repository; its `login` is the tuple's first element.
owner: GitHubRepoOwner,
/// Repository name.
name: String,
/// Clone URL exactly as reported by the API (required here, unlike in
/// webhook payloads).
clone_url: String,
}
/// Wrapper for the paginated response from `GET /installation/repositories`.
///
/// Only the `repositories` key is declared; other keys in the response are
/// ignored during deserialization.
#[derive(Debug, Deserialize)]
struct GitHubInstallationReposResponse {
/// Repositories contained in this page of the response.
repositories: Vec<GitHubRepoListItem>,
}
// ---------------------------------------------------------------------------
// Provider
// ---------------------------------------------------------------------------
/// GitHub App forge provider.
///
/// Implements [`ForgeProvider`] for GitHub.com and GitHub Enterprise.
/// Construct it with [`GitHubProvider::new`] and, if needed, redirect it with
/// [`GitHubProvider::with_api_base`].
///
/// # Fields
///
/// - `api_base` -- Base URL for API requests (default: `https://api.github.com`).
/// Overridden via [`with_api_base`](GitHubProvider::with_api_base) for
/// GitHub Enterprise.
/// - `app_id` / `private_key_pem` -- GitHub App credentials, reserved for
/// future automatic JWT-based token rotation.
/// - `webhook_secret` -- The shared secret configured in the GitHub webhook
/// settings, used to compute the expected HMAC-SHA256 digest.
/// - `api_token` -- An installation access token (or personal access token)
/// used as a Bearer token for all outbound API requests.
/// - `client` -- A reusable `reqwest::Client` for connection pooling.
pub struct GitHubProvider {
/// Base URL for GitHub API requests (no trailing slash; `with_api_base`
/// strips any that are passed in).
api_base: String,
/// GitHub App ID (reserved for future JWT-based token minting).
/// Stored but never read in this module, hence the `dead_code` allowance.
#[allow(dead_code)]
app_id: u64,
/// PEM-encoded RSA private key for the GitHub App (reserved for future
/// JWT-based token minting). Stored but never read in this module.
#[allow(dead_code)]
private_key_pem: String,
/// Shared HMAC secret for webhook signature verification.
webhook_secret: String,
/// Bearer token used for all outbound GitHub API calls; also embedded in
/// the URL returned by `clone_url`.
api_token: String,
/// Reusable HTTP client with connection pooling.
client: Client,
}
impl GitHubProvider {
    /// Build a provider that talks to `https://api.github.com`.
    ///
    /// # Parameters
    ///
    /// * `app_id` -- Numeric GitHub App ID. Stored only; no API call in this
    ///   module reads it yet.
    /// * `private_key_pem` -- PEM-encoded private key of the GitHub App.
    ///   Stored only; not used for signing yet.
    /// * `webhook_secret` -- Shared secret used to verify the HMAC-SHA256
    ///   signature of incoming webhooks.
    /// * `api_token` -- Installation access token (or personal access token)
    ///   sent as a `Bearer` token on outbound API calls.
    ///
    /// A fresh `reqwest::Client` is created for the provider.
    pub fn new(
        app_id: u64,
        private_key_pem: String,
        webhook_secret: String,
        api_token: String,
    ) -> Self {
        let api_base = String::from("https://api.github.com");
        let client = Client::new();
        Self {
            api_base,
            app_id,
            private_key_pem,
            webhook_secret,
            api_token,
            client,
        }
    }

    /// Builder method: replace the API base URL.
    ///
    /// Intended for GitHub Enterprise Server instances or for tests against
    /// a mock server. Trailing `/` characters are removed from `base`.
    ///
    /// # Example
    ///
    /// ```ignore
    /// let provider = GitHubProvider::new(app_id, key, secret, token)
    ///     .with_api_base("https://github.corp.example.com/api/v3".into());
    /// ```
    pub fn with_api_base(self, base: String) -> Self {
        Self {
            api_base: base.trim_end_matches('/').to_string(),
            ..self
        }
    }

    // -- helpers --

    /// Translate a [`CommitStatus`] into the `state` string of a GitHub
    /// status request.
    fn github_status_string(status: CommitStatus) -> &'static str {
        match status {
            CommitStatus::Error => "error",
            CommitStatus::Failure => "failure",
            CommitStatus::Success => "success",
            CommitStatus::Pending => "pending",
        }
    }

    /// Translate a GitHub pull request action into a [`PullRequestAction`].
    ///
    /// Only `opened`, `synchronize`, `reopened` and `closed` are recognized.
    /// Every other action (e.g. `"labeled"`, `"assigned"`) yields `None`,
    /// which makes `parse_webhook` skip the event with `Ok(None)`.
    fn parse_pr_action(action: &str) -> Option<PullRequestAction> {
        let parsed = match action {
            "opened" => PullRequestAction::Opened,
            "synchronize" => PullRequestAction::Synchronize,
            "reopened" => PullRequestAction::Reopened,
            "closed" => PullRequestAction::Closed,
            _ => return None,
        };
        Some(parsed)
    }
}
#[async_trait]
impl ForgeProvider for GitHubProvider {
/// Identify this provider as [`ForgeType::GitHub`].
fn forge_type(&self) -> ForgeType {
ForgeType::GitHub
}
/// Check the HMAC-SHA256 signature of a GitHub webhook.
///
/// The `X-Hub-Signature-256` header is expected in the form
/// `sha256=<hex-digest>`. The steps are:
///
/// 1. No header: return `Ok(false)`.
/// 2. Remove the `sha256=` prefix; a header without it is an error.
/// 3. Compute HMAC-SHA256 of the raw body with `webhook_secret` and
///    hex-encode it.
/// 4. Compare the computed and received hex strings byte by byte.
///
/// The comparison XORs each byte pair and ORs the result into an
/// accumulator, so every byte is visited regardless of where the first
/// difference is; the signatures match only if the accumulator stays zero.
/// A length mismatch short-circuits to `Ok(false)`.
///
/// # Errors
///
/// [`ForgeError::ParseError`] if the header lacks the `sha256=` prefix or
/// the HMAC cannot be initialized from the webhook secret.
fn verify_webhook(
    &self,
    signature_header: Option<&str>,
    body: &[u8],
) -> Result<bool, ForgeError> {
    let Some(header) = signature_header else {
        return Ok(false);
    };

    // GitHub sends "sha256=<hex>"; only the hex part is compared.
    let Some(received) = header.strip_prefix("sha256=") else {
        return Err(ForgeError::ParseError("missing sha256= prefix".into()));
    };

    let mut hmac = HmacSha256::new_from_slice(self.webhook_secret.as_bytes())
        .map_err(|e| ForgeError::ParseError(format!("HMAC init error: {e}")))?;
    hmac.update(body);
    let expected = hex::encode(hmac.finalize().into_bytes());

    let expected = expected.as_bytes();
    let received = received.as_bytes();
    if expected.len() != received.len() {
        return Ok(false);
    }

    let mut diff = 0u8;
    for (lhs, rhs) in expected.iter().zip(received) {
        diff |= lhs ^ rhs;
    }
    Ok(diff == 0)
}
/// Parse a GitHub webhook payload into a [`RawForgeEvent`].
///
/// Recognized `X-GitHub-Event` values:
///
/// - `"push"` -- branch/tag push; produces [`RawForgeEvent::Push`].
/// - `"pull_request"` -- PR lifecycle; produces [`RawForgeEvent::PullRequest`]
/// for `opened`, `synchronize`, `reopened`, and `closed` actions.
/// Other PR actions (e.g. `labeled`, `assigned`) return `Ok(None)`.
///
/// All other event types are silently ignored (`Ok(None)`).
fn parse_webhook(
&self,
event_type: &str,
body: &[u8],
) -> Result<Option<RawForgeEvent>, ForgeError> {
match event_type {
"push" => {
let payload: GitHubPushPayload = serde_json::from_slice(body)?;
debug!(
repo = %payload.repository.name,
git_ref = %payload.git_ref,
"parsed GitHub push event"
);
Ok(Some(RawForgeEvent::Push {
repo_owner: payload.repository.owner.login,
repo_name: payload.repository.name,
git_ref: payload.git_ref,
before: payload.before,
after: payload.after,
sender: payload.sender.login,
}))
}
"pull_request" => {
let payload: GitHubPRPayload = serde_json::from_slice(body)?;
let action = match Self::parse_pr_action(&payload.action) {
Some(a) => a,
None => {
debug!(action = %payload.action, "ignoring PR action");
return Ok(None);
}
};
Ok(Some(RawForgeEvent::PullRequest {
repo_owner: payload.repository.owner.login,
repo_name: payload.repository.name,
action,
pr_number: payload.number,
head_sha: payload.pull_request.head.sha,
base_ref: payload.pull_request.base.ref_name,
}))
}
other => {
debug!(event = %other, "ignoring unhandled GitHub event type");
Ok(None)
}
}
}
/// Report a commit status to GitHub via `POST /repos/{owner}/{repo}/statuses/{sha}`.
///
/// This makes Jupiter CI results appear as status checks on pull requests
/// and commit pages in the GitHub UI. The request uses Bearer
/// authentication with `api_token` and sends the
/// `application/vnd.github+json` Accept header plus a `jupiter-ci`
/// User-Agent.
///
/// GitHub rejects status descriptions longer than 140 characters with a
/// validation error, which would lose the whole status report. The
/// description is therefore truncated to its first 140 characters before
/// sending; a missing description is sent as an empty string.
///
/// # Errors
///
/// * [`ForgeError::HttpError`] when the request cannot be sent.
/// * [`ForgeError::ApiError`] when GitHub answers with a non-success
///   status; the message contains the status code and the response body,
///   and the failure is also logged at `warn` level.
async fn set_commit_status(
    &self,
    repo_owner: &str,
    repo_name: &str,
    commit_sha: &str,
    status: &CommitStatusUpdate,
) -> Result<(), ForgeError> {
    // Maximum description length accepted by the commit status endpoint.
    const MAX_DESCRIPTION_CHARS: usize = 140;

    let url = format!(
        "{}/repos/{}/{}/statuses/{}",
        self.api_base, repo_owner, repo_name, commit_sha,
    );

    // Truncate on character (not byte) boundaries so multi-byte text is
    // never split in the middle of a code point.
    let description: String = status
        .description
        .as_deref()
        .unwrap_or("")
        .chars()
        .take(MAX_DESCRIPTION_CHARS)
        .collect();

    let body = GitHubStatusRequest {
        state: Self::github_status_string(status.status).to_string(),
        context: status.context.clone(),
        description,
        target_url: status.target_url.clone(),
    };

    let resp = self
        .client
        .post(&url)
        .bearer_auth(&self.api_token)
        .header("Accept", "application/vnd.github+json")
        .header("User-Agent", "jupiter-ci")
        .json(&body)
        .send()
        .await?;

    if !resp.status().is_success() {
        let status_code = resp.status();
        let text = resp.text().await.unwrap_or_default();
        warn!(%status_code, body = %text, "GitHub status API error");
        return Err(ForgeError::ApiError(format!(
            "GitHub API returned {status_code}: {text}"
        )));
    }

    Ok(())
}
/// Build an authenticated HTTPS clone URL for a GitHub repository.
///
/// The provider's `api_token` is embedded as the password of the special
/// `x-access-token` user, giving a URL of the form:
///
/// ```text
/// https://x-access-token:<token>@github.com/<owner>/<repo>.git
/// ```
///
/// Because the URL contains the token, treat the returned string as a
/// secret.
///
/// Note: the host is hardcoded to `github.com`; it is not derived from
/// `api_base`, so the result does not point at a GitHub Enterprise host.
///
/// This implementation never returns an error.
async fn clone_url(
    &self,
    repo_owner: &str,
    repo_name: &str,
) -> Result<String, ForgeError> {
    let url = format!(
        "https://x-access-token:{}@github.com/{}/{}.git",
        self.api_token, repo_owner, repo_name,
    );
    Ok(url)
}
/// List repositories accessible to the GitHub App installation.
///
/// Calls `GET /installation/repositories` which returns all repos the
/// App has been granted access to. The response is mapped to
/// `(owner, name, clone_url)` tuples.
///
/// Note: This does not yet handle pagination; installations with more
/// than 30 repositories (GitHub's default page size) will only return
/// the first page.
async fn list_repos(&self) -> Result<Vec<(String, String, String)>, ForgeError> {
let url = format!("{}/installation/repositories", self.api_base);
let resp = self
.client
.get(&url)
.bearer_auth(&self.api_token)
.header("Accept", "application/vnd.github+json")
.header("User-Agent", "jupiter-ci")
.send()
.await?;
if !resp.status().is_success() {
let status_code = resp.status();
let text = resp.text().await.unwrap_or_default();
return Err(ForgeError::ApiError(format!(
"GitHub API returned {status_code}: {text}"
)));
}
let body: GitHubInstallationReposResponse = resp.json().await?;
Ok(body
.repositories
.into_iter()
.map(|r| (r.owner.login, r.name, r.clone_url))
.collect())
}
}

View file

@ -0,0 +1,281 @@
//! # jupiter-forge -- Forge Integration Layer for Jupiter CI
//!
//! This crate is the bridge between Jupiter (a self-hosted, wire-compatible
//! replacement for [hercules-ci.com](https://hercules-ci.com)) and the external
//! code forges (GitHub, Gitea, Radicle) where source code lives.
//!
//! ## Architectural Role
//!
//! In the Hercules CI model, the server must:
//!
//! 1. **Receive webhooks** when code changes (pushes, pull requests, patches).
//! 2. **Verify** the webhook is authentic (HMAC signatures or trusted transport).
//! 3. **Parse** the forge-specific JSON payload into a common internal event.
//! 4. **Report CI status** back to the forge so that PR/patch checks reflect the
//! build outcome (pending / success / failure).
//! 5. **Provide authenticated clone URLs** so that Hercules agents can fetch
//! the source code.
//!
//! This crate encapsulates steps 1-5 behind the [`ForgeProvider`] trait. The
//! Jupiter server holds a registry of providers keyed by [`ForgeType`]; when an
//! HTTP request arrives at the webhook endpoint, the server inspects headers to
//! determine the forge, looks up the matching provider, calls
//! [`ForgeProvider::verify_webhook`] and then [`ForgeProvider::parse_webhook`].
//!
//! ## Why `RawForgeEvent` Instead of Database Types?
//!
//! Webhook payloads identify repositories with forge-native names (e.g.
//! `owner/repo` on GitHub, an RID on Radicle) rather than Jupiter database
//! UUIDs. [`RawForgeEvent`] preserves these raw identifiers so the crate
//! stays independent of any database layer. The server resolves raw events
//! into fully-typed `jupiter_api_types::ForgeEvent` objects with DB-backed
//! entity references before scheduling evaluations.
//!
//! ## Protocol Differences Between Forges
//!
//! | Aspect | GitHub | Gitea | Radicle |
//! |----------------------|---------------------------------|-------------------------------|--------------------------------|
//! | **Signature header** | `X-Hub-Signature-256` | `X-Gitea-Signature` | None (trusted local transport) |
//! | **Signature format** | `sha256=<hex>` (prefixed) | Raw hex HMAC-SHA256 | N/A |
//! | **Event header** | `X-GitHub-Event` | `X-Gitea-Event` | CI broker message type |
//! | **Auth model** | GitHub App (installation token) | Personal access token | Local node identity |
//! | **Clone URL** | `https://x-access-token:<tok>@github.com/...` | `https://<host>/<owner>/<repo>.git` | `rad://<RID>` |
//! | **Repo identifier** | `(owner, name)` | `(owner, name)` | RID string |
//! | **PR concept** | Pull Request | Pull Request | Patch (with revisions) |
//!
//! ## Submodules
//!
//! - [`error`] -- shared error enum for all forge operations.
//! - [`github`] -- GitHub / GitHub Enterprise provider.
//! - [`gitea`] -- Gitea / Forgejo provider.
//! - [`radicle`] -- Radicle provider (webhook and polling modes).
pub mod error;
pub mod gitea;
pub mod github;
pub mod radicle;
use async_trait::async_trait;
use jupiter_api_types::{CommitStatusUpdate, ForgeType, PullRequestAction};
/// A raw forge event carrying repository-identifying information (owner/name or RID)
/// rather than a resolved database UUID.
///
/// The server layer resolves these into `jupiter_api_types::ForgeEvent` by
/// looking up the repository in the Jupiter database.
///
/// # Design Rationale
///
/// This enum intentionally uses `String` fields (not database IDs) so that the
/// forge crate has zero coupling to the database schema. Each forge backend
/// populates the variant that matches the incoming webhook:
///
/// - **GitHub / Gitea**: emit [`Push`](RawForgeEvent::Push) or
///   [`PullRequest`](RawForgeEvent::PullRequest) with `repo_owner` and
///   `repo_name` strings extracted from the JSON payload.
/// - **Radicle**: emits [`PatchUpdated`](RawForgeEvent::PatchUpdated) with the
///   Radicle RID (e.g. `rad:z2...`) and patch/revision identifiers, or
///   [`Push`](RawForgeEvent::Push) with the RID in the `repo_name` field and
///   an empty `repo_owner` (Radicle has no owner concept).
///
/// # Placeholder Values
///
/// Fields are plain `String`s rather than `Option<String>`, so a backend that
/// lacks a value has to substitute a placeholder. The Radicle provider, for
/// instance, fills in an empty string for a missing `commit`, `sender`, or
/// `revision_id`. Consumers should therefore treat an empty string as
/// "unknown", not as a valid identifier.
#[derive(Debug, Clone)]
pub enum RawForgeEvent {
/// A branch or tag was pushed.
///
/// Emitted by all three forges. For Radicle push events the `repo_owner`
/// is empty and `repo_name` holds the RID.
Push {
/// Repository owner login (empty for Radicle).
repo_owner: String,
/// Repository name, or the Radicle RID for Radicle push events.
repo_name: String,
/// Full git ref, e.g. `refs/heads/main`. The Radicle provider falls back
/// to `refs/heads/main` when the payload carries no ref.
git_ref: String,
/// Commit SHA before the push (all-zeros for new branches). The Radicle
/// provider also substitutes 40 zeros when the payload omits `before`.
before: String,
/// Commit SHA after the push. Empty for Radicle push events whose
/// payload omits `commit`.
after: String,
/// Login / node-ID of the user who pushed. Empty for Radicle push events
/// whose payload omits `sender`.
sender: String,
},
/// A pull request was opened, synchronized, reopened, or closed.
///
/// Used by GitHub and Gitea. Radicle uses [`PatchUpdated`](RawForgeEvent::PatchUpdated)
/// instead, since its collaboration model is patch-based rather than
/// branch-based.
PullRequest {
/// Repository owner login.
repo_owner: String,
/// Repository name.
repo_name: String,
/// The action that triggered this event (opened, synchronize, etc.).
action: PullRequestAction,
/// Pull request number.
pr_number: u64,
/// SHA of the head commit on the PR branch.
head_sha: String,
/// Name of the base branch the PR targets.
base_ref: String,
},
/// A Radicle patch was created or updated with a new revision.
///
/// Radicle's collaboration model uses "patches" instead of pull requests.
/// Each patch can have multiple revisions (analogous to force-pushing a
/// PR branch). This event fires for both new patches and new revisions
/// on existing patches. It is produced both by the Radicle webhook parser
/// and by the Radicle polling path.
PatchUpdated {
/// The Radicle Repository ID (e.g. `rad:z2...`).
repo_rid: String,
/// The unique patch identifier.
patch_id: String,
/// The specific revision within the patch. Empty when a Radicle webhook
/// payload does not carry a revision ID.
revision_id: String,
/// The commit SHA at the tip of this revision.
head_sha: String,
},
}
/// Trait implemented by each forge backend (GitHub, Gitea, Radicle).
///
/// The Jupiter server maintains a `HashMap<ForgeType, Box<dyn ForgeProvider>>`
/// registry. When a webhook request arrives, the server:
///
/// 1. Determines the [`ForgeType`] from request headers.
/// 2. Looks up the corresponding `ForgeProvider`.
/// 3. Calls [`verify_webhook`](ForgeProvider::verify_webhook) to authenticate
///    the request.
/// 4. Calls [`parse_webhook`](ForgeProvider::parse_webhook) to extract a
///    [`RawForgeEvent`].
/// 5. Later calls [`set_commit_status`](ForgeProvider::set_commit_status) to
///    report evaluation results back to the forge.
///
/// # Sync vs Async Methods
///
/// Webhook verification and parsing are **synchronous** because they operate
/// purely on in-memory data (HMAC computation + JSON deserialization). Methods
/// that may need to talk to the forge API (`set_commit_status`, `clone_url`,
/// `list_repos`) are **async**. An implementation is free to answer without
/// any I/O -- the Radicle provider's `clone_url` is pure string manipulation.
///
/// # Thread Safety
///
/// Providers must be `Send + Sync` so they can be shared across the async
/// task pool in the server. All mutable state (e.g. HTTP clients) is
/// internally synchronized by `reqwest::Client`.
#[async_trait]
pub trait ForgeProvider: Send + Sync {
/// Returns the [`ForgeType`] discriminant for this provider.
///
/// Used by the server to route incoming webhooks to the correct provider
/// implementation.
fn forge_type(&self) -> ForgeType;
/// Verify the authenticity of an incoming webhook request.
///
/// Each forge uses a different mechanism:
///
/// - **GitHub**: HMAC-SHA256 with a `sha256=` hex prefix in the
///   `X-Hub-Signature-256` header.
/// - **Gitea**: HMAC-SHA256 with raw hex in the `X-Gitea-Signature` header.
/// - **Radicle**: No signature -- Radicle CI broker connections are trusted
///   local transport, so this always returns `Ok(true)` and both arguments
///   are ignored.
///
/// Both GitHub and Gitea implementations use **constant-time comparison**
/// (byte-wise XOR accumulation) to prevent timing side-channel attacks
/// that could allow an attacker to iteratively guess a valid signature.
///
/// # Parameters
///
/// * `signature_header` -- the raw value of the forge's signature header,
///   or `None` if the header was absent. A missing header causes GitHub
///   and Gitea to return `Ok(false)`.
/// * `body` -- the raw HTTP request body bytes used as HMAC input.
///
/// # Returns
///
/// * `Ok(true)` -- signature is valid (or the forge does not sign requests).
/// * `Ok(false)` -- signature is invalid or missing.
/// * `Err(ForgeError)` -- the signature header was malformed or HMAC
///   initialization failed.
fn verify_webhook(
&self,
signature_header: Option<&str>,
body: &[u8],
) -> Result<bool, error::ForgeError>;
/// Parse a verified webhook payload into a [`RawForgeEvent`].
///
/// This should only be called **after** [`verify_webhook`](ForgeProvider::verify_webhook)
/// returns `Ok(true)`.
///
/// # Parameters
///
/// * `event_type` -- the value of the forge's event-type header:
///   - GitHub: `X-GitHub-Event` (e.g. `"push"`, `"pull_request"`).
///   - Gitea: `X-Gitea-Event` (e.g. `"push"`, `"pull_request"`).
///   - Radicle: CI broker message type (`"patch"`, `"patch_updated"`,
///     `"patch_created"`, or `"push"`).
/// * `body` -- the raw JSON request body.
///
/// # Returns
///
/// * `Ok(Some(event))` -- a recognized, actionable event.
/// * `Ok(None)` -- a valid but uninteresting event that Jupiter does not
///   act on (e.g. GitHub "star" or "fork" events, or unhandled PR actions
///   like "labeled").
/// * `Err(ForgeError)` -- the payload could not be parsed, or a field the
///   provider requires was missing.
fn parse_webhook(
&self,
event_type: &str,
body: &[u8],
) -> Result<Option<RawForgeEvent>, error::ForgeError>;
/// Report a commit status back to the forge so that PR checks / patch
/// status reflect the Jupiter CI evaluation result.
///
/// This is the primary feedback mechanism: the server calls this method
/// to mark a commit as `pending`, `success`, `failure`, or `error`. On
/// GitHub and Gitea this creates a "status check" visible in the PR UI.
/// On Radicle it posts to the `radicle-httpd` status API.
///
/// # Parameters
///
/// * `repo_owner` -- repository owner (ignored for Radicle).
/// * `repo_name` -- repository name, or RID for Radicle.
/// * `commit_sha` -- the full commit SHA to attach the status to.
/// * `status` -- the status payload (state, context, description, URL).
///
/// # Errors
///
/// Returns `Err(ForgeError)` when the request cannot be sent or the forge
/// answers with a non-success HTTP status.
async fn set_commit_status(
&self,
repo_owner: &str,
repo_name: &str,
commit_sha: &str,
status: &CommitStatusUpdate,
) -> Result<(), error::ForgeError>;
/// Return a clone URL that a Hercules agent can use to fetch the
/// repository.
///
/// The URL format varies by forge:
///
/// - **GitHub**: `https://x-access-token:<installation-token>@github.com/<owner>/<repo>.git`
///   -- uses the special `x-access-token` username that GitHub App
///   installation tokens require.
/// - **Gitea**: `https://<host>/<owner>/<repo>.git` -- authentication is
///   handled out-of-band (e.g. via git credential helpers or `.netrc`).
/// - **Radicle**: `repo_name` is the RID. A value that already starts with
///   `rad:` (e.g. `rad:z2...`) is returned unchanged; any other value is
///   returned as `rad://<value>`. The local Radicle node handles
///   authentication via its cryptographic identity.
async fn clone_url(
&self,
repo_owner: &str,
repo_name: &str,
) -> Result<String, error::ForgeError>;
/// List all repositories accessible through this forge connection.
///
/// Returns a vec of `(owner, name, clone_url)` tuples. For Radicle the
/// `owner` is always an empty string since Radicle repositories are
/// identified solely by their RID.
///
/// NOTE(review): the Radicle provider puts the *human-readable* repository
/// name in the `name` slot here, whereas its push events and its
/// `set_commit_status` / `clone_url` methods use the RID as `repo_name`.
/// Callers that correlate the two need to confirm which identifier they
/// hold.
///
/// Used during initial setup and periodic sync to discover which
/// repositories should be tracked by Jupiter.
async fn list_repos(&self) -> Result<Vec<(String, String, String)>, error::ForgeError>;
}

View file

@ -0,0 +1,506 @@
//! Radicle forge provider for Jupiter CI.
//!
//! [Radicle](https://radicle.xyz) is a peer-to-peer code collaboration
//! network. Unlike GitHub and Gitea, Radicle has no central server -- code
//! is replicated across nodes using a gossip protocol. This creates
//! fundamental differences in how Jupiter integrates with Radicle compared
//! to the other forges:
//!
//! - **No webhook signatures**: Radicle CI broker events arrive over trusted
//! local transport (typically a Unix socket or localhost HTTP), so there is
//! no HMAC verification. [`verify_webhook`](ForgeProvider::verify_webhook)
//! always returns `Ok(true)`.
//!
//! - **Repository identity**: Repos are identified by a Radicle ID (RID, e.g.
//! `rad:z2...`) rather than an `(owner, name)` pair. The `repo_owner`
//! field is always empty in events from this provider.
//!
//! - **Patches instead of PRs**: Radicle uses "patches" as its collaboration
//! primitive. Each patch can have multiple revisions (analogous to
//! force-pushing a PR branch). This provider emits
//! [`RawForgeEvent::PatchUpdated`] rather than `PullRequest`.
//!
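//! - **Clone URLs**: Radicle repos are not cloned over HTTPS. The provider
//!   hands out the RID unchanged when it already starts with `rad:` (e.g.
//!   `rad:z2...`) and prefixes bare identifiers with `rad://`. Agents must
//!   have the `rad` CLI and a local Radicle node configured.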
//!
//! ## Two Operating Modes
//!
//! The provider supports two modes, selected via [`RadicleMode`]:
//!
//! ### `RadicleMode::CiBroker` (Webhook Mode)
//!
//! In this mode, the `radicle-ci-broker` component pushes events to Jupiter
//! as HTTP POST requests. The provider parses these using
//! [`parse_webhook`](ForgeProvider::parse_webhook) with event types like
//! `"patch"`, `"patch_updated"`, `"patch_created"`, or `"push"`.
//!
//! ### `RadicleMode::HttpdPolling` (Polling Mode)
//!
//! In this mode, Jupiter periodically calls [`poll_changes`](RadicleProvider::poll_changes)
//! to query the `radicle-httpd` REST API for new or updated patches. The
//! caller maintains a `HashMap<String, String>` whose keys have the form
//! `"<rid>/patch/<patch_id>"` and whose values are the last-seen commit OID
//! of that patch. The provider compares the latest revision of every patch
//! against this map and returns an event for each patch whose OID is new or
//! has changed.
//!
//! Polling is useful when `radicle-ci-broker` is not available, at the cost
//! of higher latency (changes are detected at the poll interval rather than
//! immediately).
use std::collections::HashMap;
use async_trait::async_trait;
use reqwest::Client;
use serde::Deserialize;
use tracing::{debug, warn};
use crate::error::ForgeError;
use crate::{ForgeProvider, RawForgeEvent};
use jupiter_api_types::{CommitStatus, CommitStatusUpdate, ForgeType, RadicleMode};
// ---------------------------------------------------------------------------
// Internal serde types for Radicle httpd API responses
// ---------------------------------------------------------------------------
//
// These structs model the subset of the `radicle-httpd` REST API that Jupiter
// needs for polling mode and status reporting.
/// A repository as returned by `GET /api/v1/repos` on `radicle-httpd`.
///
/// JSON keys not listed here are ignored during deserialization. `rid` and
/// `name` are mandatory: an entry lacking either one makes decoding of the
/// whole repository list fail.
#[derive(Debug, Deserialize)]
struct RadicleRepo {
/// The Radicle Repository ID (e.g. `rad:z2...`).
rid: String,
/// Human-readable repository name.
name: String,
/// Optional free-text description. Deserialized but never read by this
/// provider, hence the `dead_code` allowance.
#[allow(dead_code)]
description: Option<String>,
}
/// A patch (Radicle's equivalent of a pull request) from the httpd API.
///
/// NOTE(review): `title` and `state` are never read by this provider, yet
/// they are non-optional, so a patch object missing either key makes the
/// whole patch list fail to decode. Confirm that this strictness is intended.
#[derive(Debug, Deserialize)]
struct RadiclePatch {
/// Unique patch identifier.
id: String,
/// Patch title. Kept only so the struct mirrors the API; not read.
#[allow(dead_code)]
title: String,
/// Lifecycle state of the patch. Not read: polling reports patches
/// regardless of their state.
#[allow(dead_code)]
state: RadiclePatchState,
/// Ordered list of revisions; the last entry is the most recent.
/// An empty list causes the patch to be skipped when polling.
revisions: Vec<RadicleRevision>,
}
/// A single revision within a Radicle patch.
///
/// Each revision represents a complete rewrite of the patch (analogous to
/// force-pushing a PR branch). The `oid` is the git commit SHA at the
/// tip of that revision; polling compares it against the caller's
/// last-seen value to decide whether the patch has changed.
#[derive(Debug, Deserialize)]
struct RadicleRevision {
/// Revision identifier. Reported as `revision_id` in patch events.
id: String,
/// Git object ID (commit SHA) at the tip of this revision. Reported as
/// `head_sha` in patch events.
oid: String,
}
/// The state sub-object of a Radicle patch (e.g. "open", "merged", "closed").
///
/// Modelled as a struct because the API nests the status string inside an
/// object. The value is deserialized but not inspected by this provider.
#[derive(Debug, Deserialize)]
struct RadiclePatchState {
/// Status label as sent by `radicle-httpd`. Never read, hence the
/// `dead_code` allowance.
#[allow(dead_code)]
status: String,
}
/// Webhook payload from the Radicle CI broker or a custom webhook sender.
///
/// All fields are optional because Radicle uses a single payload structure
/// for multiple event types. Patch events populate `rid`, `patch_id`,
/// `revision_id`, and `commit`. Push events populate `rid`, `git_ref`,
/// `before`, `commit`, and `sender`.
///
/// Which fields are actually mandatory is decided by the webhook parser,
/// not by serde: patch events require `rid`, `patch_id`, and `commit`; push
/// events require only `rid`. Everything else falls back to a default.
#[derive(Debug, Deserialize)]
struct RadicleBrokerPayload {
/// The repository RID, e.g. `rad:z2...`. Required for every handled event.
rid: Option<String>,
/// Patch ID (present for patch events). Required for patch events.
patch_id: Option<String>,
/// Revision ID within the patch (present for patch events). Defaults to
/// an empty string when absent.
revision_id: Option<String>,
/// The commit SHA at HEAD. Required for patch events; defaults to an
/// empty string for push events.
commit: Option<String>,
/// Full git ref (present for push events), e.g. `refs/heads/main`.
/// Arrives under the JSON key `ref`, which is a Rust keyword and is
/// therefore renamed. Defaults to `refs/heads/main` when absent.
#[serde(rename = "ref")]
git_ref: Option<String>,
/// Previous commit SHA (present for push events). Defaults to 40 zeros
/// when absent.
before: Option<String>,
/// Sender node ID or identity (present for push events). Defaults to an
/// empty string when absent.
sender: Option<String>,
}
// ---------------------------------------------------------------------------
// Provider
// ---------------------------------------------------------------------------
/// Radicle forge provider for Jupiter CI.
///
/// Supports two operating modes:
///
/// - **`CiBroker`** (webhook mode): receives JSON events pushed by
///   `radicle-ci-broker` over trusted local HTTP.
/// - **`HttpdPolling`** (poll mode): periodically queries `radicle-httpd`
///   for new patches and compares against last-seen state.
///
/// The mode is only enforced by [`poll_changes`](RadicleProvider::poll_changes),
/// which refuses to run unless the provider is in `HttpdPolling` mode.
/// Webhook parsing, status reporting, and repository listing behave the same
/// in both modes.
///
/// # Fields
///
/// - `httpd_url` -- Base URL of the `radicle-httpd` instance (e.g.
///   `http://localhost:8080`). Used for API calls in both modes.
/// - `node_id` -- The local Radicle node identity. Reserved for future use
///   (e.g. authenticating status updates).
/// - `mode` -- The active operating mode ([`RadicleMode`]).
/// - `poll_interval_secs` -- Seconds between poll cycles (only meaningful in
///   `HttpdPolling` mode; the actual scheduling is done by the caller).
/// - `client` -- Reusable `reqwest::Client` for connection pooling.
pub struct RadicleProvider {
/// Base URL of the `radicle-httpd` REST API (no trailing slash; the
/// constructor strips any that are supplied).
httpd_url: String,
/// Local Radicle node ID (reserved for future authenticated operations).
/// Stored but never read, hence the `dead_code` allowance.
#[allow(dead_code)]
node_id: String,
/// Operating mode: webhook (`CiBroker`) or poll (`HttpdPolling`).
mode: RadicleMode,
/// Poll interval in seconds (only used in `HttpdPolling` mode). The
/// provider itself never reads it -- the timer lives with the caller --
/// hence the `dead_code` allowance.
#[allow(dead_code)]
poll_interval_secs: u64,
/// Reusable HTTP client.
client: Client,
}
impl RadicleProvider {
    /// Build a provider that talks to the given `radicle-httpd` instance.
    ///
    /// # Parameters
    ///
    /// * `httpd_url` -- Base URL of the local `radicle-httpd`, e.g.
    ///   `http://localhost:8080`. Trailing slashes are removed so that path
    ///   segments can be appended with a single `/`. The URL serves both the
    ///   polling endpoints (`/api/v1/repos`, `/api/v1/repos/{rid}/patches`)
    ///   and status reporting (`/api/v1/repos/{rid}/statuses/{sha}`).
    /// * `node_id` -- Identity of the local Radicle node. Stored for future
    ///   authenticated operations; nothing reads it yet.
    /// * `mode` -- [`RadicleMode::CiBroker`] for webhook-driven operation or
    ///   [`RadicleMode::HttpdPolling`] for periodic polling.
    /// * `poll_interval_secs` -- Desired delay between calls to
    ///   [`poll_changes`](RadicleProvider::poll_changes). The provider only
    ///   stores the value; the caller owns the timer.
    pub fn new(
        httpd_url: String,
        node_id: String,
        mode: RadicleMode,
        poll_interval_secs: u64,
    ) -> Self {
        let httpd_url = httpd_url.trim_end_matches('/').to_string();
        Self {
            httpd_url,
            node_id,
            mode,
            poll_interval_secs,
            client: Client::new(),
        }
    }

    /// Ask `radicle-httpd` which patches have a head the caller has not seen.
    ///
    /// This drives `HttpdPolling` mode. Every repository known to the httpd
    /// instance is listed, every patch of every repository is fetched, and
    /// the **latest revision** of each patch (the last element of its
    /// `revisions` array) is compared against `known_refs`. A patch whose
    /// latest `oid` differs from the recorded value -- or that has no record
    /// at all -- yields one [`RawForgeEvent::PatchUpdated`]. Patches without
    /// any revision are skipped.
    ///
    /// `known_refs` is not modified; the caller must record the returned
    /// heads itself once the events have been processed.
    ///
    /// # Parameters
    ///
    /// * `known_refs` -- Map from `"<rid>/patch/<patch_id>"` to the last
    ///   commit SHA seen for that patch. With an empty map every patch is
    ///   reported.
    ///
    /// # Errors
    ///
    /// * `ForgeError::NotConfigured` when the provider is not in
    ///   `HttpdPolling` mode.
    /// * Any error from listing repositories or patches. The first failing
    ///   request aborts the whole poll and no events are returned, not even
    ///   for repositories that were already scanned.
    pub async fn poll_changes(
        &self,
        known_refs: &HashMap<String, String>,
    ) -> Result<Vec<RawForgeEvent>, ForgeError> {
        if self.mode != RadicleMode::HttpdPolling {
            return Err(ForgeError::NotConfigured(
                "poll_changes called but provider is in webhook mode".into(),
            ));
        }

        let mut changed = Vec::new();
        for repo in self.fetch_repos().await? {
            for patch in self.fetch_patches(&repo.rid).await? {
                // Only the newest revision is relevant; older ones were
                // superseded by it.
                let latest = match patch.revisions.last() {
                    Some(rev) => rev,
                    None => continue,
                };

                let key = format!("{}/patch/{}", repo.rid, patch.id);
                if known_refs.get(&key) == Some(&latest.oid) {
                    // Head unchanged since the caller last looked.
                    continue;
                }

                changed.push(RawForgeEvent::PatchUpdated {
                    repo_rid: repo.rid.clone(),
                    patch_id: patch.id.clone(),
                    revision_id: latest.id.clone(),
                    head_sha: latest.oid.clone(),
                });
            }
        }
        Ok(changed)
    }

    // -- internal helpers ---------------------------------------------------

    /// List every repository `radicle-httpd` knows about.
    ///
    /// Issues `GET /api/v1/repos` without any authentication headers. A
    /// non-success HTTP status is turned into `ForgeError::ApiError`
    /// carrying the status code and the response body.
    async fn fetch_repos(&self) -> Result<Vec<RadicleRepo>, ForgeError> {
        let endpoint = format!("{}/api/v1/repos", self.httpd_url);
        let resp = self.client.get(&endpoint).send().await?;

        if resp.status().is_success() {
            return Ok(resp.json::<Vec<RadicleRepo>>().await?);
        }

        let status_code = resp.status();
        // The body is diagnostic only, so a failure to read it is ignored.
        let text = resp.text().await.unwrap_or_default();
        Err(ForgeError::ApiError(format!(
            "radicle-httpd returned {status_code}: {text}"
        )))
    }

    /// List the patches of one repository.
    ///
    /// Issues `GET /api/v1/repos/{rid}/patches`. No state filter is sent and
    /// none is applied to the response, so whatever the endpoint returns is
    /// handed back unchanged. A non-success HTTP status is turned into
    /// `ForgeError::ApiError` carrying the status code and the response body.
    async fn fetch_patches(&self, rid: &str) -> Result<Vec<RadiclePatch>, ForgeError> {
        let endpoint = format!("{}/api/v1/repos/{}/patches", self.httpd_url, rid);
        let resp = self.client.get(&endpoint).send().await?;

        if resp.status().is_success() {
            return Ok(resp.json::<Vec<RadiclePatch>>().await?);
        }

        let status_code = resp.status();
        // The body is diagnostic only, so a failure to read it is ignored.
        let text = resp.text().await.unwrap_or_default();
        Err(ForgeError::ApiError(format!(
            "radicle-httpd returned {status_code}: {text}"
        )))
    }

    /// Translate Jupiter's [`CommitStatus`] into the lowercase label sent in
    /// the `state` field of a status update.
    fn radicle_status_string(status: CommitStatus) -> &'static str {
        match status {
            CommitStatus::Pending => "pending",
            CommitStatus::Success => "success",
            CommitStatus::Failure => "failure",
            CommitStatus::Error => "error",
        }
    }
}
/// Derive the clone URL handed to agents from a Radicle repository ID.
///
/// Anything that already starts with `rad:` -- a canonical RID such as
/// `rad:z2...` as well as a full `rad://...` URL -- is passed through
/// unchanged. A bare identifier gets the `rad://` scheme prepended.
///
/// Shared by `clone_url` and `list_repos` so the two cannot drift apart.
fn radicle_clone_url(rid: &str) -> String {
    if rid.starts_with("rad:") {
        rid.to_string()
    } else {
        format!("rad://{rid}")
    }
}

#[async_trait]
impl ForgeProvider for RadicleProvider {
    fn forge_type(&self) -> ForgeType {
        ForgeType::Radicle
    }

    /// Radicle webhook verification is a **no-op** that always returns
    /// `Ok(true)`; both arguments are ignored.
    ///
    /// Unlike GitHub and Gitea, Radicle CI broker events arrive over trusted
    /// local transport (e.g. a localhost HTTP connection or a Unix socket).
    /// There is no shared secret or HMAC signature because the threat model
    /// assumes the broker and Jupiter run on the same machine or within a
    /// trusted network boundary.
    ///
    /// If Jupiter is exposed to untrusted networks, access to the Radicle
    /// webhook endpoint should be restricted at the network layer (firewall,
    /// reverse proxy allowlist, etc.).
    fn verify_webhook(
        &self,
        _signature_header: Option<&str>,
        _body: &[u8],
    ) -> Result<bool, ForgeError> {
        Ok(true)
    }

    /// Parse a Radicle CI broker webhook payload.
    ///
    /// Recognized event types:
    ///
    /// - `"patch"`, `"patch_updated"`, `"patch_created"` -- a Radicle patch
    ///   was created or received a new revision. Produces
    ///   [`RawForgeEvent::PatchUpdated`]. The `rid`, `patch_id`, and
    ///   `commit` fields are required; `revision_id` defaults to empty.
    ///
    /// - `"push"` -- a ref was updated on a Radicle repository. Produces
    ///   [`RawForgeEvent::Push`] with `repo_owner` set to empty (Radicle
    ///   has no owner concept) and `repo_name` set to the RID. Only `rid`
    ///   is required: `git_ref` defaults to `"refs/heads/main"`, `before`
    ///   defaults to 40 zeros (indicating a new ref), and `commit` and
    ///   `sender` default to empty strings.
    ///
    /// All other event types are ignored with a debug log and yield
    /// `Ok(None)`. Their body is never deserialized, so even a malformed
    /// body is accepted for them.
    ///
    /// # Errors
    ///
    /// Returns `ForgeError::ParseError` when a required field is missing,
    /// and the converted `serde_json` error when the body of a recognized
    /// event is not valid JSON for the payload structure.
    fn parse_webhook(
        &self,
        event_type: &str,
        body: &[u8],
    ) -> Result<Option<RawForgeEvent>, ForgeError> {
        match event_type {
            "patch" | "patch_updated" | "patch_created" => {
                let payload: RadicleBrokerPayload = serde_json::from_slice(body)?;
                let rid = payload.rid.ok_or_else(|| {
                    ForgeError::ParseError("missing rid in Radicle patch event".into())
                })?;
                let patch_id = payload.patch_id.ok_or_else(|| {
                    ForgeError::ParseError("missing patch_id in Radicle patch event".into())
                })?;
                let revision_id = payload.revision_id.unwrap_or_default();
                let head_sha = payload.commit.ok_or_else(|| {
                    ForgeError::ParseError("missing commit in Radicle patch event".into())
                })?;
                debug!(%rid, %patch_id, "parsed Radicle patch event");
                Ok(Some(RawForgeEvent::PatchUpdated {
                    repo_rid: rid,
                    patch_id,
                    revision_id,
                    head_sha,
                }))
            }
            "push" => {
                let payload: RadicleBrokerPayload = serde_json::from_slice(body)?;
                let rid = payload.rid.ok_or_else(|| {
                    ForgeError::ParseError("missing rid in Radicle push event".into())
                })?;
                let git_ref = payload.git_ref.unwrap_or_else(|| "refs/heads/main".into());
                // An all-zero SHA is git's convention for "ref did not exist".
                let before = payload.before.unwrap_or_else(|| "0".repeat(40));
                let after = payload.commit.unwrap_or_default();
                let sender = payload.sender.unwrap_or_default();
                debug!(%rid, %git_ref, "parsed Radicle push event");
                Ok(Some(RawForgeEvent::Push {
                    repo_owner: String::new(),
                    repo_name: rid,
                    git_ref,
                    before,
                    after,
                    sender,
                }))
            }
            other => {
                debug!(event = %other, "ignoring unhandled Radicle event type");
                Ok(None)
            }
        }
    }

    /// Report a commit status to `radicle-httpd` via
    /// `POST /api/v1/repos/{rid}/statuses/{sha}`.
    ///
    /// For Radicle, the `repo_owner` parameter is ignored (Radicle repos have
    /// no owner) and `repo_name` is expected to contain the RID. Both the RID
    /// and the SHA are interpolated into the URL path verbatim.
    ///
    /// No authentication headers are sent because `radicle-httpd` runs
    /// locally and trusts all connections.
    ///
    /// # Errors
    ///
    /// Returns `ForgeError::ApiError` (after logging a warning) when the
    /// server answers with a non-success status, and the converted transport
    /// error when the request cannot be sent.
    async fn set_commit_status(
        &self,
        _repo_owner: &str,
        repo_name: &str,
        commit_sha: &str,
        status: &CommitStatusUpdate,
    ) -> Result<(), ForgeError> {
        // repo_name is the RID for Radicle repos.
        let url = format!(
            "{}/api/v1/repos/{}/statuses/{}",
            self.httpd_url, repo_name, commit_sha,
        );
        let body = serde_json::json!({
            "state": Self::radicle_status_string(status.status),
            "context": status.context,
            "description": status.description,
            "target_url": status.target_url,
        });
        let resp = self.client.post(&url).json(&body).send().await?;
        if !resp.status().is_success() {
            let status_code = resp.status();
            // The body is diagnostic only, so a failure to read it is ignored.
            let text = resp.text().await.unwrap_or_default();
            warn!(%status_code, body = %text, "radicle-httpd status API error");
            return Err(ForgeError::ApiError(format!(
                "radicle-httpd returned {status_code}: {text}"
            )));
        }
        Ok(())
    }

    /// Return the clone URL for a Radicle repository.
    ///
    /// For Radicle the `repo_name` parameter carries the RID and the
    /// `repo_owner` parameter is ignored (Radicle has no owner concept).
    ///
    /// A value that already starts with `rad:` -- which includes canonical
    /// RIDs such as `rad:z2...` -- is returned unchanged; it is **not**
    /// rewritten to the `rad://` form. Any other value gets `rad://`
    /// prepended. The agent machine must have the `rad` CLI installed and a
    /// local Radicle node running to resolve and fetch the result.
    ///
    /// This method performs no I/O and never fails.
    async fn clone_url(
        &self,
        _repo_owner: &str,
        repo_name: &str,
    ) -> Result<String, ForgeError> {
        Ok(radicle_clone_url(repo_name))
    }

    /// List all repositories known to the local `radicle-httpd` instance.
    ///
    /// Returns `(owner, name, clone_url)` tuples where:
    /// - `owner` is always an empty string (Radicle has no owner concept).
    /// - `name` is the human-readable repository name.
    /// - `clone_url` is derived from the RID by the same rule as
    ///   [`clone_url`](ForgeProvider::clone_url).
    ///
    /// NOTE(review): `name` here is the display name, while push events and
    /// the `repo_name` argument of `set_commit_status` / `clone_url` use the
    /// RID. Feeding this `name` back into those methods would not address
    /// the repository by RID -- confirm how callers correlate the two.
    async fn list_repos(&self) -> Result<Vec<(String, String, String)>, ForgeError> {
        let repos = self.fetch_repos().await?;
        Ok(repos
            .into_iter()
            .map(|repo| {
                let clone_url = radicle_clone_url(&repo.rid);
                // Radicle has no "owner"; use empty string.
                (String::new(), repo.name, clone_url)
            })
            .collect())
    }
}

View file

@ -0,0 +1,18 @@
[package]
name = "jupiter-scheduler"
version.workspace = true
edition.workspace = true
[dependencies]
jupiter-api-types = { workspace = true }
jupiter-db = { workspace = true }
jupiter-forge = { workspace = true }
tokio = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }
async-trait = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true }
anyhow = { workspace = true }

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,68 @@
//! Error types for the Jupiter scheduler.
//!
//! Every fallible scheduler operation returns [`Result<T>`], which uses
//! [`SchedulerError`] as its error type. Errors originate from three main
//! sources:
//!
//! - **Database layer** (`jupiter_db`) -- query failures, constraint violations,
//! connection issues.
//! - **Forge layer** (`jupiter_forge`) -- failures when reporting commit status
//! back to GitHub, Gitea, Radicle, etc.
//! - **Scheduler logic** -- invalid state transitions, missing jobs, or missing
//! agents for a required platform.
use thiserror::Error;
/// Errors that can occur during scheduler operations.
///
/// The scheduler is intentionally lenient: most errors are logged and do **not**
/// crash the event loop (see [`crate::engine::SchedulerEngine::run`]). Individual
/// event handlers return `Result<()>` so the loop can log the failure and
/// continue processing the next event.
///
/// The `Db` and `Forge` variants are marked `#[from]`, so errors from those
/// crates convert automatically when propagated with `?`. The remaining
/// variants are constructed explicitly by scheduler code. `Display` output
/// for every variant is defined by its `#[error(...)]` attribute.
#[derive(Debug, Error)]
pub enum SchedulerError {
/// A database operation failed. This wraps errors from the `jupiter_db`
/// crate and can indicate connection failures, SQL constraint violations,
/// or missing rows.
#[error("database error: {0}")]
Db(#[from] jupiter_db::error::DbError),
/// A forge API call failed. This typically occurs when reporting commit
/// status back to the forge (GitHub, Gitea, etc.) and could be caused by
/// network issues, expired tokens, or rate limiting.
#[error("forge error: {0}")]
Forge(#[from] jupiter_forge::error::ForgeError),
/// No connected agent can serve the requested platform (e.g.,
/// `x86_64-linux`, `aarch64-darwin`). The task remains in the queue and
/// will be picked up when a suitable agent connects. The payload is the
/// platform string that could not be served.
#[error("no agent available for platform: {0}")]
NoAgentAvailable(String),
/// A job with the given UUID was not found in the database. This can
/// happen if a job is deleted while events referencing it are still
/// in-flight in the scheduler channel.
#[error("job not found: {0}")]
JobNotFound(uuid::Uuid),
/// An attempted job state transition is not valid. For example, trying to
/// move a `Succeeded` job to `Evaluating` without going through a re-run
/// reset. The `from` and `to` fields contain human-readable state names
/// and are rendered as `from -> to` in the error message.
///
/// Valid transitions are documented in [`crate::engine::SchedulerEngine`].
#[error("invalid state transition: {from} -> {to}")]
InvalidTransition {
/// The current state of the job at the time of the transition attempt.
from: String,
/// The target state that was rejected.
to: String,
},
/// A catch-all for internal scheduler errors that do not fit the other
/// variants. The contained string provides a human-readable description.
#[error("scheduler error: {0}")]
Internal(String),
}
/// Convenience alias used throughout the scheduler crate.
///
/// Fixes the error type to [`SchedulerError`] so fallible functions can be
/// written as `Result<T>`. A module that imports this alias shadows the
/// prelude's two-parameter `Result`; spell out `std::result::Result<T, E>`
/// wherever a different error type is needed.
pub type Result<T> = std::result::Result<T, SchedulerError>;

View file

@ -0,0 +1,65 @@
//! # Jupiter Scheduler
//!
//! The scheduler is the central orchestration engine of **Jupiter**, a self-hosted,
//! wire-compatible replacement for [hercules-ci.com](https://hercules-ci.com).
//!
//! ## Role in the Jupiter architecture
//!
//! Jupiter follows the Hercules CI model where a server coordinates work that is
//! executed by remote *agents*. The scheduler is the "brain" that drives every job
//! through the Hercules CI pipeline:
//!
//! ```text
//! ForgeEvent --> Job creation --> Evaluation --> Build --> Effects --> Done
//! ```
//!
//! It runs as a long-lived background **tokio task** (see [`engine::SchedulerEngine::run`])
//! and receives [`engine::SchedulerEvent`]s over a bounded `mpsc` channel from:
//!
//! - **Webhook handlers** -- forge push / PR / patch events.
//! - **WebSocket handler** -- messages from connected Hercules CI agents reporting
//! evaluation results, build completions, effect outcomes, etc.
//! - **REST endpoints** -- user-initiated actions such as re-running or cancelling
//! a job.
//!
//! ## Pipeline state machine
//!
//! Each **Job** progresses through the following states (see also
//! [`engine::SchedulerEngine`] for transition logic):
//!
//! ```text
//! ┌──────────┐    ┌────────────┐    ┌──────────┐    ┌────────────────┐    ┌───────────┐
//! │ Pending  │───>│ Evaluating │───>│ Building │───>│ RunningEffects │───>│ Succeeded │
//! └──────────┘    └────────────┘    └──────────┘    └────────────────┘    └───────────┘
//!                       │                │                  │
//!                       v                v                  v
//!               ┌───────────────┐   ┌──────────┐      ┌──────────┐
//!               │ErrorEvaluating│   │  Failed  │      │  Failed  │
//!               └───────────────┘   └──────────┘      └──────────┘
//! ```
//!
//! Any state can also transition to `Cancelled` via user action.
//!
//! ## Concurrency model
//!
//! - **Effects within a single job** run concurrently -- they are all enqueued at
//! once when the job enters `RunningEffects`.
//! - **Effects across jobs on the same project + ref** are serialized via a
//! `sequence_number` to prevent ordering hazards (e.g., two pushes deploying
//! out of order).
//! - **Builds are deduplicated** by derivation path: if two jobs need the same
//! `.drv`, only one `Build` record is created and linked to both jobs.
//! - **Agent disconnection** causes all in-flight tasks assigned to that agent to
//! be returned to `Pending` state so another agent can pick them up.
//! - **IFD (import-from-derivation)** requires at least 2 concurrent task slots on
//! `x86_64-linux` agents to avoid deadlock (the evaluating agent must be able to
//! build the IFD derivation while still running the evaluation).
//!
//! ## Crate layout
//!
//! - [`engine`] -- The [`SchedulerEngine`](engine::SchedulerEngine) struct and the
//! [`SchedulerEvent`](engine::SchedulerEvent) enum that drives it.
//! - [`error`] -- Error types returned by scheduler operations.
pub mod engine;
pub mod error;

View file

@ -0,0 +1,31 @@
[package]
name = "jupiter-server"
version.workspace = true
edition.workspace = true
[[bin]]
name = "jupiter-server"
path = "src/main.rs"
[dependencies]
jupiter-api-types = { workspace = true }
jupiter-db = { workspace = true }
jupiter-forge = { workspace = true }
jupiter-scheduler = { workspace = true }
jupiter-cache = { workspace = true }
axum = { workspace = true }
axum-extra = { workspace = true }
tower = { workspace = true }
tower-http = { workspace = true }
tokio = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
toml = { workspace = true }
jsonwebtoken = { workspace = true }
bcrypt = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true }
anyhow = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
futures = { workspace = true }

View file

@ -0,0 +1,208 @@
//! # Authentication and authorization
//!
//! Jupiter uses JWT (JSON Web Tokens) for all authentication. Tokens are signed
//! with an HMAC secret (`jwtPrivateKey` from the server config) and carry a
//! [`TokenScope`] that determines what the bearer is allowed to do.
//!
//! ## Token scopes
//!
//! | Scope | Issued to | Typical lifetime | Purpose |
//! |----------|--------------------|------------------|--------------------------------------------------|
//! | `User` | `hci` CLI / Web UI | 24 hours | Full read/write access to the API |
//! | `Agent` | hercules-ci-agent | Long-lived | Cluster join, task polling, result reporting |
//! | `Effect` | Running effects | 1 hour | Scoped access to state files during effect exec |
//!
//! Effect tokens are the most restrictive: they embed the `project_id` (as `sub`)
//! and the `attribute_path` so that an effect can only read/write state files
//! belonging to its own project. This prevents cross-project data leaks when
//! effects run untrusted Nix code.
//!
//! ## Wire format
//!
//! Tokens are passed in the `Authorization: Bearer <token>` header. The
//! [`BearerToken`] extractor handles parsing this header for axum handlers.
use axum::{
extract::FromRequestParts,
http::{request::Parts, StatusCode},
};
use chrono::{Duration, Utc};
use jsonwebtoken::{decode, encode, DecodingKey, EncodingKey, Header, Validation};
use serde::{Deserialize, Serialize};
/// JWT claims payload embedded in every Jupiter token.
///
/// This struct is what [`create_jwt`] serializes into the token body and what
/// [`verify_jwt`] deserializes back out after checking the signature.
///
/// - `sub`: the subject -- a username for User tokens, an account ID for Agent
/// tokens, or a project ID for Effect tokens.
/// - `exp` / `iat`: standard JWT expiration and issued-at timestamps (Unix epoch seconds).
/// - `scope`: the [`TokenScope`] determining what this token authorizes.
#[derive(Debug, Serialize, Deserialize)]
pub struct Claims {
/// Subject identifier. Interpretation depends on the scope (username,
/// account ID, or project ID -- see the type-level docs).
pub sub: String,
/// Expiration time as a Unix timestamp (seconds since epoch).
/// [`create_jwt`] sets this to "now + `duration_hours`".
pub exp: i64,
/// Issued-at time as a Unix timestamp (seconds since epoch), taken from
/// the same clock reading that `exp` is derived from.
pub iat: i64,
/// The authorization scope of this token.
pub scope: TokenScope,
}
/// Defines the authorization level of a JWT token.
///
/// Each variant corresponds to a different actor in the system and determines
/// which API endpoints the token grants access to.
///
/// No serde attributes are applied, so the enum uses serde's default
/// (externally tagged) representation inside the `scope` claim: unit variants
/// serialize as a bare string and `Effect` as an object keyed by the variant
/// name. Renaming a variant or field therefore invalidates tokens that are
/// already in circulation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TokenScope {
/// Full API access for human users authenticated via the `hci` CLI or web UI.
/// The token's `sub` field contains the username.
User,
/// Access for `hercules-ci-agent` instances that have joined a cluster.
/// The token's `sub` field contains the account ID.
Agent,
/// Scoped access for Nix effects during execution. Effects can only access
/// state files for the project identified by the token's `sub` field, and
/// only for the specific attribute path encoded here.
Effect {
/// The job ID that spawned this effect, used for audit trailing.
/// NOTE(review): the `user-effect-token` route fills this with the
/// project ID rather than a real job ID -- confirm that is intended.
job_id: String,
/// The Nix attribute path (e.g. `["effects", "deploy"]`) that this
/// effect is executing. Limits state file access to this path.
attribute_path: Vec<String>,
},
}
/// Mint a signed JWT for `subject` carrying `scope`, valid for `duration_hours`.
///
/// The token is signed with the shared HMAC `secret` using `jsonwebtoken`'s
/// default header (HS256). The same secret must be passed to [`verify_jwt`]
/// to validate it later.
///
/// # Arguments
///
/// * `secret` -- the HMAC signing key (from `ServerConfig::jwt_private_key`).
/// * `subject` -- value of the `sub` claim (username, account ID, or project ID).
/// * `scope` -- the authorization scope embedded in the token.
/// * `duration_hours` -- lifetime of the token, in hours, counted from now.
///
/// # Errors
///
/// Propagates the `jsonwebtoken` error if the claims cannot be encoded; this
/// is not expected for well-formed inputs.
pub fn create_jwt(
    secret: &str,
    subject: &str,
    scope: TokenScope,
    duration_hours: i64,
) -> Result<String, jsonwebtoken::errors::Error> {
    // Read the clock once so `iat` and `exp` are derived from the same instant.
    let issued_at = Utc::now();
    let expires_at = issued_at + Duration::hours(duration_hours);

    let payload = Claims {
        sub: subject.to_owned(),
        exp: expires_at.timestamp(),
        iat: issued_at.timestamp(),
        scope,
    };
    let signing_key = EncodingKey::from_secret(secret.as_bytes());

    encode(&Header::default(), &payload, &signing_key)
}
/// Check a JWT's signature and validity, then hand back its [`Claims`].
///
/// The token is decoded with the HMAC key derived from `secret` and
/// `jsonwebtoken`'s default [`Validation`] rules, which include the expiry
/// check on `exp`.
///
/// # Errors
///
/// Returns the `jsonwebtoken` error when the signature does not match, the
/// token has expired, or the payload cannot be deserialized into [`Claims`].
pub fn verify_jwt(
    secret: &str,
    token: &str,
) -> Result<Claims, jsonwebtoken::errors::Error> {
    let verification_key = DecodingKey::from_secret(secret.as_bytes());
    let rules = Validation::default();

    decode::<Claims>(token, &verification_key, &rules).map(|decoded| decoded.claims)
}
/// Verify an effect-scoped JWT and extract the project ID, job ID, and
/// attribute path.
///
/// This is used by the `/current-task/state` endpoints to determine which
/// project's state files the caller is authorized to access. The effect
/// token's `sub` field contains the project ID, while the scope payload
/// carries the job ID and attribute path.
///
/// # Returns
///
/// A tuple of `(project_id, job_id, attribute_path)` on success.
///
/// # Errors
///
/// Returns an error if the token is invalid, expired, or does not have
/// the `Effect` scope.
pub fn parse_effect_token(
secret: &str,
token: &str,
) -> Result<(String, String, Vec<String>), jsonwebtoken::errors::Error> {
let claims = verify_jwt(secret, token)?;
match claims.scope {
TokenScope::Effect {
job_id,
attribute_path,
} => Ok((claims.sub, job_id, attribute_path)),
_ => Err(jsonwebtoken::errors::Error::from(
jsonwebtoken::errors::ErrorKind::InvalidToken,
)),
}
}
/// Axum extractor that pulls the bearer token string from the `Authorization`
/// header.
///
/// Expects the header value to be in the form `Bearer <token>`. The scheme
/// name is matched case-insensitively (HTTP authentication schemes are
/// case-insensitive), and surrounding whitespace around the token is ignored.
/// Returns `401 Unauthorized` if the header is missing, is not valid visible
/// ASCII, uses a different scheme, or carries an empty token.
///
/// # Example
///
/// ```ignore
/// async fn protected_handler(BearerToken(token): BearerToken) {
///     // `token` is the raw JWT string
/// }
/// ```
#[allow(dead_code)]
pub struct BearerToken(pub String);

impl<S> FromRequestParts<S> for BearerToken
where
    S: Send + Sync,
{
    type Rejection = StatusCode;

    /// Extract the bearer token from the request's `Authorization` header.
    ///
    /// Splits the header value at the first space into scheme and
    /// credentials, accepts the scheme `Bearer` in any letter case, and
    /// yields the trimmed credentials as the raw token string. Any other
    /// shape is rejected with `401 Unauthorized`.
    async fn from_request_parts(
        parts: &mut Parts,
        _state: &S,
    ) -> Result<Self, Self::Rejection> {
        let header_value = parts
            .headers
            .get("authorization")
            .and_then(|v| v.to_str().ok())
            .ok_or(StatusCode::UNAUTHORIZED)?;

        // "<scheme> <credentials>": a value without a space has no credentials.
        let (scheme, credentials) = header_value
            .split_once(' ')
            .ok_or(StatusCode::UNAUTHORIZED)?;

        let token = credentials.trim();
        if scheme.eq_ignore_ascii_case("Bearer") && !token.is_empty() {
            Ok(BearerToken(token.to_owned()))
        } else {
            // Wrong scheme, or "Bearer" followed by nothing usable.
            Err(StatusCode::UNAUTHORIZED)
        }
    }
}

View file

@ -0,0 +1,58 @@
//! # Server configuration loading
//!
//! Jupiter's configuration is stored as TOML and deserialized into the
//! [`ServerConfig`] type defined in the `jupiter-api-types` crate. The config
//! controls:
//!
//! - **`listen`** -- the socket address the HTTP server binds to (e.g. `"0.0.0.0:3000"`).
//! - **`baseUrl`** -- the externally-visible URL, used when generating callback
//! URLs for forge webhooks and effect tokens.
//! - **`jwtPrivateKey`** -- the HMAC secret used to sign and verify JWT tokens
//! for all three scopes (User, Agent, Effect).
//! - **`[database]`** -- either a SQLite `path` or a full PostgreSQL `url`.
//! The server code is generic over [`StorageBackend`] so both backends work
//! with the same handlers.
//! - **`[[forges]]`** -- an array of forge configurations (GitHub, Gitea) with
//! webhook secrets and API tokens.
//!
//! If the config file does not exist on disk, a sensible development default is
//! used so that `cargo run` works out of the box with no external setup.
use anyhow::Result;
use jupiter_api_types::ServerConfig;
/// Load the server configuration from the given TOML file path.
///
/// If the file does not exist, the built-in [`default_config`] string is used
/// instead. This makes first-run setup frictionless for development.
///
/// Any *other* read failure (permission denied, invalid UTF-8, I/O error, ...)
/// is reported to the caller rather than masked: silently falling back would
/// start the server with the development defaults -- including the publicly
/// known JWT secret -- even though the operator did provide a config file.
///
/// # Errors
///
/// Returns an error if the file exists but cannot be read, or if the TOML
/// content (whether from disk or the default string) cannot be deserialized
/// into [`ServerConfig`].
pub fn load_config(path: &str) -> Result<ServerConfig> {
    let content = match std::fs::read_to_string(path) {
        Ok(text) => text,
        // Only a missing file triggers the development fallback.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => default_config(),
        Err(e) => {
            // Re-wrap with the path so the startup error is actionable.
            return Err(std::io::Error::new(
                e.kind(),
                format!("failed to read config file `{path}`: {e}"),
            )
            .into());
        }
    };
    let config: ServerConfig = toml::from_str(&content)?;
    Ok(config)
}
/// Produce the built-in TOML configuration used for local development.
///
/// The returned text binds to all interfaces on port 3000, signs tokens with
/// the insecure placeholder secret `"jupiter-dev-secret"`, and keeps data in
/// a local SQLite file (`jupiter.db`). The forge list is empty, so no
/// webhook-driven jobs are created until the operator adds a `[[forges]]`
/// section to a real config file.
fn default_config() -> String {
    // One entry per line of the generated document, in order.
    const LINES: [&str; 7] = [
        r#"listen = "0.0.0.0:3000""#,
        r#"baseUrl = "http://localhost:3000""#,
        r#"jwtPrivateKey = "jupiter-dev-secret""#,
        "forges = []",
        "[database]",
        r#"type = "sqlite""#,
        r#"path = "jupiter.db""#,
    ];

    // The document starts with a newline and every line is newline-terminated.
    let mut document = String::from("\n");
    for line in LINES.iter() {
        document.push_str(line);
        document.push('\n');
    }
    document
}

View file

@ -0,0 +1,115 @@
//! # Jupiter Server -- main entry point
//!
//! Jupiter is a self-hosted, wire-compatible replacement for hercules-ci.com.
//! This binary is the central server that coordinates the entire CI pipeline:
//!
//! 1. **Agents** connect over WebSocket (`/api/v1/agent/socket`) and receive
//! evaluation, build, and effect tasks dispatched by the scheduler.
//! 2. **The `hci` CLI and web UI** interact through the REST API for browsing
//! projects, jobs, builds, effects, and managing state files.
//! 3. **Forge webhooks** (GitHub, Gitea) trigger the scheduler to create new
//! jobs when commits are pushed or pull requests are opened.
//!
//! ## Startup sequence
//!
//! 1. Initialize `tracing` with the `RUST_LOG` env filter (defaults to `jupiter=info`).
//! 2. Load the TOML configuration from the path given as the first CLI argument,
//! falling back to `jupiter.toml` in the working directory, or a built-in
//! default if the file does not exist.
//! 3. Open (or create) the SQLite database and run migrations.
//! 4. Construct [`AppState`] which bundles the config, database handle,
//! scheduler channel, agent hub, and forge providers.
//! 5. Spawn the [`SchedulerEngine`] on a background tokio task. The engine
//! owns the receiving half of the `mpsc` channel; all other components
//! communicate with it by sending [`SchedulerEvent`]s.
//! 6. Build the axum [`Router`] with all REST and WebSocket routes.
//! 7. Bind a TCP listener and start serving.
//!
//! ## Architecture overview
//!
//! ```text
//! Forge webhook ──> /webhooks/github ──> SchedulerEvent::ForgeEvent
//!                                                     │
//!                                                     v
//! CLI / UI ──> REST API                        SchedulerEngine
//!                                                     │
//!                                       dispatches tasks via AgentHub
//!                                                     │
//!                                                     v
//! hercules-ci-agent <── WebSocket <──────── AgentSessionInfo.tx
//! ```
use std::sync::Arc;
use anyhow::Result;
use tracing::info;
use tracing_subscriber::EnvFilter;
mod config;
mod state;
mod auth;
mod websocket;
mod routes;
use jupiter_db::backend::StorageBackend;
use jupiter_db::sqlite::SqliteBackend;
/// Async entry point powered by the tokio multi-threaded runtime.
///
/// This function orchestrates the full server lifecycle: config loading,
/// database initialization, scheduler startup, router construction, and
/// TCP listener binding. It returns `Ok(())` only when the server shuts
/// down cleanly; any fatal error during startup propagates as `anyhow::Error`.
///
/// Logging: the `RUST_LOG` environment variable is honoured when it is set
/// and parses; only when it is absent or invalid does the filter fall back
/// to `jupiter=info`.
#[tokio::main]
async fn main() -> Result<()> {
    // `add_directive("jupiter=info")` on top of the env filter would replace
    // a `jupiter=...` directive coming from RUST_LOG, so `jupiter=info` is
    // applied strictly as the fallback instead.
    let log_filter =
        EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("jupiter=info"));
    tracing_subscriber::fmt().with_env_filter(log_filter).init();

    // Load config: first CLI argument, or `jupiter.toml` in the working directory.
    let config_path = std::env::args()
        .nth(1)
        .unwrap_or_else(|| "jupiter.toml".to_string());
    let config = config::load_config(&config_path)?;
    // Copied out because `config` is moved into AppState below.
    let listen_addr = config.listen.clone();

    // Initialize database.
    // If a `path` is set in [database], use SQLite with that file.
    // Otherwise fall back to the full `url` field, or a default SQLite file.
    // NOTE(review): the URL is always handed to `SqliteBackend`, even when it
    // comes from `database.url` -- confirm non-SQLite URLs are rejected upstream.
    let db_url = match &config.database.path {
        Some(path) => format!("sqlite:{}?mode=rwc", path),
        None => config
            .database
            .url
            .clone()
            .unwrap_or_else(|| "sqlite:jupiter.db?mode=rwc".to_string()),
    };
    let db = SqliteBackend::new(&db_url).await?;
    db.run_migrations().await?;
    info!("Database initialized");

    // Build app state -- this also creates the SchedulerEngine internally.
    let mut app_state = state::AppState::new(config, Arc::new(db));

    // Take ownership of the scheduler out of AppState and run it on a
    // dedicated background task. AppState clones (used by handler closures)
    // will have `scheduler: None` since Clone skips it.
    let scheduler = app_state.take_scheduler();
    tokio::spawn(async move {
        if let Some(s) = scheduler {
            s.run().await;
        }
    });

    // Build the axum router with all routes and shared state.
    let app = routes::build_router(app_state);

    // Start the HTTP/WebSocket server.
    let listener = tokio::net::TcpListener::bind(&listen_addr).await?;
    info!("Jupiter server listening on {}", listen_addr);
    axum::serve(listener, app).await?;
    Ok(())
}

View file

@ -0,0 +1,258 @@
//! # Agent and account management endpoints
//!
//! This module provides REST endpoints for managing agent sessions, accounts,
//! and cluster join tokens. These endpoints are used by:
//!
//! - **The web UI and `hci` CLI** to list connected agents and manage accounts.
//! - **The `hercules-ci-agent`** to query the service info during initial setup.
//! - **Administrators** to create/revoke cluster join tokens that authorize
//! new agents to connect.
//!
//! ## Cluster join tokens
//!
//! Agents authenticate by presenting a cluster join token during the WebSocket
//! handshake. These tokens are generated as random UUIDs, bcrypt-hashed before
//! storage in the database, and returned in plaintext only once (at creation
//! time). The administrator distributes the token to the agent's configuration
//! file. On connection, the server verifies the token against the stored hash.
//!
//! ## Account model
//!
//! Accounts are the top-level organizational unit in Jupiter (matching the
//! Hercules CI account concept). Each account can have multiple agents,
//! projects, and cluster join tokens. Accounts are currently simple name-based
//! entities with a `User` type.
use axum::{
extract::{Json, Path, State},
http::StatusCode,
response::IntoResponse,
};
use serde_json::{json, Value};
use std::sync::Arc;
use uuid::Uuid;
use jupiter_api_types::AccountType;
use jupiter_db::backend::StorageBackend;
use crate::state::AppState;
/// Handle `GET /api/v1/agent/service-info` -- report the protocol version.
///
/// Agents call this lightweight endpoint during initial setup to discover
/// which protocol version the server speaks, so they can check compatibility
/// before opening the WebSocket connection.
///
/// Always responds with `{"version": [2, 0]}`, i.e. protocol version 2.0.
pub async fn service_info() -> Json<Value> {
    // [major, minor]
    let protocol_version = json!([2, 0]);
    Json(json!({ "version": protocol_version }))
}
/// Handle `GET /api/v1/agents` -- list the agent sessions stored in the database.
///
/// Responds with a JSON array of whatever `list_agent_sessions` returns.
/// A storage error is reported as `500 Internal Server Error` with the error
/// text as the body.
pub async fn list_agents<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
) -> impl IntoResponse {
    let sessions = match state.db.list_agent_sessions().await {
        Ok(found) => found,
        Err(err) => {
            return (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response();
        }
    };
    Json(json!(sessions)).into_response()
}
/// Handle `GET /api/v1/agents/{id}` -- fetch one agent session by UUID.
///
/// Responses:
/// - `200` with the session record as JSON when it exists.
/// - `400` with the text `invalid UUID` when `id` does not parse as a UUID.
/// - `404` when the database reports `DbError::NotFound`.
/// - `500` with the error text for any other database error.
pub async fn get_agent<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path(id): Path<String>,
) -> impl IntoResponse {
    let Ok(session_id) = Uuid::parse_str(&id) else {
        return (StatusCode::BAD_REQUEST, "invalid UUID").into_response();
    };

    let lookup = state.db.get_agent_session(session_id).await;
    match lookup {
        Ok(session) => Json(json!(session)).into_response(),
        Err(jupiter_db::error::DbError::NotFound { .. }) => StatusCode::NOT_FOUND.into_response(),
        Err(other) => (StatusCode::INTERNAL_SERVER_ERROR, other.to_string()).into_response(),
    }
}
/// Handle `POST /api/v1/accounts/{account_id}/clusterJoinTokens` -- create a
/// new cluster join token for the given account.
///
/// Generates a random UUID token, bcrypt-hashes it, and stores the hash in the
/// database. The plaintext token is returned in the response body and must be
/// saved by the administrator -- it cannot be recovered later since only the
/// hash is stored.
///
/// ## Request body
///
/// ```json
/// { "name": "my-agent-token" }
/// ```
///
/// ## Response (201 Created)
///
/// ```json
/// { "id": "<token-record-uuid>", "token": "<plaintext-token>" }
/// ```
pub async fn create_join_token<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Path(account_id): Path<String>,
Json(body): Json<Value>,
) -> impl IntoResponse {
let account_uuid = match Uuid::parse_str(&account_id) {
Ok(u) => u,
Err(_) => {
return (
StatusCode::BAD_REQUEST,
Json(json!({"error": "invalid UUID"})),
)
.into_response()
}
};
let name = body
.get("name")
.and_then(|v| v.as_str())
.unwrap_or("default");
// Generate a random token and bcrypt-hash it for secure storage.
// The plaintext is returned only once in the response.
let token = Uuid::new_v4().to_string();
let token_hash = bcrypt::hash(&token, bcrypt::DEFAULT_COST).unwrap_or_default();
match state
.db
.create_cluster_join_token(account_uuid, name, &token_hash)
.await
{
Ok(cjt) => {
let resp = json!({
"id": cjt.id,
"token": token,
});
(StatusCode::CREATED, Json(resp)).into_response()
}
Err(e) => (
StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({"error": e.to_string()})),
)
.into_response(),
}
}
/// Handle `GET /api/v1/accounts/{account_id}/clusterJoinTokens` -- list the
/// join tokens belonging to an account.
///
/// Responds with a JSON array of whatever `list_cluster_join_tokens` returns
/// for the account.
/// NOTE(review): whether the hash is excluded from that payload depends on
/// how the record type is serialized -- confirm in `jupiter-api-types`.
///
/// Responds `400` with the text `invalid UUID` if `account_id` does not
/// parse, and `500` with the error text on a database failure.
pub async fn list_join_tokens<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path(account_id): Path<String>,
) -> impl IntoResponse {
    let Ok(account_uuid) = Uuid::parse_str(&account_id) else {
        return (StatusCode::BAD_REQUEST, "invalid UUID").into_response();
    };

    let tokens = match state.db.list_cluster_join_tokens(account_uuid).await {
        Ok(found) => found,
        Err(err) => {
            return (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response();
        }
    };
    Json(json!(tokens)).into_response()
}
/// Handle `DELETE /api/v1/accounts/{account_id}/clusterJoinTokens/{token_id}`
/// -- revoke a cluster join token.
///
/// Deletes the token record identified by `token_id`. Responds `204 No
/// Content` on success, `400` (empty body) if `token_id` is not a UUID, and
/// `500` with the error text if the database call fails.
///
/// NOTE(review): the `account_id` path segment is extracted but ignored, so
/// the deletion is not restricted to tokens owned by that account -- confirm
/// whether the storage layer or a middleware enforces ownership.
pub async fn delete_join_token<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path((_account_id, token_id)): Path<(String, String)>,
) -> impl IntoResponse {
    let Ok(token_uuid) = Uuid::parse_str(&token_id) else {
        return StatusCode::BAD_REQUEST.into_response();
    };

    if let Err(err) = state.db.delete_cluster_join_token(token_uuid).await {
        return (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response();
    }
    StatusCode::NO_CONTENT.into_response()
}
/// Handle `POST /api/v1/accounts` -- create a new account.
///
/// Creates an account with the given name and `User` type. Accounts are the
/// top-level organizational unit that owns projects, agents, and join tokens.
///
/// ## Request body
///
/// ```json
/// { "name": "my-org" }
/// ```
///
/// ## Response (201 Created)
///
/// The full account record including the generated UUID.
pub async fn create_account<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Json(body): Json<Value>,
) -> impl IntoResponse {
let name = match body.get("name").and_then(|v| v.as_str()) {
Some(n) => n,
None => {
return (
StatusCode::BAD_REQUEST,
Json(json!({"error": "name required"})),
)
.into_response()
}
};
match state.db.create_account(name, AccountType::User).await {
Ok(account) => (StatusCode::CREATED, Json(json!(account))).into_response(),
Err(e) => (
StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({"error": e.to_string()})),
)
.into_response(),
}
}
/// Handle `GET /api/v1/accounts` -- list all accounts.
///
/// Responds with a JSON array of the records returned by `list_accounts`, or
/// `500` with the error text if the database call fails.
pub async fn list_accounts<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
) -> impl IntoResponse {
    let accounts = match state.db.list_accounts().await {
        Ok(found) => found,
        Err(err) => {
            return (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response();
        }
    };
    Json(json!(accounts)).into_response()
}
/// Handle `GET /api/v1/accounts/{id}` -- fetch one account by UUID.
///
/// Responses:
/// - `200` with the account record as JSON when it exists.
/// - `400` with the text `invalid UUID` when `id` does not parse as a UUID.
/// - `404` when the database reports `DbError::NotFound`.
/// - `500` with the error text for any other database error.
pub async fn get_account<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path(id): Path<String>,
) -> impl IntoResponse {
    let Ok(account_id) = Uuid::parse_str(&id) else {
        return (StatusCode::BAD_REQUEST, "invalid UUID").into_response();
    };

    let lookup = state.db.get_account(account_id).await;
    match lookup {
        Ok(account) => Json(json!(account)).into_response(),
        Err(jupiter_db::error::DbError::NotFound { .. }) => StatusCode::NOT_FOUND.into_response(),
        Err(other) => (StatusCode::INTERNAL_SERVER_ERROR, other.to_string()).into_response(),
    }
}

View file

@ -0,0 +1,186 @@
//! # Authentication endpoints
//!
//! This module provides JWT token creation endpoints for two use cases:
//!
//! 1. **User login** (`POST /auth/token`) -- the `hci` CLI authenticates
//! with username/password and receives a User-scoped JWT valid for 24 hours.
//! The password is verified against a bcrypt hash stored in the database.
//!
//! 2. **Effect token issuance** (`POST /projects/{id}/user-effect-token`) --
//! creates a short-lived (1 hour) Effect-scoped JWT for use during effect
//! execution. The token's `sub` is the project ID and its scope contains
//! the attribute path, limiting the effect's access to only its own
//! project's state files.
//!
//! ## Token lifecycle
//!
//! ```text
//! hci login ──POST /auth/token──> Verify password ──> User JWT (24h)
//! │
//! ┌────────────────────────────────────────┘
//! v
//! hci state / web UI ──> Use User JWT for all API calls
//!
//! Scheduler dispatches effect ──> Creates Effect JWT (1h)
//! │
//! v
//! Effect runs ──> Uses Effect JWT for /current-task/state
//! ```
use axum::{
extract::{Json, Path, State},
http::StatusCode,
response::IntoResponse,
};
use serde_json::{json, Value};
use std::sync::Arc;
use uuid::Uuid;
use jupiter_db::backend::StorageBackend;
use crate::auth::{create_jwt, TokenScope};
use crate::state::AppState;
/// Handle `POST /api/v1/auth/token` -- authenticate with username/password
/// and receive a User-scoped JWT.
///
/// The handler:
/// 1. Extracts `username` and `password` from the JSON request body.
/// 2. Looks up the bcrypt password hash for the account from the database.
/// 3. Verifies the password against the stored hash.
/// 4. On success, creates a 24-hour JWT with `TokenScope::User`.
///
/// ## Request body
///
/// ```json
/// {
/// "username": "admin",
/// "password": "secret"
/// }
/// ```
///
/// ## Response (200 OK)
///
/// ```json
/// {
/// "token": "eyJ...",
/// "expiresAt": "2024-01-01T12:00:00Z"
/// }
/// ```
///
/// Returns 401 Unauthorized if the credentials are invalid.
pub async fn create_token<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Json(body): Json<Value>,
) -> impl IntoResponse {
let username = body.get("username").and_then(|v| v.as_str());
let password = body.get("password").and_then(|v| v.as_str());
match (username, password) {
(Some(user), Some(pass)) => {
// Look up the bcrypt password hash for this account.
let password_hash = match state.db.get_account_password_hash(user).await {
Ok(Some(hash)) => hash,
Ok(None) => {
return (StatusCode::UNAUTHORIZED, "invalid credentials").into_response();
}
Err(jupiter_db::error::DbError::NotFound { .. }) => {
return (StatusCode::UNAUTHORIZED, "invalid credentials").into_response();
}
Err(e) => {
return (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response();
}
};
// Verify the plaintext password against the stored bcrypt hash.
match bcrypt::verify(pass, &password_hash) {
Ok(true) => {}
Ok(false) => {
return (StatusCode::UNAUTHORIZED, "invalid credentials").into_response();
}
Err(_) => {
return (StatusCode::UNAUTHORIZED, "invalid credentials").into_response();
}
}
// Create a User-scoped JWT valid for 24 hours.
match create_jwt(&state.config.jwt_private_key, user, TokenScope::User, 24) {
Ok(token) => {
let expires_at = chrono::Utc::now() + chrono::Duration::hours(24);
Json(json!({
"token": token,
"expiresAt": expires_at.to_rfc3339(),
}))
.into_response()
}
Err(e) => {
(StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response()
}
}
}
_ => (StatusCode::BAD_REQUEST, "username and password required").into_response(),
}
}
/// Handle `POST /api/v1/projects/{id}/user-effect-token` -- create an
/// Effect-scoped JWT for a project.
///
/// This endpoint is called by the scheduler (or manually by administrators)
/// to create a token that an effect will use during execution. The token:
///
/// - Has `sub` = the project ID (so the effect can access this project's state).
/// - Has `TokenScope::Effect` with the job ID and attribute path.
/// - Expires after 1 hour (effects should not run longer than that).
///
/// ## Request body
///
/// ```json
/// {
/// "effectAttributePath": ["effects", "deploy"]
/// }
/// ```
///
/// ## Response (200 OK)
///
/// ```json
/// {
/// "token": "eyJ...",
/// "apiBaseUrl": "https://jupiter.example.com"
/// }
/// ```
///
/// The `apiBaseUrl` is included so the effect knows where to send state
/// file requests (it may differ from the agent's server URL in multi-tier
/// deployments).
pub async fn create_effect_token<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Path(project_id): Path<String>,
Json(body): Json<Value>,
) -> impl IntoResponse {
let _uuid = match Uuid::parse_str(&project_id) {
Ok(u) => u,
Err(_) => return (StatusCode::BAD_REQUEST, "invalid UUID").into_response(),
};
let attr_path = body
.get("effectAttributePath")
.and_then(|v| serde_json::from_value::<Vec<String>>(v.clone()).ok())
.unwrap_or_default();
// Build the Effect scope with the project_id as job_id (for backward
// compatibility with the Hercules CI API) and the attribute path.
let scope = TokenScope::Effect {
job_id: project_id.clone(),
attribute_path: attr_path,
};
// Create a short-lived (1 hour) JWT with the project ID as subject.
match create_jwt(&state.config.jwt_private_key, &project_id, scope, 1) {
Ok(token) => Json(json!({
"token": token,
"apiBaseUrl": state.config.base_url,
}))
.into_response(),
Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
}
}

View file

@ -0,0 +1,157 @@
//! # Build (derivation) endpoints
//!
//! Builds represent individual Nix derivation builds within a job. After
//! evaluation discovers the set of attributes and their derivation paths,
//! the scheduler creates build records and dispatches build tasks to agents.
//!
//! These endpoints are keyed by derivation path (e.g.
//! `/nix/store/abc...-my-package.drv`) rather than by UUID, matching the
//! Hercules CI API convention. This makes it easy for the `hci` CLI and
//! web UI to link directly to a specific derivation.
//!
//! ## Data flow
//!
//! ```text
//! Evaluation discovers attribute
//!   └─> Scheduler creates Build record (status: Pending)
//!         └─> Scheduler dispatches build task to agent
//!               └─> Agent builds derivation
//!                     └─> Agent sends BuildDone
//!                           └─> Scheduler updates Build record (status: Success/Failure)
//! ```
//!
//! ## Log retrieval
//!
//! Build logs are stored as structured log entries (not raw text). The
//! `get_derivation_log` endpoint returns paginated log lines that can be
//! rendered in the web UI or printed by the CLI.
use axum::{
extract::{Path, Query, State},
http::StatusCode,
response::IntoResponse,
Json,
};
use serde_json::json;
use std::sync::Arc;
use uuid::Uuid;
use jupiter_api_types::PaginationParams;
use jupiter_db::backend::StorageBackend;
use jupiter_scheduler::engine::SchedulerEvent;
use crate::state::AppState;
/// Handle `GET /api/v1/accounts/{id}/derivations/{drvPath}` -- fetch the
/// build record for a derivation.
///
/// The build is looked up solely by its derivation path; the account ID path
/// segment is extracted but not used in the query.
///
/// Responds `200` with the build record as JSON, `404` if no build exists for
/// that derivation path, or `500` with the error text on a database failure.
pub async fn get_derivation<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path((_account_id, drv_path)): Path<(String, String)>,
) -> impl IntoResponse {
    let lookup = state.db.get_build_by_drv_path(&drv_path).await;
    let maybe_build = match lookup {
        Ok(found) => found,
        Err(err) => {
            return (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response();
        }
    };

    match maybe_build {
        Some(build) => Json(json!(build)).into_response(),
        None => StatusCode::NOT_FOUND.into_response(),
    }
}
/// Handle `GET /api/v1/accounts/{id}/derivations/{drvPath}/log/lines` --
/// return log lines for a derivation's build.
///
/// The derivation path is first resolved to a build record; the build's ID is
/// then used to fetch log entries. The `page` query parameter is passed
/// through as the offset (default 0) and `per_page` as the limit (default 100).
/// NOTE(review): `page` is forwarded as a raw offset, not multiplied by
/// `per_page` -- confirm this matches what clients expect.
///
/// Responds `200` with `{"lines": [...]}`, `404` if no build exists for the
/// derivation path, or `500` with the error text on a database failure.
pub async fn get_derivation_log<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path((_account_id, drv_path)): Path<(String, String)>,
    Query(params): Query<PaginationParams>,
) -> impl IntoResponse {
    // Step 1: derivation path -> build record.
    let build = match state.db.get_build_by_drv_path(&drv_path).await {
        Ok(Some(found)) => found,
        Ok(None) => return StatusCode::NOT_FOUND.into_response(),
        Err(err) => {
            return (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response();
        }
    };

    // Step 2: build ID -> log entries for the requested window.
    let build_id: Uuid = build.id.into();
    let offset = params.page.unwrap_or(0);
    let limit = params.per_page.unwrap_or(100);

    state
        .db
        .get_log_entries(build_id, offset, limit)
        .await
        .map(|entries| Json(json!({ "lines": entries })).into_response())
        .unwrap_or_else(|err| (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response())
}
/// Handle `POST /api/v1/accounts/{id}/derivations/{drvPath}/retry` -- request
/// a retry of a build.
///
/// Resolves the build by derivation path and sends a
/// [`SchedulerEvent::RetryBuild`] carrying the build's UUID on the scheduler
/// channel. The handler only enqueues the request; what the scheduler does
/// with the event happens asynchronously and outside this function. The
/// account ID segment is extracted from the URL but ignored.
///
/// # Responses
///
/// - `202 Accepted` with `{"status": "retry queued"}` once the event has been
///   handed to the scheduler channel.
/// - `404 Not Found` when no build exists for the derivation path.
/// - `500 Internal Server Error` with the error text when the lookup fails.
/// - `503 Service Unavailable` when the event could not be sent to the
///   scheduler, so the client is never told a retry was queued when it was not.
pub async fn retry_build<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path((_account_id, drv_path)): Path<(String, String)>,
) -> impl IntoResponse {
    let build = match state.db.get_build_by_drv_path(&drv_path).await {
        Ok(Some(build)) => build,
        Ok(None) => return StatusCode::NOT_FOUND.into_response(),
        Err(e) => return (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
    };
    let build_id: Uuid = build.id.into();

    // Previously the send result was discarded, so a failed send still
    // produced "retry queued". NOTE(review): presumably `scheduler_tx` is a
    // tokio mpsc sender, where an error means the receiver is gone -- confirm.
    let sent = state
        .scheduler_tx
        .send(SchedulerEvent::RetryBuild { build_id })
        .await;
    if sent.is_err() {
        return (StatusCode::SERVICE_UNAVAILABLE, "scheduler unavailable").into_response();
    }

    (
        StatusCode::ACCEPTED,
        Json(json!({"status": "retry queued"})),
    )
        .into_response()
}
/// Handle `POST /api/v1/accounts/{id}/derivations/{drvPath}/cancel` -- request
/// cancellation of a build.
///
/// Resolves the build by derivation path and sends a
/// [`SchedulerEvent::CancelBuild`] carrying the build's UUID on the scheduler
/// channel. The handler only enqueues the request; the cancellation itself is
/// carried out asynchronously by the scheduler. The account ID segment is
/// extracted from the URL but ignored.
///
/// # Responses
///
/// - `202 Accepted` with `{"status": "cancel queued"}` once the event has been
///   handed to the scheduler channel.
/// - `404 Not Found` when no build exists for the derivation path.
/// - `500 Internal Server Error` with the error text when the lookup fails.
/// - `503 Service Unavailable` when the event could not be sent to the
///   scheduler, so the client is never told a cancel was queued when it was
///   not.
pub async fn cancel_build<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path((_account_id, drv_path)): Path<(String, String)>,
) -> impl IntoResponse {
    let build = match state.db.get_build_by_drv_path(&drv_path).await {
        Ok(Some(build)) => build,
        Ok(None) => return StatusCode::NOT_FOUND.into_response(),
        Err(e) => return (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
    };
    let build_id: Uuid = build.id.into();

    // Previously the send result was discarded, so a failed send still
    // produced "cancel queued". NOTE(review): presumably `scheduler_tx` is a
    // tokio mpsc sender, where an error means the receiver is gone -- confirm.
    let sent = state
        .scheduler_tx
        .send(SchedulerEvent::CancelBuild { build_id })
        .await;
    if sent.is_err() {
        return (StatusCode::SERVICE_UNAVAILABLE, "scheduler unavailable").into_response();
    }

    (
        StatusCode::ACCEPTED,
        Json(json!({"status": "cancel queued"})),
    )
        .into_response()
}

View file

@ -0,0 +1,150 @@
//! # Effect endpoints
//!
//! Effects are side-effecting Nix actions (e.g. deployments, notifications)
//! that run after builds complete. They are defined as Nix attributes under
//! the `effects` output of a flake and are executed by agents with special
//! scoped tokens that limit their access to only the project's state files.
//!
//! Effects are identified by their job ID and attribute path (e.g. `"deploy"`),
//! matching the Hercules CI API convention. This allows the web UI and CLI
//! to link directly to a specific effect within a job.
//!
//! ## Data flow
//!
//! ```text
//! Evaluation discovers Effect attribute
//!   └─> Scheduler creates Effect record (status: Pending)
//!         └─> All required builds complete
//!               └─> Scheduler dispatches effect task to agent
//!                     └─> Agent runs effect with scoped JWT token
//!                           └─> Agent sends EffectDone
//!                                 └─> Scheduler updates Effect record
//! ```
//!
//! ## Scoped access
//!
//! During execution, effects receive a short-lived JWT with `TokenScope::Effect`
//! that grants access to the `/current-task/state` endpoints. This token
//! embeds the project ID and attribute path, ensuring effects can only
//! read/write their own project's state files.
use axum::{
extract::{Path, Query, State},
http::StatusCode,
response::IntoResponse,
Json,
};
use serde_json::json;
use std::sync::Arc;
use uuid::Uuid;
use jupiter_api_types::PaginationParams;
use jupiter_db::backend::StorageBackend;
use jupiter_scheduler::engine::SchedulerEvent;
use crate::state::AppState;
/// Handle `GET /api/v1/jobs/{job_id}/effects/{attr}` -- get effect info.
///
/// Looks up an effect by its parent job UUID and attribute name. The attribute
/// name corresponds to the Nix attribute path under the `effects` output
/// (e.g. `"deploy"`, `"notify"`).
///
/// Returns the effect record including status (Pending/Running/Success/Failure),
/// timing data, and the derivation path. Returns 404 if the effect does not
/// exist for this job.
pub async fn get_effect<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Path((job_id, attr)): Path<(String, String)>,
) -> impl IntoResponse {
let uuid = match Uuid::parse_str(&job_id) {
Ok(u) => u,
Err(_) => return (StatusCode::BAD_REQUEST, "invalid UUID").into_response(),
};
match state.db.get_effect_by_job_and_attr(uuid, &attr).await {
Ok(effect) => Json(json!(effect)).into_response(),
Err(jupiter_db::error::DbError::NotFound { .. }) => StatusCode::NOT_FOUND.into_response(),
Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
}
}
/// Handle `GET /api/v1/jobs/{job_id}/effects/{attr}/log/lines` -- list log
/// lines for an effect.
///
/// Works in two steps: the effect is resolved from the job UUID and attribute
/// name, and the effect's ID (converted to a [`Uuid`]) is then used to fetch
/// log entries.
///
/// # Query parameters
///
/// - `page`: handed to the storage layer as the offset; defaults to `0`.
/// - `per_page`: handed to the storage layer as the limit; defaults to `100`.
///
/// # Responses
///
/// - `200 OK` with `{"lines": [...]}`.
/// - `400 Bad Request` (`invalid UUID`) when `job_id` does not parse as a UUID.
/// - `404 Not Found` when the effect lookup reports `DbError::NotFound`.
/// - `500 Internal Server Error` with the error text for any other failure.
pub async fn get_effect_log<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path((job_id, attr)): Path<(String, String)>,
    Query(params): Query<PaginationParams>,
) -> impl IntoResponse {
    let Ok(job_uuid) = Uuid::parse_str(&job_id) else {
        return (StatusCode::BAD_REQUEST, "invalid UUID").into_response();
    };

    // Step 1: (job, attribute) -> effect record.
    let effect = match state.db.get_effect_by_job_and_attr(job_uuid, &attr).await {
        Err(jupiter_db::error::DbError::NotFound { .. }) => {
            return StatusCode::NOT_FOUND.into_response();
        }
        Err(err) => {
            return (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response();
        }
        Ok(found) => found,
    };

    // Step 2: log entries are keyed by the effect's UUID.
    let effect_id: Uuid = effect.id.into();
    let offset = params.page.unwrap_or(0);
    let limit = params.per_page.unwrap_or(100);

    state
        .db
        .get_log_entries(effect_id, offset, limit)
        .await
        .map(|entries| Json(json!({ "lines": entries })).into_response())
        .unwrap_or_else(|err| {
            (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response()
        })
}
/// Handle `POST /api/v1/jobs/{job_id}/effects/{attr}/cancel` -- request
/// cancellation of an effect.
///
/// Resolves the effect from the job UUID and attribute name, then sends a
/// [`SchedulerEvent::CancelEffect`] carrying both the effect's UUID and the
/// job's UUID on the scheduler channel. The handler only enqueues the
/// request; the cancellation itself is carried out asynchronously by the
/// scheduler.
///
/// # Responses
///
/// - `202 Accepted` with `{"status": "cancel queued"}` once the event has been
///   handed to the scheduler channel.
/// - `400 Bad Request` (`invalid UUID`) when `job_id` does not parse as a UUID.
/// - `404 Not Found` when the effect lookup reports `DbError::NotFound`.
/// - `500 Internal Server Error` with the error text for any other lookup
///   failure.
/// - `503 Service Unavailable` when the event could not be sent to the
///   scheduler, so the client is never told a cancel was queued when it was
///   not.
pub async fn cancel_effect<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path((job_id, attr)): Path<(String, String)>,
) -> impl IntoResponse {
    let uuid = match Uuid::parse_str(&job_id) {
        Ok(u) => u,
        Err(_) => return (StatusCode::BAD_REQUEST, "invalid UUID").into_response(),
    };
    let effect = match state.db.get_effect_by_job_and_attr(uuid, &attr).await {
        Ok(e) => e,
        Err(jupiter_db::error::DbError::NotFound { .. }) => {
            return StatusCode::NOT_FOUND.into_response()
        }
        Err(e) => return (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
    };
    let effect_id: Uuid = effect.id.into();

    // Previously the send result was discarded, so a failed send still
    // produced "cancel queued". NOTE(review): presumably `scheduler_tx` is a
    // tokio mpsc sender, where an error means the receiver is gone -- confirm.
    let sent = state
        .scheduler_tx
        .send(SchedulerEvent::CancelEffect {
            effect_id,
            job_id: uuid,
        })
        .await;
    if sent.is_err() {
        return (StatusCode::SERVICE_UNAVAILABLE, "scheduler unavailable").into_response();
    }

    (
        StatusCode::ACCEPTED,
        Json(json!({"status": "cancel queued"})),
    )
        .into_response()
}

View file

@ -0,0 +1,37 @@
//! # Health check endpoint
//!
//! Provides a simple liveness probe at `GET /health`. This endpoint is
//! intended for use by load balancers, container orchestrators (e.g.
//! Kubernetes), and monitoring systems to verify that the Jupiter server
//! process is running and able to handle requests.
//!
//! The response includes the service name and the version from `Cargo.toml`,
//! which is useful for verifying that a deployment rolled out the expected
//! binary version.
use axum::Json;
use serde_json::{json, Value};
/// Handle `GET /health` -- report that the server process is up.
///
/// The handler takes no extractors and touches neither the database nor any
/// shared state, so it answers as long as the process can serve requests.
///
/// # Response body
///
/// ```json
/// {
///   "status": "ok",
///   "service": "jupiter",
///   "version": "0.1.0"
/// }
/// ```
///
/// `version` is baked in at compile time via `env!("CARGO_PKG_VERSION")`; the
/// value shown above is only an example.
pub async fn health() -> Json<Value> {
    let body = json!({
        "status": "ok",
        "service": "jupiter",
        "version": env!("CARGO_PKG_VERSION"),
    });
    Json(body)
}

View file

@ -0,0 +1,183 @@
//! # Job management endpoints
//!
//! Jobs are the top-level unit of CI work in Jupiter. A job is created when
//! the scheduler processes a forge webhook event (push, PR) for a project.
//! Each job progresses through a lifecycle:
//!
//! 1. **Pending** -- waiting for an agent to pick up the evaluation task.
//! 2. **Evaluating** -- an agent is evaluating the Nix flake/expression to
//! discover attributes (packages, effects).
//! 3. **Building** -- evaluation is complete; builds are being dispatched to
//! agents for each discovered derivation.
//! 4. **Complete** -- all builds (and optionally effects) have finished.
//!
//! Jobs can be rerun (re-enqueued for evaluation) or cancelled (all pending
//! tasks are aborted). Both actions are asynchronous: the handler sends a
//! [`SchedulerEvent`] and returns immediately with 202 Accepted.
//!
//! ## Evaluation results
//!
//! The `GET /jobs/{id}/evaluation` endpoint returns the list of Nix attributes
//! discovered during the evaluation phase. Each attribute has a path (e.g.
//! `["packages", "x86_64-linux", "default"]`), a derivation path, and a type
//! (Regular or Effect).
use axum::{
extract::{Json, Path, Query, State},
http::StatusCode,
response::IntoResponse,
};
use serde_json::json;
use std::sync::Arc;
use uuid::Uuid;
use jupiter_api_types::{Paginated, PaginationParams};
use jupiter_db::backend::StorageBackend;
use jupiter_scheduler::engine::SchedulerEvent;
use crate::state::AppState;
/// Handle `GET /api/v1/projects/{project_id}/jobs` -- list jobs for a project.
///
/// Returns a paginated list of jobs belonging to the given project, ordered
/// by creation time (newest first). Pagination is controlled by the `page`
/// and `per_page` query parameters (defaults: page=1, per_page=20).
///
/// The response follows the [`Paginated`] wrapper format:
///
/// ```json
/// {
/// "items": [...],
/// "total": 42,
/// "page": 1,
/// "per_page": 20
/// }
/// ```
pub async fn list_jobs<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Path(project_id): Path<String>,
Query(params): Query<PaginationParams>,
) -> impl IntoResponse {
let uuid = match Uuid::parse_str(&project_id) {
Ok(u) => u,
Err(_) => return (StatusCode::BAD_REQUEST, "invalid UUID").into_response(),
};
let page = params.page.unwrap_or(1);
let per_page = params.per_page.unwrap_or(20);
match state.db.list_jobs_for_project(uuid, page, per_page).await {
Ok((jobs, total)) => Json(json!(Paginated {
items: jobs,
total,
page,
per_page,
}))
.into_response(),
Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
}
}
/// Handle `GET /api/v1/jobs/{id}` -- get a specific job by UUID.
///
/// Returns the full job record including status, project linkage, commit
/// info, and timing data. Returns 404 if the job does not exist.
pub async fn get_job<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Path(id): Path<String>,
) -> impl IntoResponse {
let uuid = match Uuid::parse_str(&id) {
Ok(u) => u,
Err(_) => return (StatusCode::BAD_REQUEST, "invalid UUID").into_response(),
};
match state.db.get_job(uuid).await {
Ok(job) => Json(json!(job)).into_response(),
Err(jupiter_db::error::DbError::NotFound { .. }) => StatusCode::NOT_FOUND.into_response(),
Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
}
}
/// Handle `GET /api/v1/jobs/{id}/evaluation` -- list the evaluation attributes
/// stored for a job.
///
/// The `jobId` in the response is the `id` path segment exactly as the client
/// sent it, not a re-formatted UUID.
///
/// # Responses
///
/// - `200 OK` with:
///
///   ```json
///   {
///     "jobId": "<uuid>",
///     "attributes": [...]
///   }
///   ```
/// - `400 Bad Request` (`invalid UUID`) when `id` does not parse as a UUID.
/// - `500 Internal Server Error` with the error text when the query fails.
pub async fn get_evaluation<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path(id): Path<String>,
) -> impl IntoResponse {
    let Ok(job_uuid) = Uuid::parse_str(&id) else {
        return (StatusCode::BAD_REQUEST, "invalid UUID").into_response();
    };

    let attributes = match state.db.get_evaluation_attributes(job_uuid).await {
        Ok(found) => found,
        Err(err) => {
            return (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response();
        }
    };

    let body = json!({
        "jobId": id,
        "attributes": attributes,
    });
    Json(body).into_response()
}
/// Handle `POST /api/v1/jobs/{id}/rerun` -- request that a job be run again.
///
/// Sends a [`SchedulerEvent::RerunJob`] carrying the job UUID on the scheduler
/// channel. The handler does not look the job up itself and only enqueues the
/// request; the rerun is carried out asynchronously by the scheduler.
///
/// # Responses
///
/// - `202 Accepted` with `{"status": "rerun queued"}` once the event has been
///   handed to the scheduler channel.
/// - `400 Bad Request` (`invalid UUID`) when `id` does not parse as a UUID.
/// - `503 Service Unavailable` when the event could not be sent to the
///   scheduler, so the client is never told a rerun was queued when it was
///   not.
pub async fn rerun_job<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path(id): Path<String>,
) -> impl IntoResponse {
    let uuid = match Uuid::parse_str(&id) {
        Ok(u) => u,
        Err(_) => return (StatusCode::BAD_REQUEST, "invalid UUID").into_response(),
    };

    // Previously the send result was discarded, so a failed send still
    // produced "rerun queued". NOTE(review): presumably `scheduler_tx` is a
    // tokio mpsc sender, where an error means the receiver is gone -- confirm.
    let sent = state
        .scheduler_tx
        .send(SchedulerEvent::RerunJob { job_id: uuid })
        .await;
    if sent.is_err() {
        return (StatusCode::SERVICE_UNAVAILABLE, "scheduler unavailable").into_response();
    }

    (
        StatusCode::ACCEPTED,
        Json(json!({"status": "rerun queued"})),
    )
        .into_response()
}
/// Handle `POST /api/v1/jobs/{id}/cancel` -- request cancellation of a job.
///
/// Sends a [`SchedulerEvent::CancelJob`] carrying the job UUID on the
/// scheduler channel. The handler does not look the job up itself and only
/// enqueues the request; the cancellation is carried out asynchronously by
/// the scheduler.
///
/// # Responses
///
/// - `202 Accepted` with `{"status": "cancel queued"}` once the event has been
///   handed to the scheduler channel.
/// - `400 Bad Request` (`invalid UUID`) when `id` does not parse as a UUID.
/// - `503 Service Unavailable` when the event could not be sent to the
///   scheduler, so the client is never told a cancel was queued when it was
///   not.
pub async fn cancel_job<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path(id): Path<String>,
) -> impl IntoResponse {
    let uuid = match Uuid::parse_str(&id) {
        Ok(u) => u,
        Err(_) => return (StatusCode::BAD_REQUEST, "invalid UUID").into_response(),
    };

    // Previously the send result was discarded, so a failed send still
    // produced "cancel queued". NOTE(review): presumably `scheduler_tx` is a
    // tokio mpsc sender, where an error means the receiver is gone -- confirm.
    let sent = state
        .scheduler_tx
        .send(SchedulerEvent::CancelJob { job_id: uuid })
        .await;
    if sent.is_err() {
        return (StatusCode::SERVICE_UNAVAILABLE, "scheduler unavailable").into_response();
    }

    (
        StatusCode::ACCEPTED,
        Json(json!({"status": "cancel queued"})),
    )
        .into_response()
}

View file

@ -0,0 +1,237 @@
//! # HTTP route definitions
//!
//! This module assembles the complete axum [`Router`] for the Jupiter server.
//! All REST API endpoints and the WebSocket upgrade route are registered here.
//!
//! ## Route organization
//!
//! Routes are grouped by domain, mirroring the Hercules CI API structure:
//!
//! | Prefix | Module | Purpose |
//! |------------------------------------|----------------|--------------------------------------------|
//! | `/health` | [`health`] | Liveness probe for load balancers |
//! | `/api/v1/agent/socket` | [`websocket`] | WebSocket upgrade for agent connections |
//! | `/api/v1/agents` | [`agents`] | List/get agent sessions |
//! | `/api/v1/agent/{session,heartbeat,goodbye}` | [`tasks`] | REST-based agent session management |
//! | `/api/v1/accounts` | [`agents`] | Account and cluster join token management |
//! | `/api/v1/projects` | [`projects`] | Project CRUD |
//! | `/api/v1/jobs` | [`jobs`] | Job listing, evaluation, rerun, cancel |
//! | `/api/v1/accounts/{id}/derivations` | [`builds`] | Build (derivation) info, logs, retry/cancel |
//! | `/api/v1/jobs/{id}/effects` | [`effects`] | Effect info, logs, cancel |
//! | `/api/v1/tasks` | [`tasks`] | Agent task polling, status updates, events |
//! | `/api/v1/projects/{id}/state` | [`state_files`]| State file upload/download and locking |
//! | `/api/v1/current-task/state` | [`state_files`]| Effect-scoped state access (via JWT) |
//! | `/api/v1/lock-leases` | [`state_files`]| Lock renewal and release |
//! | `/api/v1/webhooks` | [`webhooks`] | GitHub/Gitea webhook receivers |
//! | `/api/v1/auth` | [`auth`] | Token creation (login, effect tokens) |
//!
//! ## Shared state
//!
//! All handlers receive `State<Arc<AppState<DB>>>` which provides access to the
//! database, scheduler channel, agent hub, and configuration. The state is
//! generic over [`StorageBackend`] so the same route tree works with SQLite
//! or PostgreSQL.
pub mod agents;
pub mod auth;
pub mod builds;
pub mod effects;
pub mod health;
pub mod jobs;
pub mod projects;
pub mod state_files;
pub mod tasks;
pub mod webhooks;
use axum::{
routing::{delete, get, post, put},
Router,
};
use std::sync::Arc;
use jupiter_db::backend::StorageBackend;
use crate::state::AppState;
use crate::websocket;
/// Build the complete axum router with all API routes and shared state.
///
/// Wraps `state` in an [`Arc`], registers every route handler listed below,
/// and attaches the shared state with `with_state`, so the returned
/// [`Router`] needs no further state before being served.
///
/// # Parameters
///
/// - `state`: the application state handed to handlers as
///   `State<Arc<AppState<DB>>>`.
///
/// # Route groups
///
/// Routes are organized into logical groups:
/// - **Health**: simple liveness check at `/health`.
/// - **Agent WebSocket**: the `/api/v1/agent/socket` endpoint that agents
///   connect to for the binary protocol.
/// - **Agent REST**: session creation, heartbeat, and graceful shutdown
///   endpoints for agents that use the HTTP-based protocol variant.
/// - **Cluster management**: account and join token CRUD for multi-tenant setups.
/// - **Projects/Jobs/Builds/Effects**: the core CI data model, providing
///   read access for the CLI and web UI, plus rerun/cancel actions.
/// - **Tasks**: the agent-facing endpoints for polling work and reporting results.
/// - **State files**: blob upload/download with versioning and distributed locking,
///   powering the `hci state` CLI feature.
/// - **Webhooks**: forge-specific endpoints that verify signatures and trigger
///   the scheduler.
/// - **Auth**: JWT token issuance for user login and effect execution.
///
/// # Path parameters
///
/// Captures are written in `{name}` form. Handlers that take several captures
/// destructure them positionally as a tuple, so the capture names used here
/// need not match the handler's binding names (for example `{id}` is bound as
/// `_account_id` by the derivation handlers and as `job_id` by the effect
/// handlers).
///
/// NOTE(review): `{drvPath}` captures a single path segment. A derivation
/// path containing `/` presumably has to be percent-encoded by the client (or
/// sent as a bare store-path basename) -- confirm against the callers.
pub fn build_router<DB: StorageBackend + 'static>(state: AppState<DB>) -> Router {
// One `Arc` shared by every handler; `with_state` at the end installs it.
let shared_state = Arc::new(state);
Router::new()
// Health check -- used by load balancers and orchestrators to verify
// the server is running and responsive.
.route("/health", get(health::health))
// Agent WebSocket -- the primary connection path for hercules-ci-agent
// instances. Upgrades to WebSocket and runs the wire protocol.
.route(
"/api/v1/agent/socket",
get(websocket::handler::ws_handler::<DB>),
)
// Agent REST endpoints -- list/get agent sessions for the web UI and CLI.
.route("/api/v1/agents", get(agents::list_agents::<DB>))
.route("/api/v1/agents/{id}", get(agents::get_agent::<DB>))
.route("/api/v1/agent/service-info", get(agents::service_info))
// Agent lifecycle via REST -- alternative to WebSocket for agents that
// prefer HTTP polling (session creation, heartbeat, goodbye).
.route(
"/api/v1/agent/session",
post(tasks::create_agent_session::<DB>),
)
.route(
"/api/v1/agent/heartbeat",
post(tasks::agent_heartbeat::<DB>),
)
.route("/api/v1/agent/goodbye", post(tasks::agent_goodbye::<DB>))
// Cluster join tokens -- manage tokens that allow new agents to join
// an account's cluster. Tokens are bcrypt-hashed before storage.
.route(
"/api/v1/accounts/{account_id}/clusterJoinTokens",
post(agents::create_join_token::<DB>).get(agents::list_join_tokens::<DB>),
)
.route(
"/api/v1/accounts/{account_id}/clusterJoinTokens/{token_id}",
delete(agents::delete_join_token::<DB>),
)
// Account endpoints -- CRUD for accounts (organizations or users).
.route(
"/api/v1/accounts",
post(agents::create_account::<DB>).get(agents::list_accounts::<DB>),
)
.route("/api/v1/accounts/{id}", get(agents::get_account::<DB>))
// Derivation (build) endpoints -- look up build info by derivation
// path, view build logs, and trigger retry/cancel via the scheduler.
.route(
"/api/v1/accounts/{id}/derivations/{drvPath}",
get(builds::get_derivation::<DB>),
)
.route(
"/api/v1/accounts/{id}/derivations/{drvPath}/log/lines",
get(builds::get_derivation_log::<DB>),
)
.route(
"/api/v1/accounts/{id}/derivations/{drvPath}/retry",
post(builds::retry_build::<DB>),
)
.route(
"/api/v1/accounts/{id}/derivations/{drvPath}/cancel",
post(builds::cancel_build::<DB>),
)
// Project endpoints -- CRUD for projects. Each project is linked to
// a forge repository and an account.
.route(
"/api/v1/projects",
post(projects::create_project::<DB>).get(projects::list_projects::<DB>),
)
.route(
"/api/v1/projects/{id}",
get(projects::get_project::<DB>).patch(projects::update_project::<DB>),
)
// Jobs scoped to a project -- paginated listing of CI jobs.
.route(
"/api/v1/projects/{id}/jobs",
get(jobs::list_jobs::<DB>),
)
// Job endpoints -- individual job details, evaluation results,
// and rerun/cancel actions that delegate to the scheduler.
.route("/api/v1/jobs/{id}", get(jobs::get_job::<DB>))
.route(
"/api/v1/jobs/{id}/evaluation",
get(jobs::get_evaluation::<DB>),
)
.route("/api/v1/jobs/{id}/rerun", post(jobs::rerun_job::<DB>))
.route("/api/v1/jobs/{id}/cancel", post(jobs::cancel_job::<DB>))
// Effect endpoints -- view effect status and logs, cancel running effects.
.route(
"/api/v1/jobs/{id}/effects/{attr}",
get(effects::get_effect::<DB>),
)
.route(
"/api/v1/jobs/{id}/effects/{attr}/log/lines",
get(effects::get_effect_log::<DB>),
)
.route(
"/api/v1/jobs/{id}/effects/{attr}/cancel",
post(effects::cancel_effect::<DB>),
)
// Task endpoints -- the agent-facing API for task dispatch and reporting.
// Agents poll POST /tasks to get work, then report status, evaluation
// events, build events, and log entries.
.route("/api/v1/tasks", post(tasks::poll_task::<DB>))
.route("/api/v1/tasks/{id}", post(tasks::update_task::<DB>))
.route(
"/api/v1/tasks/{id}/eval",
post(tasks::task_eval_event::<DB>),
)
.route(
"/api/v1/tasks/{id}/build",
post(tasks::task_build_event::<DB>),
)
.route("/api/v1/tasks/log", post(tasks::task_log::<DB>))
// State file endpoints -- binary blob upload/download with versioning,
// powering the `hci state` CLI command. Includes distributed locking
// to prevent concurrent writes.
.route(
"/api/v1/projects/{id}/state/{name}/data",
put(state_files::put_state::<DB>).get(state_files::get_state::<DB>),
)
.route(
"/api/v1/projects/{id}/states",
get(state_files::list_states::<DB>),
)
.route(
"/api/v1/projects/{id}/lock/{name}",
post(state_files::acquire_lock::<DB>),
)
.route(
"/api/v1/lock-leases/{id}",
post(state_files::renew_lock::<DB>).delete(state_files::release_lock::<DB>),
)
// Current task state -- effect-scoped state access. Effects use their
// JWT token (which contains the project_id) to read/write state files
// without needing to know the project ID explicitly.
.route(
"/api/v1/current-task/state/{name}/data",
get(state_files::get_current_task_state::<DB>)
.put(state_files::put_current_task_state::<DB>),
)
// Webhook endpoints -- receive and verify push/PR events from forges.
// After signature verification and event parsing, the event is forwarded
// to the scheduler which creates a new job.
.route(
"/api/v1/webhooks/github",
post(webhooks::github_webhook::<DB>),
)
.route(
"/api/v1/webhooks/gitea",
post(webhooks::gitea_webhook::<DB>),
)
// Auth endpoints -- JWT token creation for user login (via username/
// password) and effect token issuance (scoped to a project).
.route("/api/v1/auth/token", post(auth::create_token::<DB>))
.route(
"/api/v1/projects/{id}/user-effect-token",
post(auth::create_effect_token::<DB>),
)
.with_state(shared_state)
}

View file

@ -0,0 +1,145 @@
//! # Project management endpoints
//!
//! Projects are the central organizational unit for CI work in Jupiter.
//! Each project is linked to a forge repository (via `repo_id`) and belongs
//! to an account (via `account_id`). When a webhook fires for a repository,
//! the scheduler looks up the corresponding project to create a new job.
//!
//! Projects can be enabled or disabled. When disabled, incoming webhooks
//! for that project's repository are ignored and no new jobs are created.
//!
//! ## Data flow
//!
//! ```text
//! Forge webhook ──> Scheduler finds project by repo ──> Creates Job
//!                                                            │
//!                                 ┌──────────────────────────┘
//!                                 v
//!                            Evaluation ──> Builds ──> Effects
//! ```
use axum::{
extract::{Json, Path, State},
http::StatusCode,
response::IntoResponse,
};
use serde_json::{json, Value};
use std::sync::Arc;
use uuid::Uuid;
use jupiter_db::backend::StorageBackend;
use crate::state::AppState;
/// Handle `POST /api/v1/projects` -- create a new project.
///
/// Links a forge repository to an account, creating the project record that
/// ties together webhooks, jobs, builds, and state files.
///
/// ## Request body
///
/// ```json
/// {
/// "accountId": "<uuid>",
/// "repoId": "<uuid>",
/// "name": "my-project"
/// }
/// ```
///
/// All three fields are required. Returns 201 Created with the full project
/// record on success, or 400 Bad Request if any field is missing.
pub async fn create_project<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Json(body): Json<Value>,
) -> impl IntoResponse {
let account_id = body
.get("accountId")
.and_then(|v| v.as_str())
.and_then(|s| Uuid::parse_str(s).ok());
let repo_id = body
.get("repoId")
.and_then(|v| v.as_str())
.and_then(|s| Uuid::parse_str(s).ok());
let name = body.get("name").and_then(|v| v.as_str());
match (account_id, repo_id, name) {
(Some(aid), Some(rid), Some(n)) => {
match state.db.create_project(aid, rid, n).await {
Ok(project) => (StatusCode::CREATED, Json(json!(project))).into_response(),
Err(e) => (
StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({"error": e.to_string()})),
)
.into_response(),
}
}
_ => (
StatusCode::BAD_REQUEST,
Json(json!({"error": "accountId, repoId, and name required"})),
)
.into_response(),
}
}
/// Handle `GET /api/v1/projects/{id}` -- get a specific project by UUID.
///
/// Returns the full project record including account linkage, repository info,
/// and enabled status. Returns 404 if the project does not exist.
pub async fn get_project<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Path(id): Path<String>,
) -> impl IntoResponse {
let uuid = match Uuid::parse_str(&id) {
Ok(u) => u,
Err(_) => return (StatusCode::BAD_REQUEST, "invalid UUID").into_response(),
};
match state.db.get_project(uuid).await {
Ok(project) => Json(json!(project)).into_response(),
Err(jupiter_db::error::DbError::NotFound { .. }) => StatusCode::NOT_FOUND.into_response(),
Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
}
}
/// Handle `PATCH /api/v1/projects/{id}` -- update project settings.
///
/// Currently supports toggling the `enabled` flag. When a project is disabled,
/// incoming webhooks for its repository are silently ignored by the scheduler.
///
/// ## Request body
///
/// ```json
/// { "enabled": false }
/// ```
///
/// If `enabled` is not provided, defaults to `true`.
pub async fn update_project<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Path(id): Path<String>,
Json(body): Json<Value>,
) -> impl IntoResponse {
let uuid = match Uuid::parse_str(&id) {
Ok(u) => u,
Err(_) => return (StatusCode::BAD_REQUEST, "invalid UUID").into_response(),
};
let enabled = body
.get("enabled")
.and_then(|v| v.as_bool())
.unwrap_or(true);
match state.db.update_project(uuid, enabled).await {
Ok(project) => Json(json!(project)).into_response(),
Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
}
}
/// Handle `GET /api/v1/projects` -- list every project.
///
/// Takes no filters or pagination parameters.
///
/// # Responses
///
/// - `200 OK` with whatever the storage layer's `list_projects` returns,
///   serialized as JSON.
/// - `500 Internal Server Error` with the error text when the query fails.
pub async fn list_projects<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
) -> impl IntoResponse {
    state
        .db
        .list_projects()
        .await
        .map(|projects| Json(json!(projects)).into_response())
        .unwrap_or_else(|err| {
            (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response()
        })
}

View file

@ -0,0 +1,311 @@
//! # State file endpoints
//!
//! State files are binary blobs stored per-project that persist across CI jobs.
//! They power the `hci state` CLI feature, which allows effects to read and
//! write arbitrary data (e.g. Terraform state, deployment manifests, secrets).
//!
//! ## Two access patterns
//!
//! 1. **Direct project access** (`/projects/{id}/state/{name}/data`):
//! Used by the `hci` CLI and web UI with a User-scoped JWT token. The
//! project ID is explicit in the URL path.
//!
//! 2. **Effect-scoped access** (`/current-task/state/{name}/data`):
//! Used by effects during execution with an Effect-scoped JWT token. The
//! project ID is extracted from the token's `sub` field, so the effect
//! does not need to know its project ID -- it simply reads/writes "its"
//! state. This prevents cross-project access.
//!
//! ## Distributed locking
//!
//! To prevent concurrent writes, state files support distributed locking:
//!
//! ```text
//! Client ──POST /projects/:id/lock/:name──> Acquire lock (TTL: 300s)
//!    │                                           │
//!    │   ┌── 201 Created: lock acquired ─────────┘
//!    │   │   (returns lock_id + lease)
//!    │   │
//!    │   ├── POST /lock-leases/:id ──> Renew lock (extend TTL)
//!    │   │
//!    │   ├── PUT /projects/:id/state/:name/data ──> Upload new state
//!    │   │
//!    │   └── DELETE /lock-leases/:id ──> Release lock
//!    │
//!    └── 409 Conflict: lock held by another client
//! ```
//!
//! Locks have a TTL (default 300 seconds) and expire automatically if not
//! renewed, preventing deadlocks when a client crashes.
use axum::{
body::Bytes,
extract::{Json, Path, State},
http::{HeaderMap, StatusCode},
response::IntoResponse,
};
use serde_json::{json, Value};
use std::sync::Arc;
use uuid::Uuid;
use jupiter_db::backend::StorageBackend;
use crate::auth::parse_effect_token;
use crate::state::AppState;
/// Handle `PUT /api/v1/projects/{project_id}/state/{name}/data` -- upload a
/// state file.
///
/// Takes the raw request body and hands it to the storage layer's
/// `put_state_file` under the given project and name. The name is a
/// caller-chosen identifier (e.g. `"terraform.tfstate"`, `"deploy-info"`).
///
/// NOTE(review): whether earlier contents are kept as versions or replaced is
/// decided by the storage layer and cannot be told from this handler --
/// confirm before relying on either behavior.
///
/// # Responses
///
/// - `204 No Content` on success.
/// - `400 Bad Request` (`invalid UUID`) when `project_id` does not parse as a
///   UUID; the message matches the other handlers in this module.
/// - `500 Internal Server Error` with the error text when the write fails.
pub async fn put_state<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path((project_id, name)): Path<(String, String)>,
    body: Bytes,
) -> impl IntoResponse {
    let uuid = match Uuid::parse_str(&project_id) {
        Ok(u) => u,
        Err(_) => return (StatusCode::BAD_REQUEST, "invalid UUID").into_response(),
    };
    match state.db.put_state_file(uuid, &name, &body).await {
        Ok(()) => StatusCode::NO_CONTENT.into_response(),
        Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
    }
}
/// Handle `GET /api/v1/projects/{project_id}/state/{name}/data` -- download
/// a state file.
///
/// # Responses
///
/// - `200 OK` with the stored bytes and `content-type:
///   application/octet-stream`.
/// - `400 Bad Request` (`invalid UUID`) when `project_id` does not parse as a
///   UUID; the message matches the other handlers in this module.
/// - `404 Not Found` when no state file with this name exists for the project.
/// - `500 Internal Server Error` with the error text when the read fails.
pub async fn get_state<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path((project_id, name)): Path<(String, String)>,
) -> impl IntoResponse {
    let uuid = match Uuid::parse_str(&project_id) {
        Ok(u) => u,
        Err(_) => return (StatusCode::BAD_REQUEST, "invalid UUID").into_response(),
    };
    match state.db.get_state_file(uuid, &name).await {
        Ok(Some(data)) => (
            StatusCode::OK,
            [("content-type", "application/octet-stream")],
            data,
        )
            .into_response(),
        Ok(None) => StatusCode::NOT_FOUND.into_response(),
        Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
    }
}
/// Handle `GET /api/v1/projects/{project_id}/states` -- list a project's
/// state files.
///
/// # Responses
///
/// - `200 OK` with whatever the storage layer's `list_state_files` returns for
///   the project, serialized as JSON.
/// - `400 Bad Request` (`invalid UUID`) when `project_id` does not parse as a
///   UUID.
/// - `500 Internal Server Error` with the error text when the query fails.
pub async fn list_states<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path(project_id): Path<String>,
) -> impl IntoResponse {
    let Ok(project_uuid) = Uuid::parse_str(&project_id) else {
        return (StatusCode::BAD_REQUEST, "invalid UUID").into_response();
    };

    state
        .db
        .list_state_files(project_uuid)
        .await
        .map(|files| Json(json!(files)).into_response())
        .unwrap_or_else(|err| {
            (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response()
        })
}
/// Handle `POST /api/v1/projects/{project_id}/lock/{name}` -- acquire a
/// distributed lock on a state file.
///
/// Locks prevent concurrent writes to a state file. The lock is associated
/// with an owner string (for debugging) and a TTL in seconds. If the lock
/// is already held by another client, returns 409 Conflict.
///
/// ## Request body
///
/// ```json
/// {
/// "owner": "hci-cli-user@hostname",
/// "ttlSeconds": 300
/// }
/// ```
///
/// ## Response (201 Created)
///
/// A lock lease record including the `id` that must be used for renewal
/// and release.
pub async fn acquire_lock<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Path((project_id, name)): Path<(String, String)>,
Json(body): Json<Value>,
) -> impl IntoResponse {
let uuid = match Uuid::parse_str(&project_id) {
Ok(u) => u,
Err(_) => return (StatusCode::BAD_REQUEST, "invalid UUID").into_response(),
};
let owner = body
.get("owner")
.and_then(|v| v.as_str())
.unwrap_or("unknown");
let ttl = body
.get("ttlSeconds")
.and_then(|v| v.as_u64())
.unwrap_or(300);
match state.db.acquire_lock(uuid, &name, owner, ttl).await {
Ok(lock) => (StatusCode::CREATED, Json(json!(lock))).into_response(),
Err(jupiter_db::error::DbError::Conflict(_)) => StatusCode::CONFLICT.into_response(),
Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
}
}
/// Handle `POST /api/v1/lock-leases/{lock_id}` -- renew a lock lease.
///
/// Extends the TTL of an existing lock to prevent it from expiring while
/// a long-running operation is in progress. The lock must still be held
/// by the caller (identified by the lock_id from the acquire response).
///
/// ## Request body
///
/// ```json
/// { "ttlSeconds": 300 }
/// ```
///
/// Returns the updated lock record, or 404 if the lock has already expired.
pub async fn renew_lock<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Path(lock_id): Path<String>,
Json(body): Json<Value>,
) -> impl IntoResponse {
let uuid = match Uuid::parse_str(&lock_id) {
Ok(u) => u,
Err(_) => return (StatusCode::BAD_REQUEST, "invalid UUID").into_response(),
};
let ttl = body
.get("ttlSeconds")
.and_then(|v| v.as_u64())
.unwrap_or(300);
match state.db.renew_lock(uuid, ttl).await {
Ok(lock) => Json(json!(lock)).into_response(),
Err(jupiter_db::error::DbError::NotFound { .. }) => StatusCode::NOT_FOUND.into_response(),
Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
}
}
/// Handle `DELETE /api/v1/lock-leases/{lock_id}` -- release a lock.
///
/// Explicitly releases a lock before its TTL expires so other clients can
/// acquire it immediately.
///
/// ## Responses
///
/// - **204 No Content** -- the backend's `release_lock` returned `Ok`.
/// - **400 Bad Request** (`"invalid UUID"`) -- `lock_id` is not a UUID.
/// - **500 Internal Server Error** -- the backend returned an error; its
///   message is sent as the response body.
///
/// NOTE(review): releasing an expired or unknown lock is meant to be a
/// successful no-op. That only holds if the backend returns `Ok` in that
/// case -- this handler maps every backend error to 500. Confirm against the
/// `StorageBackend` implementation.
pub async fn release_lock<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path(lock_id): Path<String>,
) -> impl IntoResponse {
    // Use the same error body as `renew_lock` so a malformed lease ID is
    // reported identically by both lock-lease endpoints.
    let Ok(lease_id) = Uuid::parse_str(&lock_id) else {
        return (StatusCode::BAD_REQUEST, "invalid UUID").into_response();
    };
    match state.db.release_lock(lease_id).await {
        Ok(()) => StatusCode::NO_CONTENT.into_response(),
        Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
    }
}
/// Handle `GET /api/v1/current-task/state/{name}/data` -- download a state
/// file on behalf of a running effect.
///
/// The URL carries no project ID. The project is taken from the bearer token
/// in the `Authorization` header via [`extract_project_from_effect_token`],
/// so the caller can only read state files of the project its token names.
///
/// ## Responses
///
/// - **200 OK** -- the raw blob, served as `application/octet-stream`.
/// - **401 Unauthorized** -- the token helper rejected the request.
/// - **404 Not Found** -- no state file with this name exists for the project.
/// - **500 Internal Server Error** -- the backend returned an error.
pub async fn get_current_task_state<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    headers: HeaderMap,
    Path(name): Path<String>,
) -> impl IntoResponse {
    let project_id = match extract_project_from_effect_token(&state, &headers) {
        Err(rejection) => return rejection.into_response(),
        Ok(project_id) => project_id,
    };

    let lookup = state.db.get_state_file(project_id, &name).await;
    match lookup {
        Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
        Ok(None) => StatusCode::NOT_FOUND.into_response(),
        Ok(Some(blob)) => {
            let content_type = [("content-type", "application/octet-stream")];
            (StatusCode::OK, content_type, blob).into_response()
        }
    }
}
/// Handle `PUT /api/v1/current-task/state/{name}/data` -- upload a state file
/// on behalf of a running effect.
///
/// As with [`get_current_task_state`], the project is derived from the bearer
/// token rather than the URL. The raw request body is passed to the backend's
/// `put_state_file` for that project and name.
///
/// ## Responses
///
/// - **204 No Content** -- the backend accepted the blob.
/// - **401 Unauthorized** -- the token helper rejected the request.
/// - **500 Internal Server Error** -- the backend returned an error.
pub async fn put_current_task_state<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    headers: HeaderMap,
    Path(name): Path<String>,
    body: Bytes,
) -> impl IntoResponse {
    let project_id = match extract_project_from_effect_token(&state, &headers) {
        Err(rejection) => return rejection.into_response(),
        Ok(project_id) => project_id,
    };

    let outcome = state.db.put_state_file(project_id, &name, &body).await;
    match outcome {
        Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
        Ok(()) => StatusCode::NO_CONTENT.into_response(),
    }
}
/// Extract the project UUID from an effect-scoped JWT token in the
/// `Authorization: Bearer <token>` header.
///
/// This helper is used by the `/current-task/state` endpoints to determine
/// which project's state files the caller may access. It:
///
/// 1. Reads the `Authorization` header and splits it at the first space into
///    scheme and credentials. The scheme must be `Bearer`, compared
///    case-insensitively because HTTP authentication scheme names are
///    case-insensitive (`bearer <token>` is as valid as `Bearer <token>`).
/// 2. Passes the credentials to [`parse_effect_token`] together with the
///    configured `jwt_private_key`. Token validation (signature, expiry,
///    scope) is that function's responsibility.
/// 3. Parses the first element of the returned tuple as the project UUID.
///
/// # Errors
///
/// Returns `Err(StatusCode::UNAUTHORIZED)` if the header is missing or not
/// valid visible ASCII, the scheme is not `Bearer`, the token is rejected, or
/// the project ID in the token is not a UUID.
fn extract_project_from_effect_token<DB: StorageBackend>(
    state: &AppState<DB>,
    headers: &HeaderMap,
) -> Result<Uuid, StatusCode> {
    let auth_header = headers
        .get("authorization")
        .and_then(|v| v.to_str().ok())
        .ok_or(StatusCode::UNAUTHORIZED)?;

    // Splitting at the first space keeps the credentials byte-for-byte what
    // `strip_prefix("Bearer ")` would have produced for a canonical header.
    let (scheme, token) = auth_header
        .split_once(' ')
        .ok_or(StatusCode::UNAUTHORIZED)?;
    if !scheme.eq_ignore_ascii_case("Bearer") {
        return Err(StatusCode::UNAUTHORIZED);
    }

    let (project_id_str, _job_id, _attr_path) =
        parse_effect_token(&state.config.jwt_private_key, token)
            .map_err(|_| StatusCode::UNAUTHORIZED)?;
    Uuid::parse_str(&project_id_str).map_err(|_| StatusCode::UNAUTHORIZED)
}

View file

@ -0,0 +1,432 @@
//! # Task management endpoints
//!
//! Tasks are the atomic units of work dispatched to agents. There are three
//! task types: evaluation (discover Nix attributes), build (run `nix-build`),
//! and effect (run a side-effecting action). This module provides both the
//! agent-facing polling/reporting API and the REST-based agent lifecycle
//! endpoints.
//!
//! ## Agent task flow
//!
//! ```text
//! Agent ──POST /tasks──> Server dequeues pending task for agent's platform
//! │ │
//! │ └─> Marks task as Running, assigns to agent
//! │
//! ├──POST /tasks/:id──> Update task status (running, success, failure)
//! │
//! ├──POST /tasks/:id/eval──> Report evaluation events (attribute, done)
//! │ └─> Forwarded as SchedulerEvent
//! │
//! ├──POST /tasks/:id/build──> Report build events (done with success/failure)
//! │ └─> Forwarded as SchedulerEvent
//! │
//! └──POST /tasks/log──> Send structured log entries for storage
//! ```
//!
//! ## REST agent lifecycle
//!
//! For agents that use HTTP polling instead of WebSocket, three endpoints
//! manage the session lifecycle:
//!
//! - `POST /agent/session` -- create a new session (equivalent to AgentHello)
//! - `POST /agent/heartbeat` -- periodic liveness signal to prevent timeout
//! - `POST /agent/goodbye` -- graceful shutdown, triggers scheduler notification
use axum::{
extract::{Json, Path, State},
http::StatusCode,
response::IntoResponse,
};
use serde_json::{json, Value};
use std::sync::Arc;
use uuid::Uuid;
use jupiter_api_types::TaskStatus;
use jupiter_db::backend::StorageBackend;
use jupiter_scheduler::engine::SchedulerEvent;
use crate::state::AppState;
/// Handle `POST /api/v1/tasks` -- agent polls for the next available task.
///
/// The agent sends its session ID, supported platforms, and system features.
/// The server tries each platform in the order given and asks the backend to
/// dequeue a pending task for it. The first task found is marked as Running,
/// assigned to the requesting agent session, and returned.
///
/// ## Request body
///
/// ```json
/// {
///   "agentSessionId": "<uuid>",
///   "platforms": ["x86_64-linux", "aarch64-linux"],
///   "systemFeatures": ["kvm", "big-parallel"]
/// }
/// ```
///
/// `platforms` and `systemFeatures` default to empty lists when missing or
/// not arrays of strings.
///
/// ## Response
///
/// - **200 OK** with task details (`taskId`, `taskType`, `payload`) if work
///   is available.
/// - **204 No Content** if no pending tasks match the agent's capabilities.
///   The agent should back off and poll again after a delay.
/// - **400 Bad Request** if `agentSessionId` is missing or not a UUID.
/// - **500 Internal Server Error** if dequeuing fails, or if the dequeued
///   task could not be recorded as running for this agent.
pub async fn poll_task<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Json(body): Json<Value>,
) -> impl IntoResponse {
    let agent_session_id = body
        .get("agentSessionId")
        .and_then(|v| v.as_str())
        .and_then(|s| Uuid::parse_str(s).ok());
    let Some(agent_id) = agent_session_id else {
        return (
            StatusCode::BAD_REQUEST,
            Json(json!({"error": "agentSessionId required"})),
        )
            .into_response();
    };

    let platforms: Vec<String> = body
        .get("platforms")
        .and_then(|v| serde_json::from_value(v.clone()).ok())
        .unwrap_or_default();
    let system_features: Vec<String> = body
        .get("systemFeatures")
        .and_then(|v| serde_json::from_value(v.clone()).ok())
        .unwrap_or_default();

    // Try to dequeue a pending task for each of the agent's supported
    // platforms, in order. The first match wins.
    for platform in &platforms {
        match state.db.dequeue_task(platform, &system_features).await {
            Ok(Some((task_id, task_type, payload))) => {
                // Record the assignment before handing the task out. If this
                // write fails the task must not be reported as dispatched:
                // the database would not know which agent is running it.
                if let Err(e) = state
                    .db
                    .update_task_status(task_id, TaskStatus::Running, Some(agent_id))
                    .await
                {
                    return (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response();
                }
                return Json(json!({
                    "taskId": task_id,
                    "taskType": task_type.to_string(),
                    "payload": payload,
                }))
                .into_response();
            }
            Ok(None) => continue,
            Err(e) => {
                return (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response();
            }
        }
    }

    // No tasks available for any of the agent's platforms.
    StatusCode::NO_CONTENT.into_response()
}
/// Handle `POST /api/v1/tasks/{id}` -- update task status.
///
/// Agents use this to report status transitions (e.g. from "running" to
/// "success" or "failure"). The status string is parsed into [`TaskStatus`]
/// and stored in the database.
///
/// ## Request body
///
/// ```json
/// { "status": "success" }
/// ```
///
/// Valid status values depend on [`TaskStatus`]: `"pending"`, `"running"`,
/// `"success"`, `"failure"`, `"cancelled"`.
pub async fn update_task<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Path(task_id): Path<String>,
Json(body): Json<Value>,
) -> impl IntoResponse {
let uuid = match Uuid::parse_str(&task_id) {
Ok(u) => u,
Err(_) => return (StatusCode::BAD_REQUEST, "invalid UUID").into_response(),
};
let status_str = body
.get("status")
.and_then(|v| v.as_str())
.unwrap_or("running");
let status = match status_str.parse::<TaskStatus>() {
Ok(s) => s,
Err(_) => {
return (StatusCode::BAD_REQUEST, "invalid status").into_response();
}
};
match state.db.update_task_status(uuid, status, None).await {
Ok(()) => Json(json!({"id": task_id, "status": status_str})).into_response(),
Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
}
}
/// Handle `POST /api/v1/tasks/{id}/eval` -- receive evaluation events from
/// an agent.
///
/// The task's `job_id` is resolved from the database first, then the event
/// is translated into a [`SchedulerEvent`] and sent to the scheduler:
///
/// - `"attribute"` -- forwarded as `SchedulerEvent::AttributeDiscovered` with
///   the attribute `path` (empty if missing or malformed) and the
///   `derivationPath`. A missing or empty `derivationPath` is forwarded as
///   `None` rather than as an empty string.
/// - `"done"` -- forwarded as `SchedulerEvent::EvaluationComplete`.
/// - anything else -- ignored.
///
/// ## Responses
///
/// - **200 OK** -- the event was queued for the scheduler, or was ignored.
/// - **400 Bad Request** (`"invalid UUID"`) -- the task ID is not a UUID.
/// - **500 Internal Server Error** -- the job lookup failed.
/// - **503 Service Unavailable** -- the scheduler channel is closed, so the
///   event could not be delivered. The agent may retry.
pub async fn task_eval_event<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Path(task_id): Path<String>,
    Json(body): Json<Value>,
) -> impl IntoResponse {
    let Ok(task_uuid) = Uuid::parse_str(&task_id) else {
        return (StatusCode::BAD_REQUEST, "invalid UUID").into_response();
    };

    // Resolve the job_id from the task record in the database.
    let job_id = match state.db.get_task_job_id(task_uuid).await {
        Ok(id) => id,
        Err(e) => return (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
    };

    let event_type = body.get("type").and_then(|v| v.as_str()).unwrap_or("");
    let event = match event_type {
        "attribute" => {
            let path = body
                .get("path")
                .and_then(|v| serde_json::from_value::<Vec<String>>(v.clone()).ok())
                .unwrap_or_default();
            // `None` means "no derivation reported"; never smuggle an empty
            // string through the `Option`.
            let derivation_path = body
                .get("derivationPath")
                .and_then(|v| v.as_str())
                .filter(|s| !s.is_empty())
                .map(str::to_string);
            SchedulerEvent::AttributeDiscovered {
                job_id,
                path,
                derivation_path,
                typ: jupiter_api_types::AttributeType::Regular,
                error: None,
            }
        }
        "done" => SchedulerEvent::EvaluationComplete {
            job_id,
            task_id: task_uuid,
        },
        // Unknown event types are accepted and dropped.
        _ => return StatusCode::OK.into_response(),
    };

    // `send` only fails when the scheduler's receiver is gone. Tell the agent
    // instead of acknowledging an event nobody will process.
    if state.scheduler_tx.send(event).await.is_err() {
        return (StatusCode::SERVICE_UNAVAILABLE, "scheduler unavailable").into_response();
    }
    StatusCode::OK.into_response()
}
/// Handle `POST /api/v1/tasks/{id}/build` -- receive build events from an agent.
///
/// Currently supports the `"done"` event type, which reports a build completion
/// with a success/failure flag. The event is forwarded to the scheduler as
/// `SchedulerEvent::BuildComplete` with the derivation path and build ID.
///
/// ## Request body
///
/// ```json
/// {
/// "type": "done",
/// "derivationPath": "/nix/store/...",
/// "success": true,
/// "buildId": "<uuid>"
/// }
/// ```
pub async fn task_build_event<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Path(_task_id): Path<String>,
Json(body): Json<Value>,
) -> impl IntoResponse {
let event_type = body.get("type").and_then(|v| v.as_str()).unwrap_or("");
let drv_path = body
.get("derivationPath")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
match event_type {
"done" => {
let success = body
.get("success")
.and_then(|v| v.as_bool())
.unwrap_or(false);
let build_id = body
.get("buildId")
.and_then(|v| v.as_str())
.and_then(|s| Uuid::parse_str(s).ok())
.unwrap_or(Uuid::nil());
let _ = state
.scheduler_tx
.send(SchedulerEvent::BuildComplete {
build_id,
derivation_path: drv_path,
success,
})
.await;
}
_ => {}
}
StatusCode::OK.into_response()
}
/// Handle `POST /api/v1/tasks/log` -- receive structured log entries from an agent.
///
/// Agents batch log output and send it here for persistent storage. Each entry
/// contains a timestamp, log level, and message. The entries are associated with
/// a task ID so they can be retrieved later via the build/effect log endpoints.
///
/// ## Request body
///
/// ```json
/// {
/// "taskId": "<uuid>",
/// "entries": [
/// { "i": 0, "o": "stdout", "t": 1234567890, "s": "building..." }
/// ]
/// }
/// ```
pub async fn task_log<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Json(body): Json<Value>,
) -> impl IntoResponse {
let task_id = body
.get("taskId")
.and_then(|v| v.as_str())
.and_then(|s| Uuid::parse_str(s).ok());
let entries = body
.get("entries")
.and_then(|v| serde_json::from_value::<Vec<jupiter_api_types::LogEntry>>(v.clone()).ok())
.unwrap_or_default();
if let Some(tid) = task_id {
let _ = state.db.store_log_entries(tid, &entries).await;
}
StatusCode::OK.into_response()
}
/// Handle `POST /api/v1/agent/session` -- register an agent session over REST.
///
/// The JSON body is deserialized into `jupiter_api_types::AgentHello` and
/// handed to the backend's `create_agent_session`. Agents that poll over HTTP
/// use this instead of the WebSocket hello handshake.
///
/// The session is always created under the nil account ID: the handler does
/// not yet verify a cluster join token to determine the real account.
///
/// ## Responses
///
/// - **201 Created** -- the session record returned by the backend, as JSON.
/// - **400 Bad Request** -- the body is not a valid `AgentHello`; the
///   deserialization error is returned.
/// - **500 Internal Server Error** -- the backend returned an error.
pub async fn create_agent_session<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    Json(body): Json<Value>,
) -> impl IntoResponse {
    let hello = match serde_json::from_value::<jupiter_api_types::AgentHello>(body) {
        Err(e) => return (StatusCode::BAD_REQUEST, e.to_string()).into_response(),
        Ok(parsed) => parsed,
    };

    // Placeholder account: in production the cluster_join_token in the
    // request should be verified to determine the account.
    let account_id = Uuid::nil();

    let created = state.db.create_agent_session(&hello, account_id).await;
    match created {
        Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
        Ok(session) => (StatusCode::CREATED, Json(json!(session))).into_response(),
    }
}
/// Handle `POST /api/v1/agent/heartbeat` -- agent liveness signal.
///
/// Agents send periodic heartbeats to indicate they are still alive and
/// processing tasks. The server updates the session's `last_heartbeat`
/// timestamp. If an agent stops sending heartbeats, the server may
/// eventually garbage-collect its session and reassign tasks.
///
/// ## Request body
///
/// ```json
/// { "sessionId": "<uuid>" }
/// ```
pub async fn agent_heartbeat<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Json(body): Json<Value>,
) -> impl IntoResponse {
let session_id = body
.get("sessionId")
.and_then(|v| v.as_str())
.and_then(|s| Uuid::parse_str(s).ok());
match session_id {
Some(id) => match state.db.update_agent_heartbeat(id).await {
Ok(()) => StatusCode::OK.into_response(),
Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
},
None => (StatusCode::BAD_REQUEST, "sessionId required").into_response(),
}
}
/// Handle `POST /api/v1/agent/goodbye` -- graceful agent shutdown.
///
/// When an agent shuts down cleanly, it sends a goodbye message so the server
/// can immediately clean up the session and notify the scheduler. This is
/// preferable to waiting for heartbeat timeout, as the scheduler can
/// reassign in-progress tasks right away.
///
/// The session is deleted from the database and a
/// [`SchedulerEvent::AgentDisconnected`] is sent to the scheduler.
///
/// ## Request body
///
/// ```json
/// { "sessionId": "<uuid>" }
/// ```
pub async fn agent_goodbye<DB: StorageBackend>(
State(state): State<Arc<AppState<DB>>>,
Json(body): Json<Value>,
) -> impl IntoResponse {
let session_id = body
.get("sessionId")
.and_then(|v| v.as_str())
.and_then(|s| Uuid::parse_str(s).ok());
match session_id {
Some(id) => {
// Delete the session from the database.
let _ = state.db.delete_agent_session(id).await;
// Notify the scheduler so it can reassign any tasks that were
// assigned to this agent.
let _ = state
.scheduler_tx
.send(SchedulerEvent::AgentDisconnected {
agent_session_id: id,
})
.await;
StatusCode::OK.into_response()
}
None => (StatusCode::BAD_REQUEST, "sessionId required").into_response(),
}
}

View file

@ -0,0 +1,175 @@
//! # Forge webhook endpoints
//!
//! These endpoints receive webhook events from source code forges (GitHub,
//! Gitea) and forward them to the scheduler to trigger CI jobs. The flow is:
//!
//! ```text
//! GitHub/Gitea ──POST /webhooks/github or /webhooks/gitea──> Jupiter
//! │
//! ├── 1. Extract signature header and event type header
//! ├── 2. Find the matching ForgeProvider by type
//! ├── 3. Verify the HMAC signature against the webhook secret
//! ├── 4. Parse the event payload (push, PR, etc.)
//! └── 5. Send SchedulerEvent::ForgeEvent to the scheduler
//! │
//! v
//! Scheduler looks up project by repo
//! Creates a new Job for the commit
//! ```
//!
//! ## Signature verification
//!
//! Both forges sign the payload with HMAC-SHA256 but deliver the signature in a different header:
//! - **GitHub**: HMAC-SHA256 in the `X-Hub-Signature-256` header.
//! - **Gitea**: HMAC-SHA256 in the `X-Gitea-Signature` header.
//!
//! The webhook secret is configured per-forge in `jupiter.toml` and passed to
//! the [`ForgeProvider::verify_webhook`] method. If verification fails, the
//! endpoint returns 401 Unauthorized.
//!
//! ## Supported events
//!
//! The [`ForgeProvider::parse_webhook`] method determines which events are
//! actionable (typically push events and pull request events). Events it
//! reports as not actionable are logged at info level and acknowledged with a
//! 200 OK response.
use axum::{
body::Bytes,
extract::State,
http::{HeaderMap, StatusCode},
response::IntoResponse,
};
use std::sync::Arc;
use tracing::{info, warn};
use jupiter_api_types::ForgeType;
use jupiter_db::backend::StorageBackend;
use jupiter_scheduler::engine::SchedulerEvent;
use crate::state::AppState;
/// Handle `POST /api/v1/webhooks/github` -- entry point for GitHub webhooks.
///
/// A thin adapter over [`handle_webhook`] that supplies the GitHub forge type
/// and the two GitHub header names:
/// - signature: `X-Hub-Signature-256`
/// - event type: `X-GitHub-Event` (e.g. `"push"`, `"pull_request"`)
pub async fn github_webhook<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    headers: HeaderMap,
    body: Bytes,
) -> impl IntoResponse {
    const SIGNATURE_HEADER: &str = "X-Hub-Signature-256";
    const EVENT_HEADER: &str = "X-GitHub-Event";

    let processing = handle_webhook(
        state,
        &headers,
        &body,
        ForgeType::GitHub,
        SIGNATURE_HEADER,
        EVENT_HEADER,
    );
    processing.await
}
/// Handle `POST /api/v1/webhooks/gitea` -- entry point for Gitea webhooks.
///
/// A thin adapter over [`handle_webhook`] that supplies the Gitea forge type
/// and the two Gitea header names:
/// - signature: `X-Gitea-Signature`
/// - event type: `X-Gitea-Event` (e.g. `"push"`, `"pull_request"`)
pub async fn gitea_webhook<DB: StorageBackend>(
    State(state): State<Arc<AppState<DB>>>,
    headers: HeaderMap,
    body: Bytes,
) -> impl IntoResponse {
    const SIGNATURE_HEADER: &str = "X-Gitea-Signature";
    const EVENT_HEADER: &str = "X-Gitea-Event";

    let processing = handle_webhook(
        state,
        &headers,
        &body,
        ForgeType::Gitea,
        SIGNATURE_HEADER,
        EVENT_HEADER,
    );
    processing.await
}
/// Shared webhook handler for all forge types.
///
/// Processing pipeline:
///
/// 1. Read the signature (optional) and event type (required) from the
///    forge-specific headers. Returns 400 if the event type header is absent.
/// 2. Offer the request to every configured provider whose
///    [`ForgeProvider::forge_type`] matches `forge_type`, in configuration
///    order, and pick the first one whose `verify_webhook` accepts the
///    signature. Trying all of them lets several forges of the same type,
///    each with its own secret, share one endpoint; with a single provider
///    the outcome is the same as checking only that provider.
///    - No provider of that type configured: 404.
///    - No provider accepted and at least one verification errored: 500 with
///      the first error message.
///    - No provider accepted otherwise: 401.
/// 3. Parse the payload with the accepting provider. Actionable events are
///    sent to the scheduler as [`SchedulerEvent::ForgeEvent`]; events the
///    provider reports as not actionable are logged and acknowledged with
///    200; parse failures return 400.
/// 4. If the scheduler channel is closed the event cannot be processed, so
///    the handler returns 503 instead of acknowledging it.
///
/// # Arguments
///
/// * `state` -- shared application state containing forge providers and the
///   scheduler channel.
/// * `headers` -- HTTP headers from the webhook request.
/// * `body` -- raw request body bytes (needed for signature verification).
/// * `forge_type` -- which forge sent this webhook (GitHub, Gitea).
/// * `sig_header` -- the header name containing the signature.
/// * `event_header` -- the header name containing the event type string.
async fn handle_webhook<DB: StorageBackend>(
    state: Arc<AppState<DB>>,
    headers: &HeaderMap,
    body: &[u8],
    forge_type: ForgeType,
    sig_header: &str,
    event_header: &str,
) -> impl IntoResponse {
    // Extract the signature (optional for some forges) and event type (required).
    let signature = headers.get(sig_header).and_then(|v| v.to_str().ok());
    let Some(event_type) = headers.get(event_header).and_then(|v| v.to_str().ok()) else {
        return (StatusCode::BAD_REQUEST, "missing event type header").into_response();
    };

    // Find the first provider of this forge type that accepts the signature.
    let mut configured = false;
    let mut verify_error: Option<String> = None;
    let mut accepted = None;
    for (id, provider) in state
        .forges
        .iter()
        .filter(|(_, f)| f.forge_type() == forge_type)
    {
        configured = true;
        match provider.verify_webhook(signature, body) {
            Ok(true) => {
                accepted = Some((*id, provider));
                break;
            }
            Ok(false) => {}
            Err(e) => {
                // Keep the first failure; it is only reported if nothing matches.
                verify_error.get_or_insert_with(|| e.to_string());
            }
        }
    }
    let Some((forge_id, forge)) = accepted else {
        return if !configured {
            (StatusCode::NOT_FOUND, "forge not configured").into_response()
        } else if let Some(message) = verify_error {
            (StatusCode::INTERNAL_SERVER_ERROR, message).into_response()
        } else {
            (StatusCode::UNAUTHORIZED, "invalid signature").into_response()
        };
    };

    // Parse the event payload and forward to the scheduler if actionable.
    match forge.parse_webhook(event_type, body) {
        Ok(Some(event)) => {
            info!("Received forge event: {:?}", event);
            // `send` only fails when the scheduler's receiver is gone. Report
            // that to the forge rather than acknowledging a dropped event.
            let delivery = state
                .scheduler_tx
                .send(SchedulerEvent::ForgeEvent { forge_id, event })
                .await;
            if delivery.is_err() {
                warn!("Scheduler channel closed; forge event was not delivered");
                return (StatusCode::SERVICE_UNAVAILABLE, "scheduler unavailable").into_response();
            }
            StatusCode::OK.into_response()
        }
        Ok(None) => {
            // Event type is valid but not actionable (e.g. star, fork).
            info!("Ignoring unsupported event type: {}", event_type);
            StatusCode::OK.into_response()
        }
        Err(e) => {
            warn!("Failed to parse webhook: {}", e);
            (StatusCode::BAD_REQUEST, e.to_string()).into_response()
        }
    }
}

View file

@ -0,0 +1,243 @@
//! # Shared application state
//!
//! This module defines the core state structures that are shared across all
//! axum handlers via `State<Arc<AppState<DB>>>`. The state is generic over the
//! [`StorageBackend`] trait, allowing the same server code to work against
//! SQLite (for single-node deployments) or PostgreSQL (for production clusters).
//!
//! ## Data flow overview
//!
//! ```text
//! AppState
//! ├── config -- ServerConfig loaded from TOML
//! ├── db -- Arc<DB> shared database handle
//! ├── scheduler_tx -- mpsc::Sender<SchedulerEvent> to the scheduler loop
//! ├── agent_hub -- Arc<RwLock<AgentHub>> tracking live WebSocket agents
//! └── forges -- Arc<Vec<(Uuid, Box<dyn ForgeProvider>)>> for webhook verification
//! ```
//!
//! When a webhook arrives or an agent reports results, the handler sends a
//! [`SchedulerEvent`] through `scheduler_tx`. The scheduler processes these
//! events, creates/updates database records, and dispatches new tasks to agents
//! by writing into the appropriate `AgentSessionInfo.tx` channel found in the
//! [`AgentHub`].
//!
//! ## Task context tracking
//!
//! When the scheduler dispatches a task to an agent, it stores a [`TaskContext`]
//! in the agent's [`AgentSessionInfo::task_contexts`] map. This allows the
//! WebSocket handler to resolve incoming agent messages (which reference a
//! `task_id`) back to the originating `job_id`, `build_id`, or `effect_id`
//! for database updates and scheduler notifications.
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::{mpsc, RwLock};
use uuid::Uuid;
use jupiter_api_types::{AgentSession, ServerConfig, TaskType};
use jupiter_db::backend::StorageBackend;
use jupiter_forge::ForgeProvider;
use jupiter_scheduler::engine::{SchedulerEngine, SchedulerEvent};
/// Central application state shared across all HTTP and WebSocket handlers.
///
/// Handlers receive this struct as axum `State<Arc<AppState<DB>>>`. It holds
/// everything a handler needs: configuration, database access, the scheduler
/// channel for emitting events, the agent hub for tracking live connections,
/// and the list of configured forge providers.
///
/// The `scheduler` field is an `Option` because the engine is created
/// together with the state and is meant to be taken out via
/// [`take_scheduler`](Self::take_scheduler) during startup and moved into a
/// dedicated tokio task, leaving `None` behind.
///
/// NOTE(review): earlier documentation states that a manual `Clone` impl
/// produces copies with `scheduler: None` because the engine is not
/// cloneable. That impl is not part of this definition -- confirm it exists
/// before relying on it.
pub struct AppState<DB: StorageBackend> {
/// Server configuration loaded from TOML (listen address, JWT secret, etc.).
pub config: ServerConfig,
/// Shared database handle. All handlers read/write through this reference.
pub db: Arc<DB>,
/// Sending half of the channel to the [`SchedulerEngine`] event loop.
/// Handlers use this to notify the scheduler of webhook events, agent
/// disconnects, build completions, and other state transitions.
pub scheduler_tx: mpsc::Sender<SchedulerEvent>,
/// The scheduler engine itself, present only before [`take_scheduler`] is
/// called during startup. Private so it can only leave through that method.
scheduler: Option<SchedulerEngine<DB>>,
/// Registry of all currently connected agents and their WebSocket channels.
/// Protected by an async `RwLock` so multiple readers can inspect agent
/// state concurrently while writes (connect/disconnect) are serialized.
pub agent_hub: Arc<RwLock<AgentHub>>,
/// Configured forge providers (GitHub, Gitea) used by webhook handlers to
/// verify signatures and parse events. Each entry is a `(forge_id, provider)`
/// pair; the `forge_id` is attached to every `SchedulerEvent::ForgeEvent`
/// so the scheduler knows which forge an event came from.
pub forges: Arc<Vec<(Uuid, Box<dyn ForgeProvider>)>>,
}
/// Registry of all currently connected agents and their WebSocket sessions.
///
/// The `AgentHub` is the central coordination point between the scheduler
/// (which needs to dispatch tasks to agents) and the WebSocket handler
/// (which manages the actual connections). It is wrapped in
/// `Arc<RwLock<...>>` inside [`AppState`] so it can be accessed from any
/// handler or background task.
pub struct AgentHub {
/// Map from agent session UUID to the session metadata and channel.
/// Entries are inserted by `add_session` and removed by `remove_session`.
pub sessions: HashMap<Uuid, AgentSessionInfo>,
}
/// Tracks the mapping from a dispatched `task_id` back to the job, build,
/// or effect that created it.
///
/// Instances are stored per agent in `AgentSessionInfo::task_contexts`, keyed
/// by task ID. The intended use (implemented outside this definition) is that
/// the scheduler fills in a context when it sends a task to an agent, and the
/// WebSocket handler looks it up when the agent reports back (e.g.
/// `BuildDone`, `EffectDone`) to decide which database records to update and
/// which [`SchedulerEvent`] to emit.
#[derive(Debug, Clone)]
pub struct TaskContext {
/// The unique ID of the task that was dispatched.
// NOTE(review): `allow(dead_code)` suggests nothing reads this field yet -- confirm.
#[allow(dead_code)]
pub task_id: Uuid,
/// Whether this task is an evaluation, build, or effect.
// NOTE(review): `allow(dead_code)` suggests nothing reads this field yet -- confirm.
#[allow(dead_code)]
pub task_type: TaskType,
/// The job that this task belongs to. Always set.
pub job_id: Uuid,
/// The build record, if this is a build task.
pub build_id: Option<Uuid>,
/// The effect record, if this is an effect task.
pub effect_id: Option<Uuid>,
}
/// Per-agent session state stored in the [`AgentHub`].
///
/// Each connected agent has one of these entries. It contains the database
/// session record, the channel for sending WebSocket messages to the agent,
/// the pending acknowledgement queue for reliable delivery, and the task
/// context map for correlating agent responses to jobs/builds/effects.
///
/// `AgentHub::add_session` creates entries with an empty `pending_acks`, an
/// empty `task_contexts` map and `next_seq` set to 1.
pub struct AgentSessionInfo {
/// The database record for this agent session, containing hostname,
/// platform list, and other metadata from the AgentHello handshake.
#[allow(dead_code)]
pub session: AgentSession,
/// Channel for sending serialized JSON frames to the agent over WebSocket.
/// The WebSocket send loop reads from the corresponding receiver.
pub tx: mpsc::Sender<String>,
/// Messages that have been sent to the agent but not yet acknowledged.
/// Each entry is `(sequence_number, payload)`. Intended protocol
/// (implemented outside this definition): when an `Ack { n }` frame
/// arrives, all entries with `seq <= n` are removed, so that
/// unacknowledged messages can be re-sent after a reconnect.
pub pending_acks: Vec<(u64, serde_json::Value)>,
/// The next sequence number to use when sending a `Msg` frame to this agent.
/// Starts at 1 for a freshly registered session.
#[allow(dead_code)]
pub next_seq: u64,
/// Maps `task_id` to [`TaskContext`] for all tasks currently dispatched to
/// this agent. Populated when the scheduler sends a task; looked up when
/// the agent reports results (build done, eval done, etc.).
pub task_contexts: HashMap<Uuid, TaskContext>,
}
impl AgentHub {
    /// Create an empty agent hub with no connected sessions.
    pub fn new() -> Self {
        Self {
            sessions: HashMap::new(),
        }
    }

    /// Register a newly connected agent in the hub.
    ///
    /// Stores the session metadata together with the sending half of the
    /// agent's message channel, keyed by the session's ID. The new entry
    /// starts with no pending acknowledgements, no task contexts, and a next
    /// sequence number of 1. An existing entry with the same ID is replaced.
    ///
    /// Returns the session UUID used as the key.
    pub fn add_session(&mut self, session: AgentSession, tx: mpsc::Sender<String>) -> Uuid {
        let id: Uuid = session.id.clone().into();
        self.sessions.insert(
            id,
            AgentSessionInfo {
                session,
                tx,
                pending_acks: Vec::new(),
                next_seq: 1,
                task_contexts: HashMap::new(),
            },
        );
        id
    }

    /// Remove a disconnected agent from the hub.
    ///
    /// Dropping the entry drops the hub's `tx` sender and discards the
    /// agent's pending acknowledgements and task contexts. The scheduler is
    /// not informed here; callers should send
    /// [`SchedulerEvent::AgentDisconnected`] separately so it can reassign
    /// tasks. Removing an unknown ID is a no-op.
    pub fn remove_session(&mut self, id: Uuid) {
        self.sessions.remove(&id);
    }

    /// Find an agent that advertises support for the given Nix platform
    /// string (e.g. `"x86_64-linux"`).
    ///
    /// Performs a linear scan over all sessions and returns the ID of the
    /// first one whose platform list contains `platform`, or `None` if no
    /// session matches. Because the sessions live in a `HashMap`, which
    /// match is "first" is unspecified when several agents qualify.
    #[allow(dead_code)]
    pub fn find_agent_for_platform(&self, platform: &str) -> Option<Uuid> {
        self.sessions
            .iter()
            // Compare as `&str` so no `String` is allocated per session.
            .find(|(_, info)| {
                info.session
                    .platforms
                    .iter()
                    .any(|supported| supported.as_str() == platform)
            })
            .map(|(id, _)| *id)
    }
}
impl<DB: StorageBackend> AppState<DB> {
    /// Build the shared application state, including a fresh
    /// [`SchedulerEngine`].
    ///
    /// The engine is constructed here with a handle to `db` and an (initially
    /// empty) forge list; its event sender is kept in `scheduler_tx` so that
    /// handlers can post events. The engine itself is parked in `scheduler`
    /// until [`Self::take_scheduler`] hands it to the caller, who is expected
    /// to spawn it before the server starts accepting connections.
    pub fn new(config: ServerConfig, db: Arc<DB>) -> Self {
        let forges: Arc<Vec<(Uuid, Box<dyn ForgeProvider>)>> = Arc::new(Vec::new());
        let engine = SchedulerEngine::new(Arc::clone(&db), Arc::clone(&forges));
        let scheduler_tx = engine.event_sender();
        let agent_hub = Arc::new(RwLock::new(AgentHub::new()));

        Self {
            config,
            db,
            scheduler_tx,
            scheduler: Some(engine),
            agent_hub,
            forges,
        }
    }

    /// Move the [`SchedulerEngine`] out of this state, leaving `None` behind.
    ///
    /// Returns `Some(engine)` on the first call on the original state and
    /// `None` afterwards (and always `None` on clones, which never carry an
    /// engine). Intended to be called once during startup so the engine can
    /// be run on a background task.
    pub fn take_scheduler(&mut self) -> Option<SchedulerEngine<DB>> {
        self.scheduler.take()
    }
}
/// Manual `Clone` implementation because [`SchedulerEngine`] does not
/// implement `Clone`. Cloned copies always have `scheduler: None`; only
/// the original holds the engine (until [`take_scheduler`] extracts it).
/// All other fields are cheaply cloneable (`Arc`, channel senders, etc.).
impl<DB: StorageBackend> Clone for AppState<DB> {
fn clone(&self) -> Self {
Self {
config: self.config.clone(),
db: self.db.clone(),
scheduler_tx: self.scheduler_tx.clone(),
scheduler: None,
agent_hub: self.agent_hub.clone(),
forges: self.forges.clone(),
}
}
}

View file

@ -0,0 +1,524 @@
//! # WebSocket handler -- Hercules CI agent wire protocol
//!
//! This module contains the core WebSocket logic for communicating with
//! `hercules-ci-agent` instances. It implements the full connection lifecycle:
//! handshake, bidirectional message processing, and cleanup on disconnect.
//!
//! ## Data flow
//!
//! ```text
//! hercules-ci-agent
//! │
//! │ WebSocket (JSON frames)
//! v
//! ws_handler ──> handle_socket
//! │
//! ├── send_task (tokio::spawn)
//! │ Reads from agent_rx channel, writes to WebSocket sink.
//! │ The scheduler writes into agent_tx (stored in AgentHub)
//! │ to dispatch tasks.
//! │
//! └── recv_task (tokio::spawn)
//! Reads from WebSocket stream, dispatches to:
//! ├── Msg: process_agent_message() ──> SchedulerEvent
//! ├── Ack: removes entries from pending_acks
//! ├── Oob: ignored after handshake
//! └── Exception: logged as warning
//! ```
//!
//! ## Reliable delivery
//!
//! Each `Msg` frame carries a monotonically increasing sequence number. The
//! receiver sends back an `Ack { n }` to confirm receipt of all messages up
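//! to `n`. The sender retains unacknowledged messages in `pending_acks` so
//! they can be retransmitted if the connection is re-established. Note that
//! this handler itself only prunes `pending_acks` when an `Ack` arrives and
//! drops the whole session entry on disconnect; it does not retransmit.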
use axum::{
extract::{
ws::{Message, WebSocket, WebSocketUpgrade},
State,
},
response::IntoResponse,
};
use futures::{SinkExt, StreamExt};
use serde_json::json;
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::{mpsc, RwLock};
use tracing::{error, info, warn};
use uuid::Uuid;
use jupiter_api_types::*;
use jupiter_db::backend::StorageBackend;
use jupiter_scheduler::engine::SchedulerEvent;
use crate::state::{AgentHub, AppState, TaskContext};
/// Axum handler for the agent WebSocket endpoint (`/api/v1/agent/socket`).
///
/// Accepts the HTTP upgrade request and, once the connection has been
/// upgraded, runs the agent protocol on it via [`handle_socket`] with the
/// shared application state.
pub async fn ws_handler<DB: StorageBackend>(
    ws: WebSocketUpgrade,
    State(state): State<Arc<AppState<DB>>>,
) -> impl IntoResponse {
    ws.on_upgrade(move |socket| async move { handle_socket(socket, state).await })
}
/// Run the full agent WebSocket protocol on an upgraded connection.
///
/// This function:
/// 1. Sends the `ServiceInfo` out-of-band frame (version `[2, 0]`).
/// 2. Waits for the agent's `AgentHello` OOB frame containing hostname,
/// platforms, and capabilities.
/// 3. Creates a database session for the agent and sends `Ack { n: 0 }`
/// to complete the three-way handshake.
/// 4. Splits the WebSocket into send/receive halves and spawns two tasks:
/// - **send_task**: forwards messages from the `agent_rx` channel to the
/// WebSocket. Other components (scheduler) write to `agent_tx` in the
/// [`AgentHub`] to send messages to this agent.
/// - **recv_task**: reads frames from the WebSocket and processes them
/// via [`process_agent_message`].
/// 5. When either task finishes (connection dropped, error, etc.), aborts
/// the other and cleans up: removes the session from the agent hub,
/// deletes it from the database, and notifies the scheduler.
async fn handle_socket<DB: StorageBackend>(socket: WebSocket, state: Arc<AppState<DB>>) {
let (mut ws_tx, mut ws_rx) = socket.split();
// Step 1: Send ServiceInfo as an out-of-band frame to initiate the
// handshake. The version [2, 0] indicates protocol v2.
let service_info = Frame::Oob {
p: json!({ "version": [2, 0] }),
};
if let Ok(msg) = serde_json::to_string(&service_info) {
if ws_tx.send(Message::Text(msg.into())).await.is_err() {
return;
}
}
// Step 2: Wait for the agent's AgentHello OOB frame. This contains the
// agent's hostname, supported Nix platforms, and other metadata needed
// to create a session record.
let agent_hello: AgentHello = loop {
match ws_rx.next().await {
Some(Ok(Message::Text(text))) => match serde_json::from_str::<Frame>(&text) {
Ok(Frame::Oob { p }) => match serde_json::from_value::<AgentHello>(p) {
Ok(hello) => break hello,
Err(e) => {
warn!("Failed to parse AgentHello: {}", e);
let err = Frame::Exception {
message: format!("Invalid AgentHello: {}", e),
};
let _ = ws_tx
.send(Message::Text(serde_json::to_string(&err).unwrap().into()))
.await;
return;
}
},
Ok(_) => continue,
Err(e) => {
warn!("Failed to parse frame: {}", e);
return;
}
},
Some(Ok(Message::Close(_))) | None => return,
_ => continue,
}
};
info!(
"Agent connected: {} (platforms: {:?})",
agent_hello.hostname, agent_hello.platforms
);
// Step 3: Create a database session for the agent. The account_id is nil
// for now; in production, the cluster_join_token should be verified to
// determine which account the agent belongs to.
let account_id = Uuid::nil();
let session = match state
.db
.create_agent_session(&agent_hello, account_id)
.await
{
Ok(s) => s,
Err(e) => {
error!("Failed to create agent session: {}", e);
let err = Frame::Exception {
message: format!("Failed to create session: {}", e),
};
let _ = ws_tx
.send(Message::Text(serde_json::to_string(&err).unwrap().into()))
.await;
return;
}
};
let session_id: Uuid = session.id.clone().into();
// Send Ack { n: 0 } to complete the three-way handshake. This tells the
// agent that the server is ready to exchange Msg frames.
let ack = Frame::Ack { n: 0 };
if let Ok(msg) = serde_json::to_string(&ack) {
if ws_tx.send(Message::Text(msg.into())).await.is_err() {
return;
}
}
// Step 4: Create a bounded channel for sending messages to this agent.
// The scheduler and other components will write to `agent_tx`, which is
// stored in the AgentHub. The send_task reads from `agent_rx` and
// forwards messages to the WebSocket.
let (agent_tx, mut agent_rx) = mpsc::channel::<String>(100);
// Register the agent in the hub so the scheduler can find it and
// dispatch tasks to it.
{
let mut hub = state.agent_hub.write().await;
hub.add_session(session, agent_tx);
}
info!("Agent session {} established", session_id);
// Spawn the send loop: reads serialized JSON frames from the agent_rx
// channel and writes them to the WebSocket. This task runs until the
// channel is closed (agent disconnects) or a write error occurs.
let mut send_task = tokio::spawn(async move {
while let Some(msg) = agent_rx.recv().await {
if ws_tx.send(Message::Text(msg.into())).await.is_err() {
break;
}
}
});
// Spawn the receive loop: reads frames from the WebSocket and processes
// them. Msg frames are dispatched to process_agent_message() which
// translates agent events into SchedulerEvents. Ack frames update the
// pending_acks buffer to track reliable delivery.
let scheduler_tx = state.scheduler_tx.clone();
let db = state.db.clone();
let agent_hub = state.agent_hub.clone();
let mut recv_task = tokio::spawn(async move {
while let Some(msg_result) = ws_rx.next().await {
match msg_result {
Ok(Message::Text(text)) => match serde_json::from_str::<Frame>(&text) {
Ok(Frame::Msg { n, p }) => {
// Process the agent's message payload (eval results,
// build completions, log entries, etc.)
if let Err(e) =
process_agent_message(&p, session_id, &scheduler_tx, &db, &agent_hub)
.await
{
warn!("Error processing agent message: {}", e);
}
// Acknowledge receipt of this message so the agent
// can remove it from its retry buffer.
let ack = Frame::Ack { n };
let hub = agent_hub.read().await;
if let Some(info) = hub.sessions.get(&session_id) {
let _ = info
.tx
.send(serde_json::to_string(&ack).unwrap())
.await;
}
}
Ok(Frame::Ack { n }) => {
// The agent has acknowledged receipt of our messages
// up to sequence number `n`. Remove those from our
// pending_acks buffer since they no longer need
// retransmission.
let mut hub = agent_hub.write().await;
if let Some(info) = hub.sessions.get_mut(&session_id) {
info.pending_acks.retain(|(seq, _)| *seq > n);
}
}
Ok(Frame::Oob { .. }) => {
// OOB frames are only expected during the handshake
// phase. Ignore any that arrive after.
}
Ok(Frame::Exception { message }) => {
warn!("Agent exception: {}", message);
}
Err(e) => {
warn!("Failed to parse frame: {}", e);
}
},
Ok(Message::Close(_)) => break,
Ok(Message::Ping(_)) => {
// Pong is handled automatically by axum
}
Err(e) => {
warn!("WebSocket error: {}", e);
break;
}
_ => {}
}
}
});
// Step 5: Wait for either the send or receive task to finish, then
// abort the other. This handles both clean disconnects and errors.
tokio::select! {
_ = &mut send_task => {
recv_task.abort();
}
_ = &mut recv_task => {
send_task.abort();
}
}
// Cleanup: remove the agent from the hub, delete the database session,
// and notify the scheduler so it can reassign any in-progress tasks.
info!("Agent session {} disconnected", session_id);
{
let mut hub = state.agent_hub.write().await;
hub.remove_session(session_id);
}
let _ = state.db.delete_agent_session(session_id).await;
// Notify the scheduler that this agent is no longer available. The
// scheduler will mark any tasks assigned to this agent as failed or
// pending re-dispatch.
let _ = state
.scheduler_tx
.send(SchedulerEvent::AgentDisconnected {
agent_session_id: session_id,
})
.await;
}
/// Resolve `task_id` to its `(job_id, build_id, effect_id)` triple.
///
/// The triple is read from the matching [`TaskContext`] in `task_contexts`.
/// When the task is unknown (for example because it was dispatched before
/// context tracking existed, or because of a race with dispatch), a warning
/// is logged and the placeholder `(Uuid::nil(), None, None)` is returned
/// instead of failing.
fn resolve_task_context(
    task_contexts: &HashMap<Uuid, TaskContext>,
    task_id: Uuid,
) -> (Uuid, Option<Uuid>, Option<Uuid>) {
    if let Some(ctx) = task_contexts.get(&task_id) {
        return (ctx.job_id, ctx.build_id, ctx.effect_id);
    }

    warn!(
        "No task context found for task_id {}, using nil UUIDs",
        task_id
    );
    (Uuid::nil(), None, None)
}
/// Process an incoming message from a connected agent.
///
/// Agent messages are deserialized into [`AgentMessage`] variants and then
/// translated into appropriate [`SchedulerEvent`]s or database operations.
/// The mapping is:
///
/// | Agent message | Action |
/// |--------------------|------------------------------------------------------|
/// | `Started` | Logged (informational) |
/// | `Cancelled` | Logged (informational) |
/// | `Attribute` | `SchedulerEvent::AttributeDiscovered` with type info |
/// | `AttributeEffect` | `SchedulerEvent::AttributeDiscovered` (Effect type) |
/// | `AttributeError` | `SchedulerEvent::AttributeDiscovered` with error |
/// | `DerivationInfo` | `SchedulerEvent::DerivationInfoReceived` |
/// | `BuildRequired` | No-op (reserved for future use) |
/// | `EvaluationDone` | `SchedulerEvent::EvaluationComplete` |
/// | `OutputInfo` | No-op (reserved for future use) |
/// | `Pushed` | No-op (reserved for future use) |
/// | `BuildDone` | `SchedulerEvent::BuildComplete` |
/// | `EffectDone` | `SchedulerEvent::EffectComplete` |
/// | `LogItems` | Stored directly to database via `store_log_entries` |
///
/// Each message that carries a `task_id` is resolved through the agent's
/// [`TaskContext`] map to find the associated `job_id`, `build_id`, or
/// `effect_id` for the scheduler event.
async fn process_agent_message<DB: StorageBackend>(
payload: &serde_json::Value,
session_id: Uuid,
scheduler_tx: &mpsc::Sender<SchedulerEvent>,
db: &Arc<DB>,
agent_hub: &Arc<RwLock<AgentHub>>,
) -> anyhow::Result<()> {
let msg: AgentMessage = serde_json::from_value(payload.clone())?;
// Snapshot the task contexts from the agent hub. This read lock is held
// briefly; we clone the map so we can work with it without holding the
// lock during async scheduler sends.
let task_contexts = {
let hub = agent_hub.read().await;
hub.sessions
.get(&session_id)
.map(|info| info.task_contexts.clone())
.unwrap_or_default()
};
match msg {
AgentMessage::Started { .. } => {
info!("Task started");
}
AgentMessage::Cancelled { .. } => {
info!("Task cancelled");
}
AgentMessage::Attribute {
task_id,
path,
derivation_path,
typ,
..
} => {
// An evaluation task discovered a Nix attribute (e.g. a package
// or a CI job). Forward to the scheduler so it can create build
// records for each discovered derivation.
let task_uuid: Uuid = task_id.into();
let (job_id, _, _) = resolve_task_context(&task_contexts, task_uuid);
let _ = scheduler_tx
.send(SchedulerEvent::AttributeDiscovered {
job_id,
path,
derivation_path: Some(derivation_path),
typ,
error: None,
})
.await;
}
AgentMessage::AttributeEffect {
task_id,
path,
derivation_path,
..
} => {
// An evaluation discovered an effect attribute (side-effecting
// action like deployment). Tagged as AttributeType::Effect so
// the scheduler creates an effect record instead of a build.
let task_uuid: Uuid = task_id.into();
let (job_id, _, _) = resolve_task_context(&task_contexts, task_uuid);
let _ = scheduler_tx
.send(SchedulerEvent::AttributeDiscovered {
job_id,
path,
derivation_path: Some(derivation_path),
typ: AttributeType::Effect,
error: None,
})
.await;
}
AgentMessage::AttributeError {
task_id,
path,
error,
..
} => {
// An evaluation encountered an error while processing an attribute.
// The error is forwarded to the scheduler which will mark the
// attribute as failed in the database.
let task_uuid: Uuid = task_id.into();
let (job_id, _, _) = resolve_task_context(&task_contexts, task_uuid);
let _ = scheduler_tx
.send(SchedulerEvent::AttributeDiscovered {
job_id,
path,
derivation_path: None,
typ: AttributeType::Regular,
error: Some(error),
})
.await;
}
AgentMessage::DerivationInfo {
task_id,
derivation_path,
platform,
required_system_features,
input_derivations,
outputs,
..
} => {
// Detailed info about a derivation (inputs, outputs, platform
// requirements). The scheduler uses this to determine build
// ordering and platform compatibility for task dispatch.
let task_uuid: Uuid = task_id.into();
let (job_id, _, _) = resolve_task_context(&task_contexts, task_uuid);
let _ = scheduler_tx
.send(SchedulerEvent::DerivationInfoReceived {
job_id,
derivation_path,
platform,
required_system_features,
input_derivations,
outputs,
})
.await;
}
AgentMessage::BuildRequired { .. } => {
// Reserved for future use: the agent signals that a build
// dependency is required before it can proceed.
}
AgentMessage::EvaluationDone { task_id } => {
// The agent has finished evaluating all attributes for this task.
// The scheduler will transition the job from "evaluating" to
// "building" and begin dispatching build tasks.
let task_uuid: Uuid = task_id.clone().into();
let (job_id, _, _) = resolve_task_context(&task_contexts, task_uuid);
let _ = scheduler_tx
.send(SchedulerEvent::EvaluationComplete {
job_id,
task_id: task_id.into(),
})
.await;
}
AgentMessage::OutputInfo { .. } => {
// Reserved for future use: output path info from builds.
}
AgentMessage::Pushed { .. } => {
// Reserved for future use: confirmation that build outputs
// have been pushed to the binary cache.
}
AgentMessage::BuildDone {
task_id,
derivation_path,
success,
..
} => {
// A build has completed (either successfully or with failure).
// The scheduler updates the build record status and, if all
// builds for a job succeed, transitions the job to completion.
let task_uuid: Uuid = task_id.into();
let (_, build_id, _) = resolve_task_context(&task_contexts, task_uuid);
let _ = scheduler_tx
.send(SchedulerEvent::BuildComplete {
build_id: build_id.unwrap_or(Uuid::nil()),
derivation_path,
success,
})
.await;
}
AgentMessage::EffectDone {
task_id, success, ..
} => {
// An effect (side-effecting action like deployment) has completed.
// The scheduler updates the effect record and job status.
let task_uuid: Uuid = task_id.into();
let (job_id, _, effect_id) = resolve_task_context(&task_contexts, task_uuid);
let _ = scheduler_tx
.send(SchedulerEvent::EffectComplete {
effect_id: effect_id.unwrap_or(Uuid::nil()),
job_id,
success,
})
.await;
}
AgentMessage::LogItems {
task_id,
log_entries,
} => {
// Structured log output from the agent (build logs, eval logs).
// Stored directly in the database for later retrieval via the
// log endpoints.
let _ = db.store_log_entries(task_id.into(), &log_entries).await;
}
}
Ok(())
}

View file

@ -0,0 +1,33 @@
//! # WebSocket module
//!
//! This module implements the Hercules CI agent wire protocol over WebSocket.
//! The `hercules-ci-agent` connects to `/api/v1/agent/socket` and communicates
//! using a framed JSON protocol with sequenced delivery and acknowledgement
//! semantics.
//!
//! ## Wire protocol overview
//!
//! The protocol uses four frame types (defined in `jupiter-api-types`):
//!
//! - **`Oob`** -- out-of-band messages used only during the handshake phase.
//! The server sends `ServiceInfo` (version negotiation), the agent replies
//! with `AgentHello` (hostname, platforms, capabilities).
//! - **`Msg { n, p }`** -- sequenced data messages carrying a payload `p` and
//! sequence number `n`. Each side independently numbers its outgoing messages.
//! - **`Ack { n }`** -- acknowledges receipt of all messages up to sequence `n`.
//! The sender can discard those messages from its retry buffer.
//! - **`Exception`** -- signals a fatal protocol error, typically followed by
//! connection close.
//!
//! ## Connection lifecycle
//!
//! 1. Server sends `Oob(ServiceInfo)` with the protocol version.
//! 2. Agent sends `Oob(AgentHello)` with hostname, platforms, and agent version.
//! 3. Server creates a database session and sends `Ack { n: 0 }` to confirm.
//! 4. Both sides enter the message loop, exchanging `Msg`/`Ack` frames.
//! 5. On disconnect (or error), the session is removed from the agent hub,
//! its database record is deleted, and the scheduler is notified via
//! `SchedulerEvent::AgentDisconnected`.
//!
//! See [`handler`] for the implementation.
pub mod handler;