This commit is contained in:
atagen 2026-03-16 22:23:10 +11:00
commit fd80fbab7e
48 changed files with 16775 additions and 0 deletions

View file

@ -0,0 +1,16 @@
# Crate manifest for jupiter-db: the database layer of the Jupiter server.
# All dependency versions are inherited from the workspace root manifest.
[package]
name = "jupiter-db"
version.workspace = true # Version inherited from the workspace.
edition.workspace = true # Rust edition inherited from the workspace.
[dependencies]
jupiter-api-types = { workspace = true } # Shared API/domain types (Account, Job, ...).
sqlx = { workspace = true } # Async SQL toolkit; drives queries and embedded migrations.
tokio = { workspace = true } # Async runtime.
serde = { workspace = true } # (De)serialization framework.
serde_json = { workspace = true } # JSON blobs stored in TEXT columns (payloads, configs).
uuid = { workspace = true } # UUIDv4 primary keys (stored as TEXT).
chrono = { workspace = true } # Date/time handling.
thiserror = { workspace = true } # Derive the crate's error type.
async-trait = { workspace = true } # Async methods on the StorageBackend trait.
tracing = { workspace = true } # Structured logging/instrumentation.

View file

@ -0,0 +1,337 @@
-- =======================================================================
-- Jupiter initial schema
-- =======================================================================
--
-- This migration creates the complete data model for Jupiter, a
-- self-hosted, wire-compatible replacement for hercules-ci.com.
--
-- The schema mirrors the Hercules CI object hierarchy:
--
-- Account -> Project -> Job -> [Attributes, Builds, Effects]
--
-- Key design choices:
-- - All IDs are UUIDv4 stored as TEXT (SQLite has no native UUID type).
-- - All timestamps are TEXT in UTC "YYYY-MM-DD HH:MM:SS" format.
-- - Booleans are INTEGER 0/1 (SQLite convention).
-- - Structured data (JSON arrays/objects) are stored as TEXT and
-- serialized/deserialized at the application layer.
-- - Foreign keys enforce referential integrity (requires PRAGMA
-- foreign_keys=ON at connection time).
-- =======================================================================
-- ── Accounts ─────────────────────────────────────────────────────────
-- Top-level ownership entity. Every project, join token, and agent
-- session belongs to exactly one account. In Hercules CI an account
-- can be a "user" or an "organization".
CREATE TABLE IF NOT EXISTS accounts (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT (schema-wide convention).
name TEXT NOT NULL UNIQUE, -- Human-readable display name; also used for login.
-- 'user' | 'organization'. The CHECK turns the documented value set
-- into an actual database invariant instead of a comment-only
-- convention, so a typo'd account_type fails at INSERT time.
account_type TEXT NOT NULL DEFAULT 'user' CHECK (account_type IN ('user', 'organization')),
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- ── Cluster Join Tokens ──────────────────────────────────────────────
-- Bearer tokens that hercules-ci-agent presents during the WebSocket
-- handshake. Only the bcrypt hash is stored; the raw token is shown
-- to the admin once at creation time and never persisted.
--
-- NOTE(review): authentication fetches every (id, hash) pair for an
-- account and tries bcrypt against each; there is no index on
-- account_id. That is acceptable while each account holds only a
-- handful of tokens -- revisit if token counts grow.
CREATE TABLE IF NOT EXISTS cluster_join_tokens (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT.
account_id TEXT NOT NULL REFERENCES accounts(id), -- Owning account; agent inherits this identity.
name TEXT NOT NULL, -- Admin-friendly label (e.g., "prod-agent-1").
token_hash TEXT NOT NULL, -- bcrypt hash of the raw bearer token.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- ── Forges ───────────────────────────────────────────────────────────
-- A forge is an external code-hosting platform (GitHub, Gitea, etc.).
-- Webhook secrets and API credentials are stored in `config` (JSON).
--
-- NOTE(review): `config` carries webhook secrets and API tokens as
-- plaintext JSON in the database -- confirm whether at-rest encryption
-- is expected for these credentials.
CREATE TABLE IF NOT EXISTS forges (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT.
forge_type TEXT NOT NULL, -- 'github' | 'gitea' | etc.
config TEXT NOT NULL, -- JSON blob with API URL, webhook secret, tokens, etc.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- ── Repos ────────────────────────────────────────────────────────────
-- Mirror of a repository on a forge. Stores the clone URL and default
-- branch so agents know where to fetch code.
-- UNIQUE(forge_id, owner, name) prevents duplicate registrations of
-- the same repo from different webhook deliveries; the implicit index
-- it creates also serves webhook-time lookups by (forge, owner, name).
CREATE TABLE IF NOT EXISTS repos (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT.
forge_id TEXT NOT NULL REFERENCES forges(id), -- Which forge this repo lives on.
owner TEXT NOT NULL, -- GitHub/Gitea user or org owning the repo.
name TEXT NOT NULL, -- Repository name (without owner prefix).
clone_url TEXT NOT NULL, -- HTTPS or SSH clone URL.
default_branch TEXT NOT NULL DEFAULT 'main', -- Used to decide if a push triggers effects.
created_at TEXT NOT NULL DEFAULT (datetime('now')),
UNIQUE(forge_id, owner, name)
);
-- ── Projects ─────────────────────────────────────────────────────────
-- A project binds an account to a repo. It is the primary grouping
-- entity for jobs, secrets, state files, and schedules.
-- `enabled` controls whether incoming webhooks create jobs.
--
-- NOTE(review): `name` is UNIQUE across ALL accounts, not per-account.
-- Confirm that global project-name uniqueness is intended; otherwise
-- this should be UNIQUE(account_id, name).
CREATE TABLE IF NOT EXISTS projects (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT.
account_id TEXT NOT NULL REFERENCES accounts(id), -- Owning account.
repo_id TEXT NOT NULL REFERENCES repos(id), -- Backing repository.
name TEXT NOT NULL UNIQUE, -- Human-readable project name.
enabled INTEGER NOT NULL DEFAULT 1, -- 1 = active, 0 = paused.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- ── Agent Sessions ───────────────────────────────────────────────────
-- Each connected hercules-ci-agent has one row here. The session
-- records the agent's self-reported capabilities so the scheduler can
-- match tasks to capable agents.
--
-- `platforms` is a JSON array of Nix system strings, e.g.,
-- ["x86_64-linux", "aarch64-linux"].
-- `system_features` is a JSON array of required features, e.g.,
-- ["kvm", "big-parallel"].
--
-- Platform matching against the `platforms` JSON array happens in the
-- query/application layer (see get_active_agent_sessions_for_platform
-- in the storage trait), not via a relational join.
CREATE TABLE IF NOT EXISTS agent_sessions (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT.
account_id TEXT NOT NULL REFERENCES accounts(id), -- Account the agent authenticated as.
hostname TEXT NOT NULL, -- Self-reported hostname.
platforms TEXT NOT NULL, -- JSON array of Nix system strings.
system_features TEXT NOT NULL DEFAULT '[]', -- JSON array of system feature strings.
concurrency INTEGER NOT NULL DEFAULT 2, -- Max parallel builds this agent supports.
agent_version TEXT, -- Agent software version (informational).
nix_version TEXT, -- Nix version (informational).
connected_at TEXT NOT NULL DEFAULT (datetime('now')), -- When the WebSocket session started.
last_heartbeat TEXT NOT NULL DEFAULT (datetime('now')) -- Updated on each keepalive ping.
);
-- ── Jobs ─────────────────────────────────────────────────────────────
-- A job is a single CI run triggered by a push or PR event. It
-- progresses through:
-- pending -> evaluating -> building -> running_effects -> succeeded / failed
--
-- `sequence_number` is per-(project, ref) and monotonically increases.
-- Effects use it to ensure ordering: effects for sequence N cannot
-- start until all effects for sequence < N on the same ref are done.
--
-- Forge/repo metadata is denormalized for convenient display without
-- extra joins.
CREATE TABLE IF NOT EXISTS jobs (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT.
project_id TEXT NOT NULL REFERENCES projects(id),
forge_type TEXT NOT NULL, -- Denormalized from forges.forge_type.
repo_owner TEXT NOT NULL, -- Denormalized from repos.owner.
repo_name TEXT NOT NULL, -- Denormalized from repos.name.
ref_name TEXT NOT NULL, -- Git ref (e.g., "refs/heads/main").
commit_sha TEXT NOT NULL, -- Full 40-char SHA.
status TEXT NOT NULL DEFAULT 'pending', -- Job lifecycle state.
sequence_number INTEGER NOT NULL DEFAULT 0, -- Per-(project, ref) ordering counter.
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Speeds up lookups by (project, ref) for the "latest job on branch"
-- query and for sequence-number computation. `sequence_number` is
-- included as a trailing key column so MAX(sequence_number) and
-- "highest sequence first" lookups can be answered from the index
-- alone. (Databases that already ran the two-column version keep it,
-- because of IF NOT EXISTS; only fresh databases get the wider index.)
CREATE INDEX IF NOT EXISTS idx_jobs_project_ref ON jobs(project_id, ref_name, sequence_number);
-- ── Task Queue ───────────────────────────────────────────────────────
-- Unified dispatch queue for all agent work: evaluation, build, and
-- effect tasks. Each task optionally specifies a required `platform`
-- so the scheduler can route it to a capable agent.
--
-- Lifecycle: pending -> running -> succeeded / failed
--
-- If an agent disconnects, its running tasks are reset to pending
-- (see `requeue_agent_tasks`).
CREATE TABLE IF NOT EXISTS task_queue (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT.
job_id TEXT NOT NULL REFERENCES jobs(id), -- Owning job.
task_type TEXT NOT NULL, -- 'evaluation' | 'build' | 'effect'
status TEXT NOT NULL DEFAULT 'pending', -- 'pending' | 'running' | 'succeeded' | 'failed'
platform TEXT, -- Required Nix system (NULL = any agent).
required_features TEXT NOT NULL DEFAULT '[]', -- JSON array of required system features (future use).
payload TEXT NOT NULL, -- JSON blob; schema depends on task_type.
agent_session_id TEXT REFERENCES agent_sessions(id), -- Agent that claimed this task (NULL while pending).
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Speeds up `dequeue_task`: find the oldest pending task matching a platform.
CREATE INDEX IF NOT EXISTS idx_task_queue_status ON task_queue(status, platform);
-- Speeds up `requeue_agent_tasks`: find all running tasks owned by a
-- disconnected agent. SQLite does not index foreign-key columns
-- automatically, so without this the requeue path is a full scan.
CREATE INDEX IF NOT EXISTS idx_task_queue_agent ON task_queue(agent_session_id, status);
-- ── Attributes (evaluation results) ─────────────────────────────────
-- During evaluation the agent walks the flake's `herculesCI` output
-- attribute tree and reports each attribute back. Each row records
-- the attribute path (JSON array), its type, an optional derivation
-- path, and any evaluation error.
--
-- NOTE(review): there is no UNIQUE(job_id, path) constraint, so
-- re-reporting the same attribute would create duplicate rows --
-- confirm the evaluation protocol reports each attribute exactly once.
CREATE TABLE IF NOT EXISTS attributes (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT.
job_id TEXT NOT NULL REFERENCES jobs(id),
path TEXT NOT NULL, -- JSON array of path segments, e.g. '["onPush","default"]'.
derivation_path TEXT, -- /nix/store/…drv path, if this attr produces a derivation.
attribute_type TEXT NOT NULL DEFAULT 'regular', -- 'regular' | 'effect' | etc.
error TEXT, -- Evaluation error message, if any.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Speeds up "get all attributes for a job" queries.
CREATE INDEX IF NOT EXISTS idx_attributes_job ON attributes(job_id);
-- ── Derivation Info ──────────────────────────────────────────────────
-- Stores Nix-level metadata from `nix show-derivation` so the
-- scheduler knows which platform a build targets without
-- re-evaluating.
--
-- `required_system_features` (JSON array) and `platform` are used to
-- match builds to agents. `input_derivations` (JSON array) lists
-- transitive build inputs. `outputs` (JSON object) maps output names
-- to store paths.
CREATE TABLE IF NOT EXISTS derivation_info (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT.
job_id TEXT NOT NULL REFERENCES jobs(id),
derivation_path TEXT NOT NULL, -- /nix/store/…drv path.
platform TEXT NOT NULL, -- Nix system string, e.g. "x86_64-linux".
required_system_features TEXT NOT NULL DEFAULT '[]', -- JSON array, e.g. '["kvm"]'.
input_derivations TEXT NOT NULL DEFAULT '[]', -- JSON array of input .drv paths.
outputs TEXT NOT NULL DEFAULT '{}', -- JSON object: {"out": "/nix/store/…", …}.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Speeds up "get all derivation info for a job" queries.
CREATE INDEX IF NOT EXISTS idx_derivation_info_job ON derivation_info(job_id);
-- Speeds up scheduler lookups by derivation path (e.g. "what platform
-- does this .drv target?"), which otherwise require a full scan since
-- builds are keyed by derivation_path, not job_id.
CREATE INDEX IF NOT EXISTS idx_derivation_info_path ON derivation_info(derivation_path);
-- ── Builds ───────────────────────────────────────────────────────────
-- Builds are **deduplicated by derivation path**. If two different
-- jobs require the same /nix/store/…drv, only one build record is
-- created. The many-to-many `build_jobs` table below tracks which
-- jobs share a build.
--
-- `INSERT OR IGNORE` on the UNIQUE derivation_path column implements
-- the deduplication (see `create_or_get_build`). The UNIQUE
-- constraint's implicit index also serves lookups by derivation path.
--
-- Lifecycle: pending -> building -> succeeded / failed / cancelled
--
-- NOTE(review): because builds are shared across jobs, a terminal
-- status (e.g. 'failed') is visible to every job linked via
-- build_jobs -- confirm the retry story for a later job that wants to
-- rebuild the same derivation.
CREATE TABLE IF NOT EXISTS builds (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT.
derivation_path TEXT NOT NULL UNIQUE, -- Deduplication key.
status TEXT NOT NULL DEFAULT 'pending', -- Build lifecycle state.
agent_session_id TEXT REFERENCES agent_sessions(id), -- Agent that is building (NULL while pending).
started_at TEXT, -- Set when status becomes 'building'.
completed_at TEXT, -- Set when status reaches a terminal state.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- ── Build-Jobs join table ────────────────────────────────────────────
-- Many-to-many relationship between builds and jobs. Because builds
-- are deduplicated, a single build can be shared across multiple jobs
-- (and even projects). This table lets the job controller query
-- "are all builds for job X done?".
CREATE TABLE IF NOT EXISTS build_jobs (
build_id TEXT NOT NULL REFERENCES builds(id),
job_id TEXT NOT NULL REFERENCES jobs(id),
PRIMARY KEY (build_id, job_id) -- Composite PK prevents duplicate links.
);
-- The composite PK only accelerates build_id-first lookups; the job
-- controller's "are all builds for job X done?" query filters by
-- job_id, which needs its own index to avoid a full-table scan.
CREATE INDEX IF NOT EXISTS idx_build_jobs_job ON build_jobs(job_id);
-- ── Effects ──────────────────────────────────────────────────────────
-- Effects are post-build side-effects (deploys, notifications, state
-- file updates) defined in the `herculesCI.onPush` output. They run
-- after all builds for a job complete.
--
-- Effects are serialised per (project, ref): effects for sequence
-- number N do not start until all effects for sequence < N on the
-- same ref have completed. This prevents overlapping deploys.
-- (The ordering uses jobs.sequence_number and is enforced at the
-- application layer; there is no sequence column here.)
--
-- Lifecycle: pending -> running -> succeeded / failed / cancelled
CREATE TABLE IF NOT EXISTS effects (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT.
job_id TEXT NOT NULL REFERENCES jobs(id),
attribute_path TEXT NOT NULL, -- JSON array of the Nix attribute path.
derivation_path TEXT NOT NULL, -- /nix/store/…drv path of the effect derivation.
status TEXT NOT NULL DEFAULT 'pending', -- Effect lifecycle state (see above).
started_at TEXT, -- Set when status becomes 'running'.
completed_at TEXT, -- Set on terminal status.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Speeds up "get all effects for a job" queries.
CREATE INDEX IF NOT EXISTS idx_effects_job ON effects(job_id);
-- ── State Files ──────────────────────────────────────────────────────
-- Implements the Hercules CI `hci state` feature: a key-value store
-- of versioned binary blobs scoped per project. Effects can read and
-- write state files to persist data across CI runs (e.g., Terraform
-- state, deployment manifests).
--
-- Each write bumps the `version` counter and replaces the `data` BLOB.
-- The composite primary key (project_id, name) enforces uniqueness.
-- Only the latest version is kept -- there is no history table.
CREATE TABLE IF NOT EXISTS state_files (
project_id TEXT NOT NULL REFERENCES projects(id),
name TEXT NOT NULL, -- User-defined state file name.
data BLOB NOT NULL, -- Raw binary payload.
version INTEGER NOT NULL DEFAULT 1, -- Monotonically increasing on each write (bumped at the app layer).
size_bytes INTEGER NOT NULL DEFAULT 0, -- Cached size for listing without loading data; must be kept in sync with `data` by the application on every write.
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
PRIMARY KEY (project_id, name)
);
-- ── State Locks ──────────────────────────────────────────────────────
-- Distributed advisory locks with automatic lease expiry. Effects
-- acquire a lock before reading/writing a state file to prevent
-- concurrent modifications from parallel jobs.
--
-- The UNIQUE(project_id, name) constraint enforces mutual exclusion:
-- only one lock per (project, name) can exist at a time. Expired
-- locks are cleaned up lazily on acquire and periodically by a
-- background janitor.
--
-- NOTE(review): the janitor's expiry sweep scans on `expires_at`,
-- which is unindexed. Fine while lock counts stay small (at most one
-- per state file); revisit if that assumption changes.
CREATE TABLE IF NOT EXISTS state_locks (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT.
project_id TEXT NOT NULL REFERENCES projects(id),
name TEXT NOT NULL, -- Lock name (typically matches the state file name).
owner TEXT NOT NULL, -- Free-form identifier of the lock holder.
expires_at TEXT NOT NULL, -- Lease expiry; after this time the lock is stale.
created_at TEXT NOT NULL DEFAULT (datetime('now')),
UNIQUE(project_id, name) -- At most one active lock per (project, name).
);
-- ── Secrets ──────────────────────────────────────────────────────────
-- Encrypted JSON blobs scoped to a project. Secrets are delivered to
-- the agent during effect execution when the `condition` matches
-- (e.g., "always", or only for pushes to the default branch).
--
-- The `data` column stores the secret payload as JSON text. At the
-- Rust layer it is wrapped in `Sensitive<_>` to prevent accidental
-- logging.
--
-- NOTE(review): the header says "Encrypted" but the column comment
-- only describes plain JSON; nothing in this schema performs
-- encryption. Confirm the payload is encrypted at the application
-- layer before insert, or correct the header.
CREATE TABLE IF NOT EXISTS secrets (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT.
project_id TEXT NOT NULL REFERENCES projects(id),
name TEXT NOT NULL, -- User-defined secret name.
data TEXT NOT NULL, -- JSON blob with the secret payload.
condition TEXT NOT NULL DEFAULT '"always"', -- JSON-serialized SecretCondition enum.
created_at TEXT NOT NULL DEFAULT (datetime('now')),
UNIQUE(project_id, name) -- One secret per name per project.
);
-- ── Log Entries ──────────────────────────────────────────────────────
-- Agents stream structured log lines while executing tasks (evaluation,
-- build, or effect). Each line has a zero-based index, a millisecond
-- timestamp, a message string, and a severity level.
--
-- Uses INTEGER PRIMARY KEY AUTOINCREMENT as a surrogate key (not UUID)
-- for insert performance on high-volume log streams.
--
-- NOTE(review): `task_id` deliberately(?) has no FOREIGN KEY to
-- task_queue(id), so log rows can reference deleted or never-existing
-- tasks -- confirm whether logs are meant to outlive their task rows.
CREATE TABLE IF NOT EXISTS log_entries (
id INTEGER PRIMARY KEY AUTOINCREMENT, -- Surrogate rowid; cheap sequential inserts.
task_id TEXT NOT NULL, -- The task producing these logs.
line_index INTEGER NOT NULL, -- Zero-based line number within the task.
timestamp_ms INTEGER NOT NULL, -- Milliseconds since epoch for the log line.
message TEXT NOT NULL, -- Log message content.
level TEXT NOT NULL DEFAULT 'info', -- 'debug' | 'info' | 'warn' | 'error'
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Speeds up paginated log retrieval: "get lines N..N+limit for task X".
CREATE INDEX IF NOT EXISTS idx_log_entries_task ON log_entries(task_id, line_index);
-- ── Schedules ────────────────────────────────────────────────────────
-- Cron-based job triggers. When enabled, the scheduler creates a new
-- job at the configured interval on the specified ref.
-- (Future feature -- not yet wired into the scheduler.)
--
-- NOTE(review): the 'main' default for ref_name can silently diverge
-- from repos.default_branch (which has the same default but is
-- per-repo configurable) -- confirm which one schedules should follow.
CREATE TABLE IF NOT EXISTS schedules (
id TEXT PRIMARY KEY NOT NULL, -- UUIDv4 as TEXT.
project_id TEXT NOT NULL REFERENCES projects(id),
cron_expression TEXT NOT NULL, -- Standard 5-field cron expression.
ref_name TEXT NOT NULL DEFAULT 'main', -- Git ref to evaluate.
enabled INTEGER NOT NULL DEFAULT 1, -- 1 = active, 0 = paused.
last_triggered_at TEXT, -- When the cron last fired.
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);

View file

@ -0,0 +1,14 @@
-- =======================================================================
-- Add password-based authentication for accounts
-- =======================================================================
--
-- The initial schema only supported agent authentication via cluster
-- join tokens (bcrypt-hashed bearer tokens). This migration adds a
-- `password_hash` column to the `accounts` table so that human users
-- can also authenticate with a username + password (bcrypt-hashed).
--
-- The column is nullable: accounts that authenticate exclusively via
-- forge OAuth (GitHub, Gitea, etc.) will leave it NULL. The auth
-- layer checks for NULL before attempting bcrypt verification.
--
-- Because the column is nullable with no DEFAULT expression, SQLite
-- applies this as a fast schema-only change; existing rows are not
-- rewritten.
ALTER TABLE accounts ADD COLUMN password_hash TEXT;

View file

@ -0,0 +1,693 @@
//! # StorageBackend -- the database abstraction trait
//!
//! Every server component that needs to persist or query data depends on
//! this trait rather than on a concrete database implementation. This
//! inversion allows:
//!
//! 1. Swapping SQLite for PostgreSQL via a feature flag.
//! 2. Using an in-memory SQLite database in integration tests.
//! 3. Eventually mocking the trait in unit tests.
//!
//! The trait surface is organised into sections that match the Hercules CI
//! data model. Each section corresponds to one or more SQL tables (see
//! the migration files for the full schema).
//!
//! ## Hercules CI pipeline overview
//!
//! ```text
//! webhook / push event
//! --> create Job (status: pending)
//! --> enqueue evaluation Task
//! --> agent dequeues & evaluates the flake
//! --> store Attributes + DerivationInfo
//! --> create or deduplicate Builds
//! --> enqueue build Tasks (one per unique derivation)
//! --> agents build; when all builds complete:
//! --> create & run Effects (side-effects like deploys)
//! --> job marked succeeded / failed
//! ```
use async_trait::async_trait;
use jupiter_api_types::{
Account, AccountType, AgentHello, AgentSession, AttributeResult, AttributeType, Build,
BuildStatus, ClusterJoinToken, Effect, EffectStatus, ForgeType, Job, JobStatus, JobSummary,
LogEntry, Project, Repo, Secret, SecretCondition, StateFile, StateLock, TaskStatus, TaskType,
};
use uuid::Uuid;
use crate::error::Result;
/// Async trait that abstracts all database operations for the Jupiter
/// server.
///
/// Implementations must be `Send + Sync + 'static` so they can be shared
/// across Tokio tasks behind an `Arc`.
///
/// All IDs are passed as raw [`Uuid`] values. The API layer is
/// responsible for wrapping/unwrapping the phantom-typed [`Id<T>`] from
/// `jupiter-api-types`.
#[async_trait]
pub trait StorageBackend: Send + Sync + 'static {
// ── Initialization ───────────────────────────────────────────────
/// Run all pending sqlx migrations against the connected database.
///
/// Called once at server startup. Migration files live in
/// `crates/jupiter-db/migrations/` and are embedded at compile time
/// by the `sqlx::migrate!` macro.
async fn run_migrations(&self) -> Result<()>;
// ── Accounts ─────────────────────────────────────────────────────
//
// An Account is the top-level ownership entity. In Hercules CI an
// account can be a user or an organisation. Projects, join tokens,
// and agent sessions all belong to an account.
/// Create a new account with the given display name and type.
///
/// Returns `DbError::Sqlx` if the name violates the UNIQUE constraint.
async fn create_account(&self, name: &str, typ: AccountType) -> Result<Account>;
/// Fetch a single account by its primary-key UUID.
async fn get_account(&self, id: Uuid) -> Result<Account>;
/// Fetch a single account by its unique display name.
///
/// Used during login and API-key resolution where the caller only
/// knows the account name.
async fn get_account_by_name(&self, name: &str) -> Result<Account>;
/// Return the bcrypt-hashed password for the account, if one has been
/// set. Returns `Ok(None)` for accounts that authenticate exclusively
/// via forge OAuth or that have not yet set a password.
async fn get_account_password_hash(&self, name: &str) -> Result<Option<String>>;
/// Set (or replace) the bcrypt-hashed password for an account.
///
/// The hash is stored as an opaque string; the caller is responsible
/// for hashing with an appropriate cost factor before calling this
/// method.
async fn set_account_password_hash(&self, id: Uuid, password_hash: &str) -> Result<()>;
/// List every account, ordered by creation time.
async fn list_accounts(&self) -> Result<Vec<Account>>;
// ── Cluster Join Tokens ──────────────────────────────────────────
//
// When an `hercules-ci-agent` first connects it presents a bearer
// token. The server looks up the matching bcrypt hash in this table
// to authenticate the agent and associate it with an account.
//
// Tokens are one-way hashed (bcrypt) so a database leak does not
// expose credentials.
/// Persist a new join token.
///
/// `token_hash` is the bcrypt hash of the raw bearer token that was
/// shown to the admin at creation time. The raw token is never
/// stored.
async fn create_cluster_join_token(
&self,
account_id: Uuid,
name: &str,
token_hash: &str,
) -> Result<ClusterJoinToken>;
/// List all join tokens belonging to an account (hash excluded).
async fn list_cluster_join_tokens(&self, account_id: Uuid) -> Result<Vec<ClusterJoinToken>>;
/// Retrieve the bcrypt hash for a specific token by its UUID.
///
/// Used during agent authentication when the token ID is already
/// known.
async fn get_cluster_join_token_hash(&self, token_id: Uuid) -> Result<String>;
/// Delete (revoke) a cluster join token.
///
/// Active agent sessions authenticated with this token are **not**
/// automatically terminated -- they remain valid until their next
/// re-authentication attempt.
async fn delete_cluster_join_token(&self, token_id: Uuid) -> Result<()>;
/// Return all `(token_id, bcrypt_hash)` pairs for an account so the
/// authentication layer can try each hash against the presented bearer
/// token.
///
/// This linear scan is acceptable because each account typically has
/// only a handful of join tokens.
async fn find_cluster_join_token_by_hash(
&self,
account_id: Uuid,
) -> Result<Vec<(Uuid, String)>>;
// ── Agent Sessions ───────────────────────────────────────────────
//
// Each connected `hercules-ci-agent` has exactly one session row.
// The session records the agent's platform capabilities (e.g.,
// `x86_64-linux`, `aarch64-darwin`), system features, and
// concurrency limit. The scheduler uses this information to match
// tasks to capable agents.
/// Register a newly-connected agent.
///
/// The [`AgentHello`] payload contains the agent's self-reported
/// capabilities (platforms, system features, concurrency, versions).
async fn create_agent_session(
&self,
agent_hello: &AgentHello,
account_id: Uuid,
) -> Result<AgentSession>;
/// Fetch a single agent session by UUID.
async fn get_agent_session(&self, id: Uuid) -> Result<AgentSession>;
/// List all currently-registered agent sessions.
async fn list_agent_sessions(&self) -> Result<Vec<AgentSession>>;
/// Bump the `last_heartbeat` timestamp for a connected agent.
///
/// The server uses heartbeat age to detect stale sessions (agents
/// that disconnected without a clean goodbye).
async fn update_agent_heartbeat(&self, id: Uuid) -> Result<()>;
/// Remove an agent session (agent disconnected or timed out).
///
/// Any tasks still assigned to this agent should be requeued
/// separately via [`requeue_agent_tasks`](Self::requeue_agent_tasks).
async fn delete_agent_session(&self, id: Uuid) -> Result<()>;
/// Find all agent sessions whose `platforms` JSON array contains the
/// given platform string (e.g. `"x86_64-linux"`).
///
/// Used by the scheduler to determine which agents can run a task
/// that requires a specific platform.
async fn get_active_agent_sessions_for_platform(
&self,
platform: &str,
) -> Result<Vec<AgentSession>>;
// ── Repos ────────────────────────────────────────────────────────
//
// A Repo is a mirror of a repository on an external forge (GitHub,
// Gitea, etc.). It stores the clone URL and default branch so the
// agent knows where to fetch code. Repos are unique per
// (forge, owner, name) triple.
/// Register a repository from a forge.
async fn create_repo(
&self,
forge_id: Uuid,
owner: &str,
name: &str,
clone_url: &str,
default_branch: &str,
) -> Result<Repo>;
/// Fetch a repository by its primary-key UUID.
async fn get_repo(&self, id: Uuid) -> Result<Repo>;
/// Look up a repository by its forge-side identity (forge + owner + name).
///
/// Returns `None` if no matching repo has been registered yet.
/// Used during webhook processing to find or create the repo.
async fn find_repo(
&self,
forge_id: Uuid,
owner: &str,
name: &str,
) -> Result<Option<Repo>>;
/// List all repositories associated with a given forge.
async fn list_repos(&self, forge_id: Uuid) -> Result<Vec<Repo>>;
// ── Projects ─────────────────────────────────────────────────────
//
// A Project binds an Account to a Repo and serves as the grouping
// entity for jobs, state files, secrets, and schedules. This is
// the primary unit the user interacts with in the Hercules CI
// dashboard.
/// Create a new project owned by `account_id` and backed by `repo_id`.
///
/// Projects are enabled by default. Disabled projects ignore
/// incoming webhooks.
async fn create_project(
&self,
account_id: Uuid,
repo_id: Uuid,
name: &str,
) -> Result<Project>;
/// Fetch a project by primary key.
async fn get_project(&self, id: Uuid) -> Result<Project>;
/// Fetch a project by its unique display name.
async fn get_project_by_name(&self, name: &str) -> Result<Project>;
/// Toggle the `enabled` flag on a project and return the updated row.
///
/// Disabled projects will not create new jobs when webhooks arrive.
async fn update_project(&self, id: Uuid, enabled: bool) -> Result<Project>;
/// List all projects, ordered by creation time.
async fn list_projects(&self) -> Result<Vec<Project>>;
/// Find the project (if any) that is linked to the given repo.
///
/// At most one project can point to each repo. Used during webhook
/// processing to route an event to the correct project.
async fn find_project_by_repo(&self, repo_id: Uuid) -> Result<Option<Project>>;
// ── Jobs ─────────────────────────────────────────────────────────
//
// A Job represents a single CI run triggered by a push or pull
// request event. It progresses through:
//
// pending -> evaluating -> building -> running_effects -> succeeded / failed
//
// Each job belongs to exactly one project and is identified by a
// per-(project, ref) monotonically-increasing sequence number.
/// Create a new job in `pending` status.
///
/// Automatically assigns the next sequence number for the given
/// (project, ref) pair. The `forge_type`, `repo_owner`, and
/// `repo_name` are denormalized from the project's repo for
/// convenient display and webhook status reporting.
async fn create_job(
&self,
project_id: Uuid,
forge_type: ForgeType,
repo_owner: &str,
repo_name: &str,
ref_name: &str,
commit_sha: &str,
) -> Result<Job>;
/// Fetch a job by primary key.
async fn get_job(&self, id: Uuid) -> Result<Job>;
/// Transition a job to the given status and bump `updated_at`.
async fn update_job_status(&self, id: Uuid, status: JobStatus) -> Result<()>;
/// Paginated listing of jobs for a project, newest first.
///
/// Returns `(summaries, total_count)` so the API can set pagination
/// headers.
async fn list_jobs_for_project(
&self,
project_id: Uuid,
page: u64,
per_page: u64,
) -> Result<(Vec<JobSummary>, u64)>;
/// Return the most recent job for a (project, ref) pair, by sequence
/// number.
///
/// Used to determine whether a new push supersedes an in-progress
/// job on the same branch.
async fn get_latest_job_for_ref(
&self,
project_id: Uuid,
ref_name: &str,
) -> Result<Option<Job>>;
/// Compute the next sequence number for a (project, ref) pair.
///
/// Sequence numbers start at 1 and monotonically increase. They are
/// used to order effects: an effect for sequence N will not run until
/// all effects for sequences < N on the same ref have completed.
async fn get_next_sequence_number(
&self,
project_id: Uuid,
ref_name: &str,
) -> Result<i64>;
// ── Task Queue ───────────────────────────────────────────────────
//
// The task queue is a unified dispatch mechanism. Evaluation, build,
// and effect tasks all live in the same `task_queue` table. Each
// task optionally specifies a required `platform` (e.g.,
// `x86_64-linux`) so the scheduler can route it to a capable agent.
//
// Tasks flow through: pending -> running -> succeeded / failed
//
// If an agent disconnects, its running tasks are requeued to pending
// so another agent can pick them up.
/// Insert a new task into the queue in `pending` status.
///
/// `platform` may be `None` for tasks that can run on any agent
/// (e.g., evaluation of platform-independent expressions).
/// `payload` is an opaque JSON blob whose schema depends on
/// `task_type`.
async fn enqueue_task(
&self,
job_id: Uuid,
task_type: TaskType,
platform: Option<&str>,
payload: &serde_json::Value,
) -> Result<Uuid>;
/// Atomically dequeue the oldest pending task that matches the given
/// platform.
///
/// The task is moved to `running` status inside a transaction so that
/// concurrent agents cannot claim the same task. Returns `None` if
/// no matching task is available.
///
/// `system_features` is accepted for future feature-matching but is
/// not yet used in the query.
async fn dequeue_task(
&self,
platform: &str,
system_features: &[String],
) -> Result<Option<(Uuid, TaskType, serde_json::Value)>>;
/// Update the status of a task and optionally record which agent
/// session is handling it.
async fn update_task_status(
&self,
task_id: Uuid,
status: TaskStatus,
agent_session_id: Option<Uuid>,
) -> Result<()>;
/// Retrieve a task's full metadata (id, type, status, payload).
async fn get_task(
&self,
task_id: Uuid,
) -> Result<(Uuid, TaskType, TaskStatus, serde_json::Value)>;
/// Reset all `running` tasks owned by the given agent session back
/// to `pending`.
///
/// Called when an agent disconnects unexpectedly so that its
/// in-flight work is retried by another agent. Returns the list
/// of task IDs that were requeued.
async fn requeue_agent_tasks(&self, agent_session_id: Uuid) -> Result<Vec<Uuid>>;
/// Look up which job a task belongs to.
///
/// Used by the agent protocol handler to route task results back to
/// the originating job.
async fn get_task_job_id(&self, task_id: Uuid) -> Result<Uuid>;
// ── Evaluations / Attributes ─────────────────────────────────────
//
// During evaluation the agent walks the flake's `herculesCI` output
// attribute tree. Each discovered attribute is recorded here,
// along with its derivation path (if it produces one) and type.
//
// DerivationInfo stores Nix-level metadata from `nix show-derivation`
// so the scheduler knows which platform a build needs without
// re-evaluating.
/// Record a single attribute discovered during evaluation.
///
/// `path` is the Nix attribute path as a list of segments (e.g.,
/// `["herculesCI", "ciSystems", "x86_64-linux", "default"]`).
/// `derivation_path` is the `/nix/store/...drv` path, if this
/// attribute produces a derivation. `error`, when present, records
/// the evaluation failure message for this attribute.
async fn store_attribute(
&self,
job_id: Uuid,
path: &[String],
derivation_path: Option<&str>,
typ: AttributeType,
error: Option<&str>,
) -> Result<()>;
/// Store Nix derivation metadata obtained from `nix show-derivation`.
///
/// `platform` (e.g. `"x86_64-linux"`) and `required_system_features`
/// (e.g. `["kvm"]`) are used by the scheduler to match builds to
/// agents. `input_derivations` lists build dependencies
/// (NOTE(review): `nix show-derivation` reports direct `inputDrvs`;
/// confirm whether the stored set is direct or transitive).
/// `outputs` is the JSON map of output names to store paths.
async fn store_derivation_info(
&self,
job_id: Uuid,
derivation_path: &str,
platform: &str,
required_system_features: &[String],
input_derivations: &[String],
outputs: &serde_json::Value,
) -> Result<()>;
/// Retrieve all attributes recorded for a job's evaluation.
async fn get_evaluation_attributes(&self, job_id: Uuid) -> Result<Vec<AttributeResult>>;
/// Return every unique derivation path discovered during a job's
/// evaluation.
///
/// Used after evaluation completes to create the corresponding
/// build records.
async fn get_derivation_paths_for_job(&self, job_id: Uuid) -> Result<Vec<String>>;
/// Look up the target platform for a given derivation path.
///
/// Returns `None` if the derivation has not been recorded (e.g., it
/// was a dependency that was not evaluated in this job). The
/// scheduler calls this to decide which agent platform can build
/// the derivation.
async fn get_derivation_platform(
&self,
derivation_path: &str,
) -> Result<Option<String>>;
// ── Builds ───────────────────────────────────────────────────────
//
// Builds are **deduplicated by derivation path**. If two different
// jobs (or even two different projects) need the same
// `/nix/store/...drv`, only one build record is created. The
// `build_jobs` join table tracks which jobs share a build so their
// statuses can all be updated when the build completes.
//
// Build lifecycle: pending -> building -> succeeded / failed / cancelled
/// Insert a new build for `derivation_path`, or return the existing
/// build if one already exists (deduplication).
///
/// Returns `(build_id, was_created)`. `was_created` is `false` when
/// the derivation was already known, meaning no new work needs to be
/// scheduled.
async fn create_or_get_build(&self, derivation_path: &str) -> Result<(Uuid, bool)>;
/// Fetch a build by primary key.
///
/// Expected to fail with [`DbError::NotFound`] when no build exists
/// with this id.
async fn get_build(&self, id: Uuid) -> Result<Build>;
/// Look up a build by its derivation path; `None` if the derivation
/// has never had a build record created.
async fn get_build_by_drv_path(&self, derivation_path: &str) -> Result<Option<Build>>;
/// Transition a build's status and optionally record the building
/// agent.
///
/// Automatically sets `started_at` when entering `Building` and
/// `completed_at` when entering a terminal status.
async fn update_build_status(
&self,
id: Uuid,
status: BuildStatus,
agent_session_id: Option<Uuid>,
) -> Result<()>;
/// Associate a build with a job (many-to-many, via `build_jobs`).
///
/// Silently succeeds if the link already exists (`INSERT OR IGNORE`).
async fn link_build_to_job(&self, build_id: Uuid, job_id: Uuid) -> Result<()>;
/// Check whether every build linked to a job has reached a terminal
/// status (`succeeded`, `failed`, or `cancelled`).
///
/// The job controller calls this after each build status update to
/// decide whether to advance the job to the effects phase.
async fn are_all_builds_complete(&self, job_id: Uuid) -> Result<bool>;
// ── Effects ──────────────────────────────────────────────────────
//
// Effects are post-build side-effects (deploys, notifications,
// state-file updates, etc.) defined in the `herculesCI.onPush`
// output. They are serialised: for a given (project, ref), effects
// for sequence number N do not start until all effects for
// sequence < N have completed. This prevents overlapping deploys.
//
// Effect lifecycle: pending -> running -> succeeded / failed / cancelled
/// Create a new effect record for a job.
///
/// Returns the UUID of the newly created effect.
async fn create_effect(
&self,
job_id: Uuid,
attribute_path: &[String],
derivation_path: &str,
) -> Result<Uuid>;
/// Fetch an effect by primary key.
async fn get_effect(&self, id: Uuid) -> Result<Effect>;
/// Look up an effect by its (job, attribute_path) pair.
///
/// `attribute_path` is the JSON-serialized path string -- the same
/// encoding used when the path segments were stored, not a dotted
/// path.
async fn get_effect_by_job_and_attr(&self, job_id: Uuid, attribute_path: &str) -> Result<Effect>;
/// List all effects associated with a job, ordered by creation time.
async fn get_effects_for_job(&self, job_id: Uuid) -> Result<Vec<Effect>>;
/// Transition an effect's status.
///
/// Automatically sets `started_at` when entering `Running` and
/// `completed_at` when entering a terminal status.
async fn update_effect_status(&self, id: Uuid, status: EffectStatus) -> Result<()>;
/// Check whether every effect for a job has reached a terminal status.
async fn are_all_effects_complete(&self, job_id: Uuid) -> Result<bool>;
/// Check whether all effects from earlier sequence numbers on the
/// same (project, ref) have completed.
///
/// Used to enforce the serialisation invariant: effects for a newer
/// push must wait until previous pushes' effects have finished.
/// This prevents concurrent deploys from the same branch.
async fn are_preceding_effects_done(
&self,
project_id: Uuid,
ref_name: &str,
sequence_number: i64,
) -> Result<bool>;
// ── State Files ──────────────────────────────────────────────────
//
// State files implement the Hercules CI `hci state` feature: a
// key-value store of versioned binary blobs scoped per project.
// Effects can read/write these files to persist data across CI runs
// (e.g., Terraform state, deployment manifests).
//
// Each write bumps the version counter and replaces the data.
// The version number enables optimistic-concurrency checks in
// higher-level code.
/// Insert or update a state file.
///
/// Uses `INSERT ... ON CONFLICT DO UPDATE` so that the first write
/// creates the row at version 1, and subsequent writes atomically
/// increment the version.
///
/// NOTE: there is no version precondition at this layer -- writes are
/// last-writer-wins; optimistic-concurrency checks happen in
/// higher-level code using the version counter.
async fn put_state_file(
&self,
project_id: Uuid,
name: &str,
data: &[u8],
) -> Result<()>;
/// Retrieve the raw bytes of a state file.
///
/// Returns `None` if the file has never been written.
async fn get_state_file(
&self,
project_id: Uuid,
name: &str,
) -> Result<Option<Vec<u8>>>;
/// List all state files for a project (metadata only, no data blobs).
async fn list_state_files(&self, project_id: Uuid) -> Result<Vec<StateFile>>;
// ── State Locks ──────────────────────────────────────────────────
//
// Distributed advisory locks with automatic lease expiry. Effects
// acquire a lock before reading/writing a state file to prevent
// concurrent modifications from parallel jobs.
//
// The UNIQUE(project_id, name) constraint on the `state_locks`
// table ensures mutual exclusion at the database level. Expired
// locks are cleaned up lazily (on acquire) and periodically via
// `cleanup_expired_locks`.
/// Attempt to acquire a named lock for a project.
///
/// First deletes any expired lock for the same (project, name) pair,
/// then tries `INSERT OR IGNORE`. Returns `DbError::Conflict` if
/// the lock is held by another owner and has not expired. On
/// success, returns the newly created lock row.
///
/// `owner` is a free-form string identifying the holder (typically
/// the agent session ID or effect ID). `ttl_seconds` controls the
/// lease duration.
async fn acquire_lock(
&self,
project_id: Uuid,
name: &str,
owner: &str,
ttl_seconds: u64,
) -> Result<StateLock>;
/// Extend the lease of an existing lock.
///
/// Useful for long-running effects that need to hold a lock beyond
/// the initial TTL without releasing and re-acquiring.
///
/// NOTE(review): behaviour when the lock has already expired (and
/// possibly been cleaned up) is backend-defined -- confirm against
/// the implementation.
async fn renew_lock(&self, lock_id: Uuid, ttl_seconds: u64) -> Result<StateLock>;
/// Explicitly release a lock before it expires.
async fn release_lock(&self, lock_id: Uuid) -> Result<()>;
/// Delete all locks whose `expires_at` is in the past.
///
/// Returns the number of expired locks removed. Called periodically
/// by a background janitor task.
async fn cleanup_expired_locks(&self) -> Result<u64>;
// ── Secrets ──────────────────────────────────────────────────────
//
// Secrets are JSON blobs scoped to a project. They are delivered
// to the agent during effect execution when the `condition` matches
// (e.g., only on the default branch).
//
// The `data` column stores the secret payload as JSON text.
// At the Rust level it is wrapped in `Sensitive<_>` to prevent
// accidental logging.
/// Create a new project secret.
///
/// `data` is an opaque JSON value (typically `{"key": "value"}`
/// pairs). `condition` controls when the secret is available --
/// e.g., only for pushes to the default branch.
///
/// Returns the UUID of the newly created secret.
async fn create_secret(
&self,
project_id: Uuid,
name: &str,
data: &serde_json::Value,
condition: &SecretCondition,
) -> Result<Uuid>;
/// List all secrets for a project (including their data).
///
/// The caller is responsible for filtering based on `condition`
/// before sending secrets to an agent -- this method performs no
/// condition evaluation itself.
async fn get_secrets_for_project(&self, project_id: Uuid) -> Result<Vec<Secret>>;
/// Delete a secret by its UUID.
async fn delete_secret(&self, id: Uuid) -> Result<()>;
// ── Log Entries ──────────────────────────────────────────────────
//
// Agents stream structured log lines while executing tasks. Each
// line has a zero-based index, a millisecond timestamp, a message,
// and a severity level. The dashboard uses these to display
// real-time build/effect logs.
/// Batch-insert log lines for a task.
///
/// Runs inside a transaction for atomicity. Idempotent if lines
/// with the same `(task_id, line_index)` are inserted again
/// (assuming the table allows it; currently no unique constraint
/// on the pair, so duplicates are possible if the agent retries).
async fn store_log_entries(
&self,
task_id: Uuid,
entries: &[LogEntry],
) -> Result<()>;
/// Retrieve a page of log entries for a task, ordered by line index.
///
/// `offset` and `limit` paginate over that ordering, so successive
/// calls can tail a growing log.
async fn get_log_entries(
&self,
task_id: Uuid,
offset: u64,
limit: u64,
) -> Result<Vec<LogEntry>>;
}

View file

@ -0,0 +1,58 @@
//! # Database error types for jupiter-db
//!
//! Provides a unified [`DbError`] enum that every [`crate::backend::StorageBackend`]
//! method returns. The variants cover the five failure modes that callers
//! need to distinguish:
//!
//! - **Sqlx** -- low-level driver or connection-pool errors (timeouts,
//! constraint violations not otherwise mapped, etc.).
//! - **NotFound** -- the requested entity does not exist. The HTTP layer
//! typically maps this to `404 Not Found`.
//! - **Conflict** -- a uniqueness or locking constraint was violated
//! (e.g., trying to acquire a state lock that is already held). Maps
//! to `409 Conflict`.
//! - **Migration** -- schema migration failed on startup. Fatal.
//! - **Serialization** -- a JSON column could not be serialized or
//! deserialized (e.g., the `platforms` JSON array in `agent_sessions`).
use thiserror::Error;
/// Crate-level error type returned by every [`crate::backend::StorageBackend`] method.
///
/// The variants carry enough context for the API layer to choose an
/// appropriate HTTP status code without inspecting error messages.
#[derive(Debug, Error)]
pub enum DbError {
/// A low-level sqlx driver error (connection failure, unexpected SQL
/// error, protocol parse issue, etc.).
#[error("database error: {0}")]
Sqlx(#[from] sqlx::Error),
/// The requested entity was not found. The HTTP layer typically maps
/// this to `404 Not Found`.
///
/// `entity` is a human-readable table/concept name (e.g. `"account"`,
/// `"build"`). `id` is whatever key was used for the lookup.
#[error("not found: {entity} with id {id}")]
NotFound { entity: String, id: String },
/// A uniqueness or mutual-exclusion constraint was violated. Maps to
/// `409 Conflict` at the HTTP layer.
///
/// Currently used by [`crate::backend::StorageBackend::acquire_lock`]
/// when the lock is already held by another owner.
#[error("conflict: {0}")]
Conflict(String),
/// A sqlx migration failed. This is treated as fatal at startup.
#[error("migration error: {0}")]
Migration(#[from] sqlx::migrate::MigrateError),
/// JSON serialization or deserialization failed for a column that
/// stores structured data (e.g., `platforms`, `system_features`,
/// `attribute_path`, `condition`).
#[error("serialization error: {0}")]
Serialization(#[from] serde_json::Error),
}
/// Convenience alias used throughout the crate so that every function
/// signature can simply return `Result<T>`.
pub type Result<T> = std::result::Result<T, DbError>;

View file

@ -0,0 +1,46 @@
//! # jupiter-db -- Persistence layer for Jupiter
//!
//! Jupiter is a self-hosted, wire-compatible replacement for
//! [hercules-ci.com](https://hercules-ci.com). This crate owns every
//! database interaction: schema migrations, CRUD operations, and the
//! task-queue that drives the eval-build-effects pipeline.
//!
//! ## Architecture
//!
//! All server components depend on the [`backend::StorageBackend`] async
//! trait rather than on a concrete database driver. Today the only
//! implementation is [`sqlite::SqliteBackend`] (the default), but the
//! trait is designed so that a PostgreSQL backend can be added behind a
//! feature flag without touching any calling code.
//!
//! ## Modules
//!
//! | Module | Purpose |
//! |-------------|---------|
//! | [`backend`] | Defines the `StorageBackend` trait -- the public contract. |
//! | [`error`] | Crate-level error and `Result` types. |
//! | [`sqlite`] | SQLite implementation of `StorageBackend` via sqlx. |
//!
//! ## Data model overview
//!
//! The schema mirrors the Hercules CI object model:
//!
//! ```text
//! Account
//! +-- ClusterJoinToken (agent authentication)
//! +-- Project
//! +-- Repo (forge-side repository reference)
//! +-- Job (one per push / PR event)
//! | +-- Attribute (evaluation output)
//! | +-- DerivationInfo (platform & inputs metadata)
//! | +-- Build (deduplicated by drv path)
//! | +-- Effect (post-build side-effects)
//! | +-- TaskQueue (unified dispatch to agents)
//! +-- StateFile (versioned binary blobs for `hci state`)
//! +-- StateLock (distributed lock with lease expiry)
//! +-- Secret (per-project secrets, stored as JSON text)
//! ```
pub mod backend;
pub mod error;
pub mod sqlite;

File diff suppressed because it is too large Load diff