init
This commit is contained in:
commit
fd80fbab7e
48 changed files with 16775 additions and 0 deletions
337
crates/jupiter-db/migrations/20240101000000_initial.sql
Normal file
337
crates/jupiter-db/migrations/20240101000000_initial.sql
Normal file
|
|
@ -0,0 +1,337 @@
|
|||
-- =======================================================================
|
||||
-- Jupiter initial schema
|
||||
-- =======================================================================
|
||||
--
|
||||
-- This migration creates the complete data model for Jupiter, a
|
||||
-- self-hosted, wire-compatible replacement for hercules-ci.com.
|
||||
--
|
||||
-- The schema mirrors the Hercules CI object hierarchy:
|
||||
--
|
||||
-- Account -> Project -> Job -> [Attributes, Builds, Effects]
|
||||
--
|
||||
-- Key design choices:
|
||||
-- - IDs are UUIDv4 stored as TEXT (SQLite has no native UUID type); the one exception is log_entries.id, an INTEGER surrogate key.
|
||||
-- - Timestamps are TEXT in UTC "YYYY-MM-DD HH:MM:SS" format, except log_entries.timestamp_ms, which is INTEGER milliseconds since epoch.
|
||||
-- - Booleans are INTEGER 0/1 (SQLite convention).
|
||||
-- - Structured data (JSON arrays/objects) are stored as TEXT and
|
||||
-- serialized/deserialized at the application layer.
|
||||
-- - Foreign keys enforce referential integrity (requires PRAGMA
|
||||
-- foreign_keys=ON at connection time).
|
||||
-- =======================================================================
|
||||
|
||||
-- ── Accounts ─────────────────────────────────────────────────────────
-- Root of the ownership hierarchy: every project, join token, and agent
-- session references exactly one account. As in Hercules CI, an
-- account is either a "user" or an "organization".
CREATE TABLE IF NOT EXISTS accounts (
    -- UUID stored as text.
    id           TEXT NOT NULL,
    -- Human-readable display name; also used for login, hence unique.
    name         TEXT NOT NULL,
    -- 'user' | 'organization'
    account_type TEXT NOT NULL DEFAULT 'user',
    -- UTC creation time, filled in by SQLite when omitted.
    created_at   TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id),
    UNIQUE (name)
);
|
||||
|
||||
-- ── Cluster Join Tokens ──────────────────────────────────────────────
-- Bearer tokens presented by hercules-ci-agent during the WebSocket
-- handshake. The raw token is shown to the admin once, at creation
-- time, and is never persisted; only its bcrypt hash is kept here.
CREATE TABLE IF NOT EXISTS cluster_join_tokens (
    id         TEXT NOT NULL,
    -- Owning account; the agent inherits this identity.
    account_id TEXT NOT NULL,
    -- Admin-friendly label (e.g., "prod-agent-1").
    name       TEXT NOT NULL,
    -- bcrypt hash of the raw bearer token.
    token_hash TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id),
    FOREIGN KEY (account_id) REFERENCES accounts (id)
);
|
||||
|
||||
-- ── Forges ───────────────────────────────────────────────────────────
-- One row per external code-hosting platform (GitHub, Gitea, etc.).
-- Webhook secrets and API credentials live inside the JSON `config`.
CREATE TABLE IF NOT EXISTS forges (
    id         TEXT NOT NULL,
    -- 'github' | 'gitea' | etc.
    forge_type TEXT NOT NULL,
    -- JSON blob with API URL, webhook secret, tokens, etc.
    config     TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id)
);
|
||||
|
||||
-- ── Repos ────────────────────────────────────────────────────────────
-- Local mirror of a repository hosted on a forge. The clone URL and
-- default branch are recorded so agents know where to fetch code.
-- The (forge_id, owner, name) uniqueness rule stops different webhook
-- deliveries from registering the same repo twice.
CREATE TABLE IF NOT EXISTS repos (
    id             TEXT NOT NULL,
    -- Which forge this repo lives on.
    forge_id       TEXT NOT NULL,
    -- GitHub/Gitea user or org owning the repo.
    owner          TEXT NOT NULL,
    -- Repository name (without owner prefix).
    name           TEXT NOT NULL,
    -- HTTPS or SSH clone URL.
    clone_url      TEXT NOT NULL,
    -- Used to decide if a push triggers effects.
    default_branch TEXT NOT NULL DEFAULT 'main',
    created_at     TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id),
    FOREIGN KEY (forge_id) REFERENCES forges (id),
    UNIQUE (forge_id, owner, name)
);
|
||||
|
||||
-- ── Projects ─────────────────────────────────────────────────────────
-- Links an account to a repo and serves as the main grouping entity
-- for jobs, secrets, state files, and schedules.
-- Incoming webhooks only create jobs while `enabled` is set.
CREATE TABLE IF NOT EXISTS projects (
    id         TEXT NOT NULL,
    -- Owning account.
    account_id TEXT NOT NULL,
    -- Backing repository.
    repo_id    TEXT NOT NULL,
    -- Human-readable project name; unique across all projects.
    name       TEXT NOT NULL,
    -- 1 = active, 0 = paused.
    enabled    INTEGER NOT NULL DEFAULT 1,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id),
    UNIQUE (name),
    FOREIGN KEY (account_id) REFERENCES accounts (id),
    FOREIGN KEY (repo_id) REFERENCES repos (id)
);
|
||||
|
||||
-- ── Agent Sessions ───────────────────────────────────────────────────
-- One row per connected hercules-ci-agent. The row captures the
-- capabilities the agent reports about itself, which the scheduler
-- uses to match tasks to agents able to run them.
--
-- `platforms`       JSON array of Nix system strings,
--                   e.g. ["x86_64-linux", "aarch64-linux"].
-- `system_features` JSON array of system feature strings,
--                   e.g. ["kvm", "big-parallel"].
CREATE TABLE IF NOT EXISTS agent_sessions (
    id              TEXT NOT NULL,
    -- Account the agent authenticated as.
    account_id      TEXT NOT NULL,
    -- Self-reported hostname.
    hostname        TEXT NOT NULL,
    -- JSON array of Nix system strings.
    platforms       TEXT NOT NULL,
    -- JSON array of system feature strings.
    system_features TEXT NOT NULL DEFAULT '[]',
    -- Max parallel builds this agent supports.
    concurrency     INTEGER NOT NULL DEFAULT 2,
    -- Agent software version (informational).
    agent_version   TEXT,
    -- Nix version (informational).
    nix_version     TEXT,
    -- When the WebSocket session started.
    connected_at    TEXT NOT NULL DEFAULT (datetime('now')),
    -- Updated on each keepalive ping.
    last_heartbeat  TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id),
    FOREIGN KEY (account_id) REFERENCES accounts (id)
);
|
||||
|
||||
-- ── Jobs ─────────────────────────────────────────────────────────────
-- One CI run, triggered by a push or PR event. Status moves through:
--   pending -> evaluating -> building -> running_effects -> succeeded / failed
--
-- `sequence_number` is scoped to (project, ref) and increases
-- monotonically. Effects rely on it for ordering: effects of sequence N
-- may not start until every effect of a lower sequence on the same ref
-- has finished.
--
-- Forge and repo metadata is copied onto each job so it can be
-- displayed without extra joins.
CREATE TABLE IF NOT EXISTS jobs (
    id              TEXT NOT NULL,
    project_id      TEXT NOT NULL,
    -- Denormalized from forges.forge_type.
    forge_type      TEXT NOT NULL,
    -- Denormalized from repos.owner.
    repo_owner      TEXT NOT NULL,
    -- Denormalized from repos.name.
    repo_name       TEXT NOT NULL,
    -- Git ref (e.g., "refs/heads/main").
    ref_name        TEXT NOT NULL,
    -- Full 40-char SHA.
    commit_sha      TEXT NOT NULL,
    -- Job lifecycle state.
    status          TEXT NOT NULL DEFAULT 'pending',
    -- Per-(project, ref) ordering counter.
    sequence_number INTEGER NOT NULL DEFAULT 0,
    created_at      TEXT NOT NULL DEFAULT (datetime('now')),
    updated_at      TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id),
    FOREIGN KEY (project_id) REFERENCES projects (id)
);

-- Supports (project, ref) lookups: the "latest job on branch" query
-- and the computation of the next sequence number.
CREATE INDEX IF NOT EXISTS idx_jobs_project_ref
    ON jobs (project_id, ref_name);
|
||||
|
||||
-- ── Task Queue ───────────────────────────────────────────────────────
-- Single dispatch queue for every kind of agent work: evaluation,
-- build, and effect tasks. A task may name a required `platform`, which
-- lets the scheduler route it to an agent that can run it.
--
-- Lifecycle: pending -> running -> succeeded / failed
--
-- When an agent disconnects, the tasks it was running go back to
-- pending (see `requeue_agent_tasks`).
CREATE TABLE IF NOT EXISTS task_queue (
    id                TEXT NOT NULL,
    -- Owning job.
    job_id            TEXT NOT NULL,
    -- 'evaluation' | 'build' | 'effect'
    task_type         TEXT NOT NULL,
    -- 'pending' | 'running' | 'succeeded' | 'failed'
    status            TEXT NOT NULL DEFAULT 'pending',
    -- Required Nix system (NULL = any agent).
    platform          TEXT,
    -- JSON array of required system features (future use).
    required_features TEXT NOT NULL DEFAULT '[]',
    -- JSON blob; schema depends on task_type.
    payload           TEXT NOT NULL,
    -- Agent that claimed this task (NULL while pending).
    agent_session_id  TEXT,
    created_at        TEXT NOT NULL DEFAULT (datetime('now')),
    updated_at        TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id),
    FOREIGN KEY (job_id) REFERENCES jobs (id),
    FOREIGN KEY (agent_session_id) REFERENCES agent_sessions (id)
);

-- Supports `dequeue_task`, which looks for the oldest pending task
-- matching a platform.
CREATE INDEX IF NOT EXISTS idx_task_queue_status
    ON task_queue (status, platform);
|
||||
|
||||
-- ── Attributes (evaluation results) ─────────────────────────────────
-- While evaluating, the agent walks the flake's `herculesCI` output
-- attribute tree and reports every attribute it finds. A row holds the
-- attribute path (JSON array), the attribute type, the derivation path
-- when there is one, and the evaluation error when there is one.
CREATE TABLE IF NOT EXISTS attributes (
    id              TEXT NOT NULL,
    job_id          TEXT NOT NULL,
    -- JSON array of path segments, e.g. '["onPush","default"]'.
    path            TEXT NOT NULL,
    -- /nix/store/…drv path, if this attr produces a derivation.
    derivation_path TEXT,
    -- 'regular' | 'effect' | etc.
    attribute_type  TEXT NOT NULL DEFAULT 'regular',
    -- Evaluation error message, if any.
    error           TEXT,
    created_at      TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id),
    FOREIGN KEY (job_id) REFERENCES jobs (id)
);

-- Supports "get all attributes for a job" queries.
CREATE INDEX IF NOT EXISTS idx_attributes_job
    ON attributes (job_id);
|
||||
|
||||
-- ── Derivation Info ──────────────────────────────────────────────────
-- Nix-level metadata taken from `nix show-derivation`, kept so the
-- scheduler can tell which platform a build targets without having to
-- evaluate again.
--
-- `platform` and `required_system_features` (JSON array) drive the
-- matching of builds to agents. `input_derivations` (JSON array) lists
-- transitive build inputs. `outputs` (JSON object) maps output names
-- to store paths.
CREATE TABLE IF NOT EXISTS derivation_info (
    id                       TEXT NOT NULL,
    job_id                   TEXT NOT NULL,
    -- /nix/store/…drv path.
    derivation_path          TEXT NOT NULL,
    -- Nix system string, e.g. "x86_64-linux".
    platform                 TEXT NOT NULL,
    -- JSON array, e.g. '["kvm"]'.
    required_system_features TEXT NOT NULL DEFAULT '[]',
    -- JSON array of input .drv paths.
    input_derivations        TEXT NOT NULL DEFAULT '[]',
    -- JSON object: {"out": "/nix/store/…", …}.
    outputs                  TEXT NOT NULL DEFAULT '{}',
    created_at               TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id),
    FOREIGN KEY (job_id) REFERENCES jobs (id)
);

-- Supports "get all derivation info for a job" queries.
CREATE INDEX IF NOT EXISTS idx_derivation_info_job
    ON derivation_info (job_id);
|
||||
|
||||
-- ── Builds ───────────────────────────────────────────────────────────
-- Builds are **deduplicated by derivation path**: when two jobs need
-- the same /nix/store/…drv, a single build row is created. The
-- many-to-many `build_jobs` table records which jobs share a build.
--
-- Deduplication is implemented by `INSERT OR IGNORE` against the
-- unique derivation_path (see `create_or_get_build`).
--
-- Lifecycle: pending -> building -> succeeded / failed / cancelled
CREATE TABLE IF NOT EXISTS builds (
    id               TEXT NOT NULL,
    -- Deduplication key.
    derivation_path  TEXT NOT NULL,
    -- Build lifecycle state.
    status           TEXT NOT NULL DEFAULT 'pending',
    -- Agent that is building (NULL while pending).
    agent_session_id TEXT,
    -- Set when status becomes 'building'.
    started_at       TEXT,
    -- Set when status reaches a terminal state.
    completed_at     TEXT,
    created_at       TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id),
    UNIQUE (derivation_path),
    FOREIGN KEY (agent_session_id) REFERENCES agent_sessions (id)
);
|
||||
|
||||
-- ── Build-Jobs join table ────────────────────────────────────────────
-- Many-to-many relationship between builds and jobs. Because builds
-- are deduplicated, a single build can be shared across multiple jobs
-- (and even projects). This table lets the job controller query
-- "are all builds for job X done?".
CREATE TABLE IF NOT EXISTS build_jobs (
    build_id TEXT NOT NULL REFERENCES builds(id),
    job_id   TEXT NOT NULL REFERENCES jobs(id),
    -- Composite PK prevents duplicate links.
    PRIMARY KEY (build_id, job_id)
);

-- The primary key leads with build_id, so it cannot serve lookups that
-- filter on job_id alone. This index covers the per-job direction
-- ("all builds for job X") without scanning the whole table.
CREATE INDEX IF NOT EXISTS idx_build_jobs_job ON build_jobs(job_id);
|
||||
|
||||
-- ── Effects ──────────────────────────────────────────────────────────
-- Post-build side-effects (deploys, notifications, state file updates)
-- declared in the `herculesCI.onPush` output. They run once all builds
-- of a job have completed.
--
-- Effects are serialised per (project, ref): those belonging to
-- sequence number N wait until every effect of a lower sequence on the
-- same ref has completed, which prevents overlapping deploys.
--
-- Lifecycle: pending -> running -> succeeded / failed / cancelled
CREATE TABLE IF NOT EXISTS effects (
    id              TEXT NOT NULL,
    job_id          TEXT NOT NULL,
    -- JSON array of the Nix attribute path.
    attribute_path  TEXT NOT NULL,
    -- /nix/store/…drv path of the effect derivation.
    derivation_path TEXT NOT NULL,
    status          TEXT NOT NULL DEFAULT 'pending',
    -- Set when status becomes 'running'.
    started_at      TEXT,
    -- Set on terminal status.
    completed_at    TEXT,
    created_at      TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id),
    FOREIGN KEY (job_id) REFERENCES jobs (id)
);

-- Supports "get all effects for a job" queries.
CREATE INDEX IF NOT EXISTS idx_effects_job
    ON effects (job_id);
|
||||
|
||||
-- ── State Files ──────────────────────────────────────────────────────
-- Backs the Hercules CI `hci state` feature: a per-project key-value
-- store of versioned binary blobs. Effects read and write state files
-- to carry data from one CI run to the next (e.g., Terraform state,
-- deployment manifests).
--
-- A write bumps `version` and replaces the `data` BLOB. Uniqueness of
-- a name within a project comes from the composite primary key.
CREATE TABLE IF NOT EXISTS state_files (
    project_id TEXT NOT NULL,
    -- User-defined state file name.
    name       TEXT NOT NULL,
    -- Raw binary payload.
    data       BLOB NOT NULL,
    -- Monotonically increasing on each write.
    version    INTEGER NOT NULL DEFAULT 1,
    -- Cached size for listing without loading data.
    size_bytes INTEGER NOT NULL DEFAULT 0,
    updated_at TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (project_id, name),
    FOREIGN KEY (project_id) REFERENCES projects (id)
);
|
||||
|
||||
-- ── State Locks ──────────────────────────────────────────────────────
-- Distributed advisory locks whose leases expire automatically. An
-- effect takes a lock before reading or writing a state file, so that
-- parallel jobs cannot modify it concurrently.
--
-- Mutual exclusion comes from the (project_id, name) uniqueness rule:
-- at most one lock row per (project, name) can exist at any time.
-- Expired locks are removed lazily on acquire and periodically by a
-- background janitor.
CREATE TABLE IF NOT EXISTS state_locks (
    id         TEXT NOT NULL,
    project_id TEXT NOT NULL,
    -- Lock name (typically matches the state file name).
    name       TEXT NOT NULL,
    -- Free-form identifier of the lock holder.
    owner      TEXT NOT NULL,
    -- Lease expiry; after this time the lock is stale.
    expires_at TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id),
    FOREIGN KEY (project_id) REFERENCES projects (id),
    -- At most one active lock per (project, name).
    UNIQUE (project_id, name)
);
|
||||
|
||||
-- ── Secrets ──────────────────────────────────────────────────────────
-- Project-scoped secret payloads. A secret is handed to the agent
-- during effect execution when its `condition` matches (e.g.,
-- "always", or only for pushes to the default branch).
--
-- `data` holds the secret payload as JSON text; the Rust layer wraps
-- it in `Sensitive<_>` to prevent accidental logging.
-- NOTE(review): these blobs have been described as encrypted, but
-- nothing in this schema shows where encryption is applied — confirm
-- whether `data` is ciphertext or plain JSON at rest.
CREATE TABLE IF NOT EXISTS secrets (
    id         TEXT NOT NULL,
    project_id TEXT NOT NULL,
    -- User-defined secret name.
    name       TEXT NOT NULL,
    -- JSON blob with the secret payload.
    data       TEXT NOT NULL,
    -- JSON-serialized SecretCondition enum.
    condition  TEXT NOT NULL DEFAULT '"always"',
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id),
    FOREIGN KEY (project_id) REFERENCES projects (id),
    -- One secret per name per project.
    UNIQUE (project_id, name)
);
|
||||
|
||||
-- ── Log Entries ──────────────────────────────────────────────────────
-- Agents stream structured log lines while executing tasks (evaluation,
-- build, or effect). Each line has a zero-based index, a millisecond
-- timestamp, a message string, and a severity level.
--
-- The surrogate key is a plain INTEGER PRIMARY KEY (an alias for the
-- rowid) rather than a UUID, to keep inserts cheap on high-volume log
-- streams. AUTOINCREMENT is deliberately not used: SQLite documents it
-- as adding CPU, memory, and disk I/O overhead, and its only extra
-- guarantee is that ids of deleted rows are never reused.
-- NOTE(review): confirm no caller depends on ids never being reused
-- after log rows are deleted.
CREATE TABLE IF NOT EXISTS log_entries (
    id           INTEGER PRIMARY KEY,
    -- The task producing these logs.
    task_id      TEXT NOT NULL,
    -- Zero-based line number within the task.
    line_index   INTEGER NOT NULL,
    -- Milliseconds since epoch for the log line.
    timestamp_ms INTEGER NOT NULL,
    -- Log message content.
    message      TEXT NOT NULL,
    -- 'debug' | 'info' | 'warn' | 'error'
    level        TEXT NOT NULL DEFAULT 'info',
    created_at   TEXT NOT NULL DEFAULT (datetime('now'))
);

-- Speeds up paginated log retrieval: "get lines N..N+limit for task X".
CREATE INDEX IF NOT EXISTS idx_log_entries_task ON log_entries(task_id, line_index);
|
||||
|
||||
-- ── Schedules ────────────────────────────────────────────────────────
-- Cron-driven job triggers: while a schedule is enabled, the scheduler
-- creates a new job on the given ref at the configured interval.
-- (Future feature -- not yet wired into the scheduler.)
CREATE TABLE IF NOT EXISTS schedules (
    id                TEXT NOT NULL,
    project_id        TEXT NOT NULL,
    -- Standard 5-field cron expression.
    cron_expression   TEXT NOT NULL,
    -- Git ref to evaluate.
    ref_name          TEXT NOT NULL DEFAULT 'main',
    -- 1 = active, 0 = paused.
    enabled           INTEGER NOT NULL DEFAULT 1,
    -- When the cron last fired.
    last_triggered_at TEXT,
    created_at        TEXT NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (id),
    FOREIGN KEY (project_id) REFERENCES projects (id)
);
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
-- =======================================================================
-- Add password-based authentication for accounts
-- =======================================================================
--
-- Until this migration, the only supported authentication was agent
-- authentication through cluster join tokens (bcrypt-hashed bearer
-- tokens). Adding `password_hash` to `accounts` lets human users sign
-- in with a username + password as well; the password is stored as a
-- bcrypt hash.
--
-- The column is left nullable on purpose: an account that signs in
-- exclusively through forge OAuth (GitHub, Gitea, etc.) keeps it NULL,
-- and the auth layer checks for NULL before attempting bcrypt
-- verification.

ALTER TABLE accounts
    ADD COLUMN password_hash TEXT;
|
||||
Loading…
Add table
Add a link
Reference in a new issue