init
This commit is contained in:
commit
fd80fbab7e
48 changed files with 16775 additions and 0 deletions
18
crates/jupiter-scheduler/Cargo.toml
Normal file
18
crates/jupiter-scheduler/Cargo.toml
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
[package]
|
||||
name = "jupiter-scheduler"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
|
||||
[dependencies]
|
||||
jupiter-api-types = { workspace = true }
|
||||
jupiter-db = { workspace = true }
|
||||
jupiter-forge = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
anyhow = { workspace = true }
|
||||
1327
crates/jupiter-scheduler/src/engine.rs
Normal file
1327
crates/jupiter-scheduler/src/engine.rs
Normal file
File diff suppressed because it is too large
Load diff
68
crates/jupiter-scheduler/src/error.rs
Normal file
68
crates/jupiter-scheduler/src/error.rs
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
//! Error types for the Jupiter scheduler.
|
||||
//!
|
||||
//! Every fallible scheduler operation returns [`Result<T>`], which uses
|
||||
//! [`SchedulerError`] as its error type. Errors originate from three main
|
||||
//! sources:
|
||||
//!
|
||||
//! - **Database layer** (`jupiter_db`) -- query failures, constraint violations,
|
||||
//! connection issues.
|
||||
//! - **Forge layer** (`jupiter_forge`) -- failures when reporting commit status
|
||||
//! back to GitHub, Gitea, Radicle, etc.
|
||||
//! - **Scheduler logic** -- invalid state transitions, missing jobs, or missing
|
||||
//! agents for a required platform.
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
/// Errors that can occur during scheduler operations.
|
||||
///
|
||||
/// The scheduler is intentionally lenient: most errors are logged and do **not**
|
||||
/// crash the event loop (see [`crate::engine::SchedulerEngine::run`]). Individual
|
||||
/// event handlers return `Result<()>` so the loop can log the failure and
|
||||
/// continue processing the next event.
///
/// The `Display` text of every variant comes from its `#[error(...)]` attribute.
/// The two `#[from]` variants ([`SchedulerError::Db`] and
/// [`SchedulerError::Forge`]) also get a generated `From` impl, so the `?`
/// operator converts the underlying error into a `SchedulerError` directly.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum SchedulerError {
|
||||
/// A database operation failed. This wraps errors from the `jupiter_db`
|
||||
/// crate and can indicate connection failures, SQL constraint violations,
|
||||
/// or missing rows.
/// The wrapped `DbError` is also reported as this error's `source()`.
|
||||
#[error("database error: {0}")]
|
||||
Db(#[from] jupiter_db::error::DbError),
|
||||
|
||||
/// A forge API call failed. This typically occurs when reporting commit
|
||||
/// status back to the forge (GitHub, Gitea, etc.) and could be caused by
|
||||
/// network issues, expired tokens, or rate limiting.
/// The wrapped `ForgeError` is also reported as this error's `source()`.
|
||||
#[error("forge error: {0}")]
|
||||
Forge(#[from] jupiter_forge::error::ForgeError),
|
||||
|
||||
/// No connected agent can serve the requested platform (e.g.,
|
||||
/// `x86_64-linux`, `aarch64-darwin`). The task remains in the queue and
|
||||
/// will be picked up when a suitable agent connects.
/// The payload is the platform string that is interpolated into the message.
|
||||
#[error("no agent available for platform: {0}")]
|
||||
NoAgentAvailable(String),
|
||||
|
||||
/// A job with the given UUID was not found in the database. This can
|
||||
/// happen if a job is deleted while events referencing it are still
|
||||
/// in-flight in the scheduler channel.
|
||||
#[error("job not found: {0}")]
|
||||
JobNotFound(uuid::Uuid),
|
||||
|
||||
/// An attempted job state transition is not valid. For example, trying to
|
||||
/// move a `Succeeded` job to `Evaluating` without going through a re-run
|
||||
/// reset. The `from` and `to` fields contain human-readable state names.
|
||||
///
|
||||
/// Valid transitions are documented in [`crate::engine::SchedulerEngine`].
|
||||
#[error("invalid state transition: {from} -> {to}")]
|
||||
InvalidTransition {
|
||||
/// The current state of the job at the time of the transition attempt.
|
||||
from: String,
|
||||
/// The target state that was rejected.
|
||||
to: String,
|
||||
},
|
||||
|
||||
/// A catch-all for internal scheduler errors that do not fit the other
|
||||
/// variants. The contained string provides a human-readable description.
|
||||
#[error("scheduler error: {0}")]
|
||||
Internal(String),
|
||||
}
|
||||
|
||||
/// Convenience alias used throughout the scheduler crate: a
/// [`std::result::Result`] whose error type is fixed to [`SchedulerError`].
|
||||
pub type Result<T> = std::result::Result<T, SchedulerError>;
|
||||
65
crates/jupiter-scheduler/src/lib.rs
Normal file
65
crates/jupiter-scheduler/src/lib.rs
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
//! # Jupiter Scheduler
|
||||
//!
|
||||
//! The scheduler is the central orchestration engine of **Jupiter**, a self-hosted,
|
||||
//! wire-compatible replacement for [hercules-ci.com](https://hercules-ci.com).
|
||||
//!
|
||||
//! ## Role in the Jupiter architecture
|
||||
//!
|
||||
//! Jupiter follows the Hercules CI model where a server coordinates work that is
|
||||
//! executed by remote *agents*. The scheduler is the "brain" that drives every job
|
||||
//! through the Hercules CI pipeline:
|
||||
//!
|
||||
//! ```text
|
||||
//! ForgeEvent --> Job creation --> Evaluation --> Build --> Effects --> Done
|
||||
//! ```
|
||||
//!
|
||||
//! It runs as a long-lived background **tokio task** (see [`engine::SchedulerEngine::run`])
|
||||
//! and receives [`engine::SchedulerEvent`]s over a bounded `mpsc` channel from:
|
||||
//!
|
||||
//! - **Webhook handlers** -- forge push / PR / patch events.
|
||||
//! - **WebSocket handler** -- messages from connected Hercules CI agents reporting
|
||||
//! evaluation results, build completions, effect outcomes, etc.
|
||||
//! - **REST endpoints** -- user-initiated actions such as re-running or cancelling
|
||||
//! a job.
|
||||
//!
|
||||
//! ## Pipeline state machine
|
||||
//!
|
||||
//! Each **Job** progresses through the following states (see also
|
||||
//! [`engine::SchedulerEngine`] for transition logic):
|
||||
//!
|
||||
//! ```text
|
||||
//! ┌──────────┐    ┌────────────┐    ┌──────────┐    ┌────────────────┐    ┌───────────┐
|
||||
//! │ Pending  │───>│ Evaluating │───>│ Building │───>│ RunningEffects │───>│ Succeeded │
|
||||
//! └──────────┘    └────────────┘    └──────────┘    └────────────────┘    └───────────┘
|
||||
//!                        │                │                  │
|
||||
//!                        v                v                  v
|
||||
//!                ┌───────────────┐  ┌──────────┐       ┌──────────┐
|
||||
//!                │ErrorEvaluating│  │  Failed  │       │  Failed  │
|
||||
//!                └───────────────┘  └──────────┘       └──────────┘
|
||||
//! ```
|
||||
//!
|
||||
//! Any state can also transition to `Cancelled` via user action.
|
||||
//!
|
||||
//! ## Concurrency model
|
||||
//!
|
||||
//! - **Effects within a single job** run concurrently -- they are all enqueued at
|
||||
//! once when the job enters `RunningEffects`.
|
||||
//! - **Effects across jobs on the same project + ref** are serialized via a
|
||||
//! `sequence_number` to prevent ordering hazards (e.g., two pushes deploying
|
||||
//! out of order).
|
||||
//! - **Builds are deduplicated** by derivation path: if two jobs need the same
|
||||
//! `.drv`, only one `Build` record is created and linked to both jobs.
|
||||
//! - **Agent disconnection** causes all in-flight tasks assigned to that agent to
|
||||
//! be returned to `Pending` state so another agent can pick them up.
|
||||
//! - **IFD (import-from-derivation)** requires at least 2 concurrent task slots on
|
||||
//! `x86_64-linux` agents to avoid deadlock (the evaluating agent must be able to
|
||||
//! build the IFD derivation while still running the evaluation).
|
||||
//!
|
||||
//! ## Crate layout
|
||||
//!
|
||||
//! - [`engine`] -- The [`SchedulerEngine`](engine::SchedulerEngine) struct and the
|
||||
//! [`SchedulerEvent`](engine::SchedulerEvent) enum that drives it.
|
||||
//! - [`error`] -- Error types returned by scheduler operations.
|
||||
|
||||
pub mod engine;
|
||||
pub mod error;
|
||||
Loading…
Add table
Add a link
Reference in a new issue