diff --git a/Cargo.lock b/Cargo.lock
index bc8c4c1..2ad1ecd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -42,12 +42,13 @@ dependencies = [
 
 [[package]]
 name = "aikit-sdk"
-version = "0.1.49"
-source = "git+https://github.com/goaikit/aikit?branch=main#60577a3d43d1396b24e2879644f8cf5b93408a4f"
+version = "0.1.50"
+source = "git+https://github.com/goaikit/aikit?branch=main#7380ef9c33c7915d32679da98a818cf7116967fb"
 dependencies = [
  "glob",
  "reqwest 0.12.28",
  "serde",
+ "serde_json",
  "tempfile",
  "toml",
  "walkdir",
diff --git a/src/cli/cli.rs b/src/cli/cli.rs
index 5e26959..b58eaf2 100644
--- a/src/cli/cli.rs
+++ b/src/cli/cli.rs
@@ -5,7 +5,7 @@ use fastskill::FastSkillService;
 use std::sync::Arc;
 
 use crate::cli::commands::{
-    add, analyze, auth, disable, init, install, list, marketplace, package, publish, read,
+    add, analyze, auth, disable, eval, init, install, list, marketplace, package, publish, read,
     registry, reindex, remove, repos, search, serve, show, sources, sync, update, version,
     Commands,
 };
@@ -109,6 +109,10 @@ impl Cli {
             return auth::execute_auth(args).await;
         }
 
+        if let Some(Commands::Eval(args)) = self.command {
+            return eval::execute_eval(args).await;
+        }
+
         // For other commands, we need to restore self.command, so we need a different approach
         // Actually, if we get here, command was None or not Init/Repository/Package/Publish/RegistryIndex
         let command = self.command;
@@ -155,6 +159,7 @@ impl Cli {
             | Some(Commands::Repos(_))
             | Some(Commands::Marketplace(_))
             | Some(Commands::Auth(_))
+            | Some(Commands::Eval(_))
             | Some(Commands::Version(_)) => unreachable!("Handled above"),
             None => {
                 // No subcommand - treat as skill ID for read command
diff --git a/src/cli/commands/eval/mod.rs b/src/cli/commands/eval/mod.rs
new file mode 100644
index 0000000..0b2888a
--- /dev/null
+++ b/src/cli/commands/eval/mod.rs
@@ -0,0 +1,62 @@
+//! Eval command group for skill quality assurance
+
+pub mod report;
+pub mod run;
+pub mod score;
+pub mod validate;
+
+use crate::cli::error::CliResult;
+use clap::{Args, Subcommand};
+
+/// Arguments for the `fastskill eval` command group; holds the selected [`EvalSubcommand`].
+#[derive(Debug, Args)]
+#[command(
+    about = "Evaluation commands for skill quality assurance",
+    after_help = "Examples:\n  fastskill eval validate\n  fastskill eval run --agent codex --output-dir /tmp/evals"
+)]
+pub struct EvalCommand {
+    #[command(subcommand)]
+    pub command: EvalSubcommand,
+}
+
+/// Subcommands of `fastskill eval`; each variant wraps the argument struct of its handler module.
+#[derive(Debug, Subcommand)]
+pub enum EvalSubcommand {
+    /// Validate eval configuration and files
+    #[command(
+        about = "Validate eval configuration and files",
+        after_help = "Examples:\n  fastskill eval validate\n  fastskill eval validate --agent codex"
+    )]
+    Validate(validate::ValidateArgs),
+
+    /// Run eval cases against an agent
+    #[command(
+        about = "Run eval cases against an agent",
+        after_help = "Examples:\n  fastskill eval run --agent codex --output-dir /tmp/evals"
+    )]
+    Run(run::RunArgs),
+
+    /// Show a report for a completed eval run
+    #[command(
+        about = "Show a report for a completed eval run",
+        after_help = "Examples:\n  fastskill eval report --run-dir /tmp/evals/2026-04-01T14-32-10Z"
+    )]
+    Report(report::ReportArgs),
+
+    /// Re-score saved eval artifacts without running the agent again
+    #[command(
+        about = "Re-score saved eval artifacts without running the agent again",
+        after_help = "Examples:\n  fastskill eval score --run-dir /tmp/evals/2026-04-01T14-32-10Z"
+    )]
+    Score(score::ScoreArgs),
+}
+
+/// Route a parsed `fastskill eval` invocation to the handler of its subcommand.
+pub async fn execute_eval(args: EvalCommand) -> CliResult<()> {
+    match args.command {
+        EvalSubcommand::Validate(opts) => validate::execute_validate(opts).await,
+        EvalSubcommand::Run(opts) => run::execute_run(opts).await,
+        EvalSubcommand::Report(opts) => report::execute_report(opts).await,
+        EvalSubcommand::Score(opts) => score::execute_score(opts).await,
+    }
+}
diff --git a/src/cli/commands/eval/report.rs b/src/cli/commands/eval/report.rs
new file mode 100644
index 0000000..a8dda91
--- /dev/null
+++ b/src/cli/commands/eval/report.rs
@@ -0,0 +1,80 @@
+//! Eval report subcommand - artifact summary and formatting
+
+use crate::cli::commands::common::validate_format_args;
+use crate::cli::error::{CliError, CliResult};
+use clap::Args;
+use fastskill::eval::artifacts::read_summary;
+use fastskill::OutputFormat;
+use std::path::PathBuf;
+
+/// Arguments for `fastskill eval report`: the run directory to summarize and the output format.
+#[derive(Debug, Args)]
+#[command(
+    about = "Show a report for a completed eval run",
+    after_help = "Examples:\n  fastskill eval report --run-dir /tmp/evals/2026-04-01T14-32-10Z\n  fastskill eval report --run-dir ./evals/2026-04-01T14-32-10Z --json"
+)]
+pub struct ReportArgs {
+    /// Path to the specific run directory
+    #[arg(long, required = true)]
+    pub run_dir: PathBuf,
+
+    /// Output format: table, json, grid, xml (default: table)
+    #[arg(long, value_enum, help = "Output format: table, json, grid, xml")]
+    pub format: Option<OutputFormat>,
+
+    /// Shorthand for --format json
+    #[arg(long, help = "Shorthand for --format json")]
+    pub json: bool,
+}
+
+/// Execute the `eval report` command: load the run summary from `--run-dir` via `read_summary`
+/// and print it to stdout as pretty JSON or as a plain-text overview with one line per case.
+/// Fails with `CliError::Config` tagged `EVAL_ARTIFACTS_CORRUPT` when the run directory does not
+/// exist or its summary cannot be read; format-flag validation errors are passed through.
+pub async fn execute_report(args: ReportArgs) -> CliResult<()> {
+    let wants_json = validate_format_args(&args.format, args.json)? == OutputFormat::Json;
+    if !args.run_dir.exists() {
+        let msg = format!(
+            "EVAL_ARTIFACTS_CORRUPT: Run directory does not exist: {}",
+            args.run_dir.display()
+        );
+        return Err(CliError::Config(msg));
+    }
+
+    let summary = match read_summary(&args.run_dir) {
+        Ok(loaded) => loaded,
+        Err(e) => {
+            let msg = format!("EVAL_ARTIFACTS_CORRUPT: Failed to read summary.json: {}", e);
+            return Err(CliError::Config(msg));
+        }
+    };
+
+    if wants_json {
+        let rendered = serde_json::to_string_pretty(&summary).unwrap_or_default();
+        println!("{}", rendered);
+        return Ok(());
+    }
+
+    let verdict = if summary.suite_pass {
+        "PASSED"
+    } else {
+        "FAILED"
+    };
+    println!("Eval Report");
+    println!("  run_dir: {}", args.run_dir.display());
+    println!("  agent: {}", summary.agent);
+    if let Some(model) = summary.model.as_ref() {
+        println!("  model: {}", model);
+    }
+    println!("  result: {}", verdict);
+    println!("  cases: {}/{} passed", summary.passed, summary.total_cases);
+
+    if !summary.cases.is_empty() {
+        println!("\nCase Results:");
+        for case in &summary.cases {
+            println!("  [{}] {}", case.status, case.id);
+        }
+    }
+
+    Ok(())
+}
diff --git a/src/cli/commands/eval/run.rs b/src/cli/commands/eval/run.rs
new file mode 100644
index 0000000..8ce7fd2
--- /dev/null
+++ b/src/cli/commands/eval/run.rs
@@ -0,0 +1,272 @@
+//! Eval run subcommand - case execution orchestration
+
+use crate::cli::commands::common::validate_format_args;
+use crate::cli::error::{CliError, CliResult};
+use aikit_sdk::{is_agent_available, is_runnable, runnable_agents};
+use chrono::Utc;
+use clap::Args;
+use fastskill::core::project::resolve_project_file;
+use fastskill::eval::artifacts::{
+    allocate_run_dir, write_case_artifacts, write_summary, CaseSummary, SummaryResult,
+};
+use fastskill::eval::checks::load_checks;
+use fastskill::eval::config::resolve_eval_config;
+use fastskill::eval::runner::{AikitEvalRunner, CaseRunOptions, EvalRunner};
+use fastskill::eval::suite::load_suite;
+use fastskill::OutputFormat;
+use std::env;
+use std::path::PathBuf;
+
+/// Arguments for `fastskill eval run`: agent, output location, case filters, and output options.
+#[derive(Debug, Args)]
+#[command(
+    about = "Run eval cases against an agent",
+    after_help = "Examples:\n  fastskill eval run --agent codex --output-dir /tmp/evals\n  fastskill eval run --agent claude --output-dir ./evals --case my-case\n  fastskill eval run --agent codex --output-dir ./evals --tag basic"
+)]
+pub struct RunArgs {
+    /// Agent to use for execution (required)
+    #[arg(long, required = true, value_parser = validate_agent_key_for_run)]
+    pub agent: String,
+
+    /// Output directory for artifacts (required)
+    #[arg(long, required = true)]
+    pub output_dir: PathBuf,
+
+    /// Optional model override forwarded to the agent
+    #[arg(long)]
+    pub model: Option<String>,
+
+    /// Filter: run only the case with this ID
+    #[arg(long)]
+    pub case: Option<String>,
+
+    /// Filter: run only cases with this tag
+    #[arg(long)]
+    pub tag: Option<String>,
+
+    /// Output format: table, json, grid, xml (default: table)
+    #[arg(long, value_enum, help = "Output format: table, json, grid, xml")]
+    pub format: Option<OutputFormat>,
+
+    /// Shorthand for --format json
+    #[arg(long, help = "Shorthand for --format json")]
+    pub json: bool,
+
+    /// Do not fail with non-zero exit code on suite failure
+    #[arg(long)]
+    pub no_fail: bool,
+}
+
+/// Clap value parser for `--agent`: only keys accepted by `is_runnable` are valid, and the
+/// rejection message enumerates `runnable_agents()` so the user sees the supported choices.
+fn validate_agent_key_for_run(s: &str) -> Result<String, String> {
+    if !is_runnable(s) {
+        let supported = runnable_agents().join(", ");
+        let msg = format!("'{}' is not a supported agent. Supported: {}", s, supported);
+        return Err(msg);
+    }
+
+    Ok(s.to_string())
+}
+
+/// Execute `eval run` by calling [`execute_run_with_runner`] with the `AikitEvalRunner` backend.
+pub async fn execute_run(args: RunArgs) -> CliResult<()> {
+    execute_run_with_runner(args, &AikitEvalRunner).await
+}
+
+/// Execute `eval run` with an injectable [`EvalRunner`] (tests or future adapters).
+///
+/// Runs every selected case through `runner`, writing per-case artifacts and `summary.json`
+/// under a run directory in `--output-dir`; write failures are warned about on stderr, not fatal.
+pub async fn execute_run_with_runner<R: EvalRunner + ?Sized>(
+    args: RunArgs,
+    runner: &R,
+) -> CliResult<()> {
+    let format = validate_format_args(&args.format, args.json)?;
+    let use_json = format == OutputFormat::Json;
+
+    let current_dir = env::current_dir()
+        .map_err(|e| CliError::Config(format!("Failed to get current directory: {}", e)))?;
+
+    let resolution = resolve_project_file(&current_dir);
+    if !resolution.found {
+        return Err(CliError::Config(
+            "EVAL_CONFIG_MISSING: No skill-project.toml found. Run 'fastskill init' first."
+                .to_string(),
+        ));
+    }
+
+    let project_root = resolution
+        .path
+        .parent()
+        .unwrap_or(&current_dir)
+        .to_path_buf();
+
+    let eval_config = resolve_eval_config(&resolution.path, &project_root)
+        .map_err(|e| CliError::Config(e.to_string()))?;
+
+    // Check agent availability
+    if eval_config.fail_on_missing_agent && !is_agent_available(&args.agent) {
+        return Err(CliError::Config(format!(
+            "EVAL_AGENT_UNAVAILABLE: Agent '{}' is not available. Install it first.",
+            args.agent
+        )));
+    }
+
+    // Load suite
+    let mut suite =
+        load_suite(&eval_config.prompts_path).map_err(|e| CliError::Config(e.to_string()))?;
+
+    // Apply filters
+    if let Some(ref case_id) = args.case {
+        suite = suite.filter_by_id(case_id);
+        if suite.cases.is_empty() {
+            return Err(CliError::Config(format!(
+                "No case found with id '{}'",
+                case_id
+            )));
+        }
+    }
+    if let Some(ref tag) = args.tag {
+        suite = suite.filter_by_tag(tag);
+        if suite.cases.is_empty() {
+            return Err(CliError::Config(format!(
+                "No cases found with tag '{}'",
+                tag
+            )));
+        }
+    }
+
+    // Load checks if configured
+    let checks = if let Some(ref checks_path) = eval_config.checks_path {
+        load_checks(checks_path).map_err(|e| CliError::Config(e.to_string()))?
+    } else {
+        vec![]
+    };
+
+    // Allocate run directory
+    let run_id = Utc::now().format("%Y-%m-%dT%H-%M-%SZ").to_string();
+    std::fs::create_dir_all(&args.output_dir).map_err(|e| {
+        CliError::Config(format!(
+            "Failed to create output directory '{}': {}",
+            args.output_dir.display(),
+            e
+        ))
+    })?;
+    let run_dir =
+        allocate_run_dir(&args.output_dir, &run_id).map_err(|e| CliError::Config(e.to_string()))?;
+
+    let run_opts = CaseRunOptions {
+        agent_key: args.agent.clone(),
+        model: args.model.clone(),
+        project_root: project_root.clone(),
+        timeout_seconds: eval_config.timeout_seconds,
+    };
+
+    if !use_json {
+        eprintln!(
+            "Running {} eval case(s) with agent '{}'...",
+            suite.cases.len(),
+            args.agent
+        );
+    }
+
+    let mut case_results = Vec::new();
+    let mut case_summaries = Vec::new();
+
+    for case in &suite.cases {
+        if !use_json {
+            eprintln!("  Running case '{}'...", case.id);
+        }
+
+        let (run_output, case_result, trace_jsonl) =
+            runner.run_case(case, &run_opts, &checks).await;
+
+        // Write artifacts
+        if let Err(e) = write_case_artifacts(
+            &run_dir,
+            &case.id,
+            &run_output.stdout,
+            &run_output.stderr,
+            &trace_jsonl,
+            &case_result,
+        ) {
+            // Report on stderr even with --json: stdout stays clean and the failure is not lost.
+            eprintln!(
+                "  warning: failed to write artifacts for case '{}': {}",
+                case.id, e
+            );
+        }
+
+        let summary_entry = CaseSummary {
+            id: case_result.id.clone(),
+            status: case_result.status.clone(),
+            command_count: case_result.command_count,
+            input_tokens: case_result.input_tokens,
+            output_tokens: case_result.output_tokens,
+        };
+
+        case_summaries.push(summary_entry);
+        case_results.push(case_result);
+    }
+
+    let passed = case_results
+        .iter()
+        .filter(|r| r.status == fastskill::eval::artifacts::CaseStatus::Passed)
+        .count();
+    let failed = case_results.len() - passed;
+    let suite_pass = failed == 0;
+
+    let summary = SummaryResult {
+        suite_pass,
+        agent: args.agent.clone(),
+        model: args.model.clone(),
+        total_cases: case_results.len(),
+        passed,
+        failed,
+        run_dir: run_dir.clone(),
+        checks_path: eval_config.checks_path.map(|p| {
+            if p.is_absolute() {
+                p
+            } else {
+                project_root.join(p)
+            }
+        }),
+        skill_project_root: project_root,
+        cases: case_summaries,
+    };
+
+    // Write summary (a failure is reported on stderr in every output mode)
+    if let Err(e) = write_summary(&run_dir, &summary) {
+        eprintln!("warning: failed to write summary.json: {}", e);
+    }
+
+    if use_json {
+        println!(
+            "{}",
+            serde_json::to_string_pretty(&summary).unwrap_or_default()
+        );
+    } else {
+        println!(
+            "\nEval run complete: {}/{} passed",
+            passed,
+            case_results.len()
+        );
+        println!("  run_dir: {}", run_dir.display());
+        if suite_pass {
+            println!("  result: PASSED");
+        } else {
+            println!("  result: FAILED ({} case(s) failed)", failed);
+        }
+    }
+
+    if !suite_pass && !args.no_fail {
+        return Err(CliError::Config(format!(
+            "Eval suite failed: {}/{} cases passed",
+            passed,
+            case_results.len()
+        )));
+    }
+
+    Ok(())
+}
diff --git a/src/cli/commands/eval/score.rs b/src/cli/commands/eval/score.rs
new file mode 100644
index 0000000..6d9c84d
--- /dev/null
+++ b/src/cli/commands/eval/score.rs
@@ -0,0 +1,146 @@
+//! Eval score subcommand - offline re-scoring from saved artifacts
+
+use crate::cli::commands::common::validate_format_args;
+use crate::cli::error::{CliError, CliResult};
+use clap::Args;
+use fastskill::eval::artifacts::{read_summary, write_summary, CaseStatus};
+use fastskill::eval::checks::load_checks;
+use fastskill::OutputFormat;
+use std::path::PathBuf;
+
+/// Arguments for `fastskill eval score`: the run directory to re-score and the output options.
+#[derive(Debug, Args)]
+#[command(
+    about = "Re-score saved eval artifacts without running the agent again",
+    after_help = "Examples:\n  fastskill eval score --run-dir /tmp/evals/2026-04-01T14-32-10Z\n  fastskill eval score --run-dir ./evals/2026-04-01T14-32-10Z --json"
+)]
+pub struct ScoreArgs {
+    /// Path to the run directory to re-score
+    #[arg(long, required = true)]
+    pub run_dir: PathBuf,
+
+    /// Output format: table, json, grid, xml (default: table)
+    #[arg(long, value_enum, help = "Output format: table, json, grid, xml")]
+    pub format: Option<OutputFormat>,
+
+    /// Shorthand for --format json
+    #[arg(long, help = "Shorthand for --format json")]
+    pub json: bool,
+
+    /// Do not fail with non-zero exit code on suite failure
+    #[arg(long)]
+    pub no_fail: bool,
+}
+
+/// Execute the `eval score` command: re-run the checks named in the run's `summary.json`
+/// against each case's saved `stdout.txt` / `trace.jsonl`, then rewrite the summary.
+///
+/// A case whose artifact directory is missing keeps its recorded status and is still counted,
+/// so a run is never reported as passing merely because its cases could not be re-scored.
+///
+/// Returns `CliError::Config` on unusable artifacts or checks, on a failed summary write, or
+/// when the re-scored suite fails and `--no-fail` was not given.
+pub async fn execute_score(args: ScoreArgs) -> CliResult<()> {
+    let format = validate_format_args(&args.format, args.json)?;
+    let use_json = format == OutputFormat::Json;
+
+    if !args.run_dir.exists() {
+        return Err(CliError::Config(format!(
+            "EVAL_ARTIFACTS_CORRUPT: Run directory does not exist: {}",
+            args.run_dir.display()
+        )));
+    }
+
+    // Read existing summary
+    let mut summary = read_summary(&args.run_dir).map_err(|e| {
+        CliError::Config(format!(
+            "EVAL_ARTIFACTS_CORRUPT: Failed to read summary.json: {}",
+            e
+        ))
+    })?;
+
+    // Validate that we have usable paths
+    let checks_path = summary.checks_path.as_ref().ok_or_else(|| {
+        CliError::Config(
+            "EVAL_ARTIFACTS_CORRUPT: summary.json lacks checks_path - cannot re-score".to_string(),
+        )
+    })?;
+
+    if !checks_path.exists() {
+        return Err(CliError::Config(format!(
+            "EVAL_ARTIFACTS_CORRUPT: checks_path '{}' does not exist",
+            checks_path.display()
+        )));
+    }
+
+    let checks = load_checks(checks_path).map_err(|e| CliError::Config(e.to_string()))?;
+
+    // Re-score in place; `cases` and `skill_project_root` are disjoint fields of `summary`.
+    for case_summary in &mut summary.cases {
+        let case_dir = args.run_dir.join(&case_summary.id);
+        if !case_dir.exists() {
+            eprintln!(
+                "warning: no artifacts for case '{}'; keeping recorded status '{}'",
+                case_summary.id, case_summary.status
+            );
+            continue;
+        }
+
+        let stdout_path = case_dir.join("stdout.txt");
+        let trace_path = case_dir.join("trace.jsonl");
+        // Unreadable or absent files are scored as empty content (best effort).
+        let stdout_content = std::fs::read_to_string(&stdout_path).unwrap_or_default();
+        let trace_jsonl = std::fs::read_to_string(&trace_path).unwrap_or_default();
+
+        let check_results = fastskill::eval::checks::run_checks(
+            &checks,
+            &stdout_content,
+            &trace_jsonl,
+            &summary.skill_project_root,
+        );
+        case_summary.status = if check_results.iter().all(|r| r.passed) {
+            CaseStatus::Passed
+        } else {
+            CaseStatus::Failed
+        };
+    }
+
+    // Every case counts, re-scored or not; any status other than `Passed` counts as failed.
+    let passed = summary
+        .cases
+        .iter()
+        .filter(|c| c.status == CaseStatus::Passed)
+        .count();
+    summary.passed = passed;
+    summary.failed = summary.cases.len() - passed;
+    summary.suite_pass = summary.failed == 0;
+
+    // Update summary.json
+    write_summary(&args.run_dir, &summary)
+        .map_err(|e| CliError::Config(format!("Failed to write updated summary.json: {}", e)))?;
+
+    if use_json {
+        let rendered = serde_json::to_string_pretty(&summary).unwrap_or_default();
+        println!("{}", rendered);
+    } else {
+        println!("Re-scoring complete");
+        println!(
+            "  result: {}",
+            if summary.suite_pass {
+                "PASSED"
+            } else {
+                "FAILED"
+            }
+        );
+        println!("  cases: {}/{} passed", summary.passed, summary.total_cases);
+    }
+
+    if !summary.suite_pass && !args.no_fail {
+        return Err(CliError::Config(format!(
+            "Eval suite failed: {}/{} cases passed after re-scoring",
+            summary.passed, summary.total_cases
+        )));
+    }
+
+    Ok(())
+}
diff --git a/src/cli/commands/eval/validate.rs b/src/cli/commands/eval/validate.rs
new file mode 100644
index 0000000..3c553c4
--- /dev/null
+++ b/src/cli/commands/eval/validate.rs
@@ -0,0 +1,126 @@
+//! Eval validate subcommand - configuration and file validation
+
+use crate::cli::commands::common::validate_format_args;
+use crate::cli::error::{CliError, CliResult};
+use aikit_sdk::{is_agent_available, is_runnable, runnable_agents};
+use clap::Args;
+use fastskill::core::project::resolve_project_file;
+use fastskill::eval::config::resolve_eval_config;
+use fastskill::OutputFormat;
+use std::env;
+
+/// Arguments for `fastskill eval validate`: optional agent to probe plus the output options.
+#[derive(Debug, Args)]
+#[command(
+    about = "Validate eval configuration and files",
+    after_help = "Examples:\n  fastskill eval validate\n  fastskill eval validate --agent codex"
+)]
+pub struct ValidateArgs {
+    /// Check agent availability for the specified agent key
+    #[arg(long, value_parser = validate_agent_key_parser)]
+    pub agent: Option<String>,
+
+    /// Output format: table, json, grid, xml (default: table)
+    #[arg(long, value_enum, help = "Output format: table, json, grid, xml")]
+    pub format: Option<OutputFormat>,
+
+    /// Shorthand for --format json
+    #[arg(long, help = "Shorthand for --format json")]
+    pub json: bool,
+}
+
+/// Clap value parser for `--agent`.
+///
+/// Accepts `s` only when `is_runnable` does; the error lists the keys from `runnable_agents`.
+fn validate_agent_key_parser(s: &str) -> Result<String, String> {
+    if is_runnable(s) {
+        return Ok(s.to_string());
+    }
+
+    let supported = runnable_agents().join(", ");
+    Err(format!("'{}' is not a supported agent. Supported: {}", s, supported))
+}
+
+/// Execute the `eval validate` command: resolve the project's eval configuration and print
+/// it as text or JSON, failing with `CliError::Config` when it cannot be resolved.
+///
+/// With `--agent`, availability is probed once via `is_agent_available`; an unavailable agent
+/// is an error when `fail_on_missing_agent` is set and only a stderr warning otherwise.
+pub async fn execute_validate(args: ValidateArgs) -> CliResult<()> {
+    let format = validate_format_args(&args.format, args.json)?;
+    let use_json = format == OutputFormat::Json;
+
+    let current_dir = env::current_dir()
+        .map_err(|e| CliError::Config(format!("Failed to get current directory: {}", e)))?;
+
+    let resolution = resolve_project_file(&current_dir);
+
+    if !resolution.found {
+        return Err(CliError::Config(
+            "EVAL_CONFIG_MISSING: No skill-project.toml found. Run 'fastskill init' first."
+                .to_string(),
+        ));
+    }
+
+    let project_root = resolution
+        .path
+        .parent()
+        .unwrap_or(&current_dir)
+        .to_path_buf();
+
+    let eval_config = resolve_eval_config(&resolution.path, &project_root)
+        .map_err(|e| CliError::Config(e.to_string()))?;
+
+    // Probe the agent once so the warning/error and the report line below cannot disagree.
+    let agent_status = args.agent.as_ref().map(|k| (k, is_agent_available(k)));
+
+    if let Some((agent_key, false)) = agent_status {
+        if eval_config.fail_on_missing_agent {
+            return Err(CliError::Config(format!(
+                "EVAL_AGENT_UNAVAILABLE: Agent '{}' is not available. Install it or use --agent with an available agent.",
+                agent_key
+            )));
+        }
+        eprintln!(
+            "warning: agent '{}' is not available (fail_on_missing_agent=false, continuing)",
+            agent_key
+        );
+    }
+
+    if use_json {
+        let output = serde_json::json!({
+            "valid": true,
+            "prompts_path": eval_config.prompts_path,
+            "checks_path": eval_config.checks_path,
+            "timeout_seconds": eval_config.timeout_seconds,
+            "fail_on_missing_agent": eval_config.fail_on_missing_agent,
+            "project_root": eval_config.project_root,
+        });
+        let rendered = serde_json::to_string_pretty(&output).unwrap_or_default();
+        println!("{}", rendered);
+    } else {
+        println!("eval configuration: valid");
+        println!("  prompts: {}", eval_config.prompts_path.display());
+        if let Some(ref checks) = eval_config.checks_path {
+            println!("  checks: {}", checks.display());
+        }
+        println!("  timeout: {}s", eval_config.timeout_seconds);
+        println!(
+            "  fail_on_missing_agent: {}",
+            eval_config.fail_on_missing_agent
+        );
+        if let Some((agent_key, available)) = agent_status {
+            println!(
+                "  agent '{}': {}",
+                agent_key,
+                if available {
+                    "available"
+                } else {
+                    "unavailable"
+                }
+            );
+        }
+    }
+
+    Ok(())
+}
diff --git a/src/cli/commands/init.rs b/src/cli/commands/init.rs
index ada79bc..bbd97d6 100644
--- a/src/cli/commands/init.rs
+++ b/src/cli/commands/init.rs
@@ -255,6 +255,7 @@ fn build_skill_project(meta: InitMetadata<'_>) -> CliResult<SkillProjectToml> {
             server: None,
             install_depth: 5,
             skip_transitive: false,
+            eval: None,
         }),
     });
     validate_project_structure(true, dependencies.is_some())
diff --git a/src/cli/commands/mod.rs b/src/cli/commands/mod.rs
index b249950..566585f 100644
--- a/src/cli/commands/mod.rs
+++ b/src/cli/commands/mod.rs
@@ -5,6 +5,7 @@ pub mod analyze;
 pub mod auth;
 pub mod common;
 pub mod disable;
+pub mod eval;
 pub mod init;
 pub mod install;
 pub mod list;
@@ -57,6 +58,13 @@ pub enum Commands {
     )]
     Disable(disable::DisableArgs),
 
+    /// Evaluation commands for skill quality assurance
+    #[command(
+        about = "Evaluation commands for skill quality assurance",
+        after_help = "Examples:\n  fastskill eval validate\n  fastskill eval run --agent codex --output-dir /tmp/evals"
+    )]
+    Eval(eval::EvalCommand),
+
     /// Initialize skill-project.toml for skill authors
     #[command(
         about = "Initialize skill-project.toml in current skill directory",
diff --git a/src/cli/commands/package.rs b/src/cli/commands/package.rs
index eb58aaa..21582f0 100644
--- a/src/cli/commands/package.rs
+++ b/src/cli/commands/package.rs
@@ -48,6 +48,9 @@ pub enum PackagePreset {
 ///   fastskill package --git-diff base head  # Git-based change detection
 ///   fastskill package --skills id1 id2  # Package specific skills
 ///   fastskill package --force           # Package all skills
+///
+/// Note: the `evals/` directory is omitted from packaged skill ZIPs; exclusion is enforced in
+/// `fastskill::core::packaging` when building archives, not in this CLI module.
 #[derive(Debug, Args)]
 pub struct PackageArgs {
     /// Package preset command
diff --git a/src/core/manifest.rs b/src/core/manifest.rs
index abc171b..5012001 100644
--- a/src/core/manifest.rs
+++ b/src/core/manifest.rs
@@ -273,6 +273,33 @@ pub struct FastSkillToolConfig {
     /// Skip transitive dependency resolution entirely (default: false)
     #[serde(default)]
     pub skip_transitive: bool,
+    /// Optional evaluation configuration
+    #[serde(default)]
+    pub eval: Option<EvalConfigToml>,
+}
+
+/// Evaluation configuration as written in the `[tool.fastskill.eval]` TOML table
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EvalConfigToml {
+    /// Path to prompts CSV file (relative to skill project root); the only key without a default
+    pub prompts: PathBuf,
+    /// Optional path to checks TOML file; `None` when the key is omitted
+    #[serde(default)]
+    pub checks: Option<PathBuf>,
+    /// Timeout in seconds for each eval case execution; 900 when the key is omitted
+    #[serde(default = "default_eval_timeout_seconds")]
+    pub timeout_seconds: u64,
+    /// When true (default), `eval run` / `eval validate --agent` fail fast on a missing agent CLI
+    #[serde(default = "default_fail_on_missing_agent")]
+    pub fail_on_missing_agent: bool,
+}
+
+fn default_eval_timeout_seconds() -> u64 {
+    900 // seconds (15 minutes); serde default for `EvalConfigToml::timeout_seconds`
+}
+
+fn default_fail_on_missing_agent() -> bool {
+    true // serde default for `EvalConfigToml::fail_on_missing_agent`
 }
 
 fn default_install_depth() -> u32 {
diff --git a/src/core/packaging.rs b/src/core/packaging.rs
index 892c835..976f79b 100644
--- a/src/core/packaging.rs
+++ b/src/core/packaging.rs
@@ -19,7 +19,9 @@ pub fn package_skill(
     package_skill_with_id(skill_path, output_dir, version, None)
 }
 
-/// Package a skill directory into a ZIP file with explicit skill ID
+/// Package a skill directory into a ZIP file with explicit skill ID.
+///
+/// The `evals/` subtree is excluded from the archive (local eval suites are not published).
 pub fn package_skill_with_id(
     skill_path: &Path,
     output_dir: &Path,
@@ -157,6 +159,11 @@ pub fn package_skill_with_id(
             continue;
         }
 
+        // Skip evals/ directory from published artifacts
+        if relative_path_str.starts_with("evals/") || relative_path_str == "evals" {
+            continue;
+        }
+
         // Read file content
         let mut file_content = Vec::new();
         let mut file = fs::File::open(file_path).map_err(ServiceError::Io)?;
@@ -219,6 +226,11 @@ pub fn package_skill_with_id(
             continue;
         }
 
+        // Skip evals/ directory from published artifacts
+        if relative_path_str.starts_with("evals/") || relative_path_str == "evals" {
+            continue;
+        }
+
         let mut file_content = Vec::new();
         let mut file = fs::File::open(file_path).map_err(ServiceError::Io)?;
         file.read_to_end(&mut file_content)
diff --git a/src/core/repository.rs b/src/core/repository.rs
index a51163e..5fcf1f4 100644
--- a/src/core/repository.rs
+++ b/src/core/repository.rs
@@ -251,6 +251,7 @@ impl RepositoryManager {
                     server: None,
                     install_depth: 5,
                     skip_transitive: false,
+                    eval: None,
                 }),
             });
         } else if let Some(ref mut tool) = project.tool {
@@ -262,6 +263,7 @@ impl RepositoryManager {
                     server: None,
                     install_depth: 5,
                     skip_transitive: false,
+                    eval: None,
                 });
             } else if let Some(ref mut fastskill) = tool.fastskill {
                 fastskill.repositories = Some(manifest_repos);
diff --git a/src/eval/artifacts.rs b/src/eval/artifacts.rs
new file mode 100644
index 0000000..b11a5b1
--- /dev/null
+++ b/src/eval/artifacts.rs
@@ -0,0 +1,209 @@
+//! Artifact layout and persistence for eval runs
+
+use crate::eval::checks::CheckResult;
+use serde::{Deserialize, Serialize};
+use std::path::{Path, PathBuf};
+use thiserror::Error;
+
+/// Outcome of a single eval case; serialized with lowercase variant names (e.g. `"passed"`).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "lowercase")]
+pub enum CaseStatus {
+    Passed,
+    Failed,
+    Error,
+    Skipped,
+}
+
+impl std::fmt::Display for CaseStatus {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self { // lowercase names, identical to the serde representation of each variant
+            CaseStatus::Passed => write!(f, "passed"),
+            CaseStatus::Failed => write!(f, "failed"),
+            CaseStatus::Error => write!(f, "error"),
+            CaseStatus::Skipped => write!(f, "skipped"),
+        }
+    }
+}
+
+/// Per-case result; `write_case_artifacts` persists it as `result.json` in the case directory.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CaseResult {
+    pub id: String,
+    pub status: CaseStatus,
+    pub command_count: Option<usize>,
+    pub input_tokens: Option<u64>,
+    pub output_tokens: Option<u64>,
+    #[serde(default)] // a result.json without `check_results` deserializes to an empty list
+    pub check_results: Vec<CheckResult>,
+    pub error_message: Option<String>,
+}
+
+/// Aggregated run summary; `write_summary` / `read_summary` store it as `summary.json` in the run directory.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SummaryResult {
+    pub suite_pass: bool,
+    pub agent: String,
+    pub model: Option<String>,
+    pub total_cases: usize,
+    pub passed: usize,
+    pub failed: usize,
+    pub run_dir: PathBuf,
+    pub checks_path: Option<PathBuf>,
+    pub skill_project_root: PathBuf,
+    pub cases: Vec<CaseSummary>,
+}
+
+/// Per-case entry in summary.json: the `CaseResult` fields minus check details and error message.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CaseSummary {
+    pub id: String,
+    pub status: CaseStatus,
+    pub command_count: Option<usize>,
+    pub input_tokens: Option<u64>,
+    pub output_tokens: Option<u64>,
+}
+
+/// In-memory bundle for a completed run: its id and directory, the summary, and every per-case result.
+#[derive(Debug)]
+pub struct RunArtifacts {
+    pub run_id: String,
+    pub run_dir: PathBuf,
+    pub summary: SummaryResult,
+    pub case_results: Vec<CaseResult>,
+}
+
+/// Errors during artifact writing/reading; every message carries the `EVAL_ARTIFACTS_CORRUPT` prefix.
+#[derive(Debug, Error)]
+pub enum ArtifactsError {
+    #[error("EVAL_ARTIFACTS_CORRUPT: IO error: {0}")]
+    Io(#[from] std::io::Error), // `#[from]` lets `?` convert std::io::Error directly
+    #[error("EVAL_ARTIFACTS_CORRUPT: JSON error: {0}")]
+    Json(#[from] serde_json::Error), // `#[from]` lets `?` convert serde_json::Error directly
+    #[error("EVAL_ARTIFACTS_CORRUPT: Missing required field: {0}")]
+    MissingField(String),
+}
+
+/// Create and return a run directory named `run_id` under `output_dir`; `run_id` is used verbatim.
+/// If it already exists, tries `<run_id>-2` through `<run_id>-999` and creates the first free name.
+pub fn allocate_run_dir(output_dir: &Path, run_id: &str) -> Result<PathBuf, ArtifactsError> {
+    let base = output_dir.join(run_id);
+    if !base.exists() { // NOTE(review): exists() then create is not atomic — confirm runs never race on one id
+        std::fs::create_dir_all(&base)?;
+        return Ok(base);
+    }
+
+    // Base name is taken: append a numeric suffix, starting at 2
+    for i in 2..=999 {
+        let candidate = output_dir.join(format!("{}-{}", run_id, i));
+        if !candidate.exists() {
+            std::fs::create_dir_all(&candidate)?;
+            return Ok(candidate);
+        }
+    }
+
+    // Fallback when every suffix is taken: reuse the existing base, so its old artifacts may be overwritten
+    Ok(base)
+}
+
+/// Write stdout.txt, stderr.txt, trace.jsonl and result.json into `run_dir/case_id`; returns that directory.
+pub fn write_case_artifacts(
+    run_dir: &Path,
+    case_id: &str,
+    stdout: &[u8],
+    stderr: &[u8],
+    trace_jsonl: &str,
+    result: &CaseResult,
+) -> Result<PathBuf, ArtifactsError> {
+    let case_dir = run_dir.join(case_id); // NOTE(review): case_id becomes a path component as-is — confirm ids are validated upstream
+    std::fs::create_dir_all(&case_dir)?;
+
+    std::fs::write(case_dir.join("stdout.txt"), stdout)?; // raw bytes, written without any decoding
+    std::fs::write(case_dir.join("stderr.txt"), stderr)?;
+    std::fs::write(case_dir.join("trace.jsonl"), trace_jsonl)?;
+
+    let result_json = serde_json::to_string_pretty(result)?; // pretty-printed JSON
+    std::fs::write(case_dir.join("result.json"), result_json)?;
+
+    Ok(case_dir)
+}
+
+/// Serialize `summary` as pretty-printed JSON and write it to `run_dir/summary.json`.
+pub fn write_summary(run_dir: &Path, summary: &SummaryResult) -> Result<(), ArtifactsError> {
+    let summary_json = serde_json::to_string_pretty(summary)?;
+    std::fs::write(run_dir.join("summary.json"), summary_json)?;
+    Ok(())
+}
+
+/// Read and parse `run_dir/summary.json`; a missing file yields `Io`, malformed JSON yields `Json`.
+pub fn read_summary(run_dir: &Path) -> Result<SummaryResult, ArtifactsError> {
+    let summary_path = run_dir.join("summary.json");
+    let content = std::fs::read_to_string(&summary_path)?;
+    let summary: SummaryResult = serde_json::from_str(&content)?;
+    Ok(summary)
+}
+
+/// Read `<subdir>/result.json` for each direct subdirectory of `run_dir` (in `read_dir` order, which is unspecified).
+pub fn read_case_results(run_dir: &Path) -> Result<Vec<CaseResult>, ArtifactsError> {
+    let mut results = Vec::new();
+
+    let entries = std::fs::read_dir(run_dir)?;
+    for entry in entries.flatten() { // flatten() silently drops directory entries that failed to read
+        let path = entry.path();
+        if path.is_dir() {
+            let result_path = path.join("result.json");
+            if result_path.exists() { // subdirectories without a result.json are skipped, not errors
+                let content = std::fs::read_to_string(&result_path)?;
+                let result: CaseResult = serde_json::from_str(&content)?; // one malformed file fails the whole read
+                results.push(result);
+            }
+        }
+    }
+
+    Ok(results)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    #[test]
+    fn test_allocate_run_dir_creates_new() {
+        let dir = TempDir::new().unwrap();
+        let run_dir = allocate_run_dir(dir.path(), "2026-04-01T14-00-00Z").unwrap();
+        assert!(run_dir.exists());
+        assert!(run_dir.ends_with("2026-04-01T14-00-00Z"));
+    }
+
+    #[test]
+    fn test_allocate_run_dir_suffix_on_conflict() {
+        let dir = TempDir::new().unwrap();
+        let run_dir1 = allocate_run_dir(dir.path(), "2026-04-01T14-00-00Z").unwrap();
+        let run_dir2 = allocate_run_dir(dir.path(), "2026-04-01T14-00-00Z").unwrap(); // same id again: must get a suffixed directory
+        assert_ne!(run_dir1, run_dir2);
+        assert!(run_dir2.to_string_lossy().contains("-2")); // NOTE(review): substring match over the whole path; a suffix check would be stricter
+    }
+
+    #[test]
+    fn test_write_and_read_summary() {
+        let dir = TempDir::new().unwrap();
+        let summary = SummaryResult {
+            suite_pass: true,
+            agent: "codex".to_string(),
+            model: None,
+            total_cases: 2,
+            passed: 2,
+            failed: 0,
+            run_dir: dir.path().to_path_buf(),
+            checks_path: None,
+            skill_project_root: dir.path().to_path_buf(),
+            cases: vec![],
+        };
+
+        write_summary(dir.path(), &summary).unwrap(); // round-trip through summary.json
+        let read = read_summary(dir.path()).unwrap();
+        assert_eq!(read.total_cases, 2);
+        assert!(read.suite_pass);
+    }
+}
diff --git a/src/eval/checks.rs b/src/eval/checks.rs
new file mode 100644
index 0000000..47d460b
--- /dev/null
+++ b/src/eval/checks.rs
@@ -0,0 +1,330 @@
+//! Deterministic check engine for eval artifact scoring
+
+use serde::{Deserialize, Serialize};
+use std::path::PathBuf;
+use thiserror::Error;
+
+use crate::eval::trace::{TraceEvent, TracePayload};
+
+/// A deterministic check definition loaded from checks.toml; the `name` key selects the variant
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(tag = "name")]
+pub enum CheckDefinition {
+    /// Check whether a pattern appears (or doesn't appear) as a literal substring of stdout or the trace
+    #[serde(rename = "trigger_expectation")]
+    TriggerExpectation {
+        pattern: String,
+        /// true = pattern must appear; false = pattern must NOT appear
+        expected: bool,
+        #[serde(default = "default_required")]
+        required: bool,
+    },
+    /// Check whether stdout or the trace contains this pattern as a literal substring
+    #[serde(rename = "command_contains")]
+    CommandContains {
+        pattern: String,
+        #[serde(default = "default_required")]
+        required: bool,
+    },
+    /// Check whether `path`, joined onto the working directory, exists after execution
+    #[serde(rename = "file_exists")]
+    FileExists {
+        path: PathBuf,
+        #[serde(default = "default_required")]
+        required: bool,
+    },
+    /// Check that the number of trace events with a `raw_json` payload does not exceed a limit
+    #[serde(rename = "max_command_count")]
+    MaxCommandCount {
+        limit: usize,
+        #[serde(default = "default_required")]
+        required: bool,
+    },
+}
+
+fn default_required() -> bool { // serde default for `required`: a check is required unless it says otherwise
+    true
+}
+
+impl CheckDefinition {
+    pub fn name(&self) -> &str { // same string as the variant's serde `name` tag
+        match self {
+            CheckDefinition::TriggerExpectation { .. } => "trigger_expectation",
+            CheckDefinition::CommandContains { .. } => "command_contains",
+            CheckDefinition::FileExists { .. } => "file_exists",
+            CheckDefinition::MaxCommandCount { .. } => "max_command_count",
+        }
+    }
+
+    pub fn is_required(&self) -> bool { // NOTE(review): `run_checks` and `suite_passes` in this module never consult this flag
+        match self {
+            CheckDefinition::TriggerExpectation { required, .. } => *required,
+            CheckDefinition::CommandContains { required, .. } => *required,
+            CheckDefinition::FileExists { required, .. } => *required,
+            CheckDefinition::MaxCommandCount { required, .. } => *required,
+        }
+    }
+}
+
+/// Result of a single check: its name, whether it passed, and a failure message (`None` when it passed)
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CheckResult {
+    pub check_name: String,
+    pub passed: bool,
+    pub message: Option<String>,
+}
+
+/// TOML file structure for checks configuration: a `check` array, empty when the key is absent
+#[derive(Debug, Deserialize)]
+pub struct ChecksToml {
+    #[serde(rename = "check", default)]
+    pub checks: Vec<CheckDefinition>,
+}
+
+/// Errors loading checks configuration; both messages carry the `EVAL_CHECKS_INVALID` prefix
+#[derive(Debug, Error)]
+pub enum ChecksError {
+    #[error("EVAL_CHECKS_INVALID: Failed to read checks file: {0}")]
+    Io(#[from] std::io::Error),
+    #[error("EVAL_CHECKS_INVALID: Failed to parse checks TOML: {0}")]
+    Parse(#[from] toml::de::Error),
+}
+
+/// Read the TOML file at `path` and return its check definitions (empty if it declares none)
+pub fn load_checks(path: &std::path::Path) -> Result<Vec<CheckDefinition>, ChecksError> {
+    let content = std::fs::read_to_string(path)?;
+    let parsed: ChecksToml = toml::from_str(&content)?;
+    Ok(parsed.checks)
+}
+
+/// Run every check, in order, against stdout, the trace JSONL and the working directory; one result per check
+pub fn run_checks(
+    checks: &[CheckDefinition],
+    stdout_content: &str,
+    trace_jsonl: &str,
+    working_dir: &std::path::Path,
+) -> Vec<CheckResult> {
+    checks
+        .iter()
+        .map(|check| run_single_check(check, stdout_content, trace_jsonl, working_dir))
+        .collect()
+}
+
+/// Count trace events with payload type `raw_json`; lines that do not parse as a `TraceEvent` are ignored.
+pub fn count_raw_json_events(trace_jsonl: &str) -> usize {
+    trace_jsonl
+        .lines()
+        .filter_map(|line| serde_json::from_str::<TraceEvent>(line).ok())
+        .filter(|event| matches!(event.payload, TracePayload::RawJson { .. }))
+        .count()
+}
+
+fn run_single_check( // evaluates one definition; each arm ignores the `required` flag via `..`
+    check: &CheckDefinition,
+    stdout_content: &str,
+    trace_jsonl: &str,
+    working_dir: &std::path::Path,
+) -> CheckResult {
+    match check {
+        CheckDefinition::TriggerExpectation {
+            pattern, expected, ..
+        } => {
+            // Literal (non-regex) substring search over stdout and the trace, joined by a newline
+            let combined = format!("{}\n{}", stdout_content, trace_jsonl);
+            let found = combined.contains(pattern.as_str());
+            let passed = found == *expected; // passes when presence matches the expectation
+            let message = if passed {
+                None
+            } else if *expected {
+                Some(format!("Pattern '{}' not found but was expected", pattern))
+            } else {
+                Some(format!("Pattern '{}' found but was not expected", pattern))
+            };
+            CheckResult {
+                check_name: "trigger_expectation".to_string(),
+                passed,
+                message,
+            }
+        }
+        CheckDefinition::CommandContains { pattern, .. } => {
+            let combined = format!("{}\n{}", stdout_content, trace_jsonl); // same haystack as trigger_expectation
+            let passed = combined.contains(pattern.as_str());
+            let message = if passed {
+                None
+            } else {
+                Some(format!("Pattern '{}' not found in output", pattern))
+            };
+            CheckResult {
+                check_name: "command_contains".to_string(),
+                passed,
+                message,
+            }
+        }
+        CheckDefinition::FileExists { path, .. } => {
+            let full_path = working_dir.join(path); // an absolute `path` replaces working_dir (Path::join semantics)
+            let passed = full_path.exists();
+            let message = if passed {
+                None
+            } else {
+                Some(format!("File '{}' does not exist", path.display()))
+            };
+            CheckResult {
+                check_name: "file_exists".to_string(),
+                passed,
+                message,
+            }
+        }
+        CheckDefinition::MaxCommandCount { limit, .. } => {
+            // Count parsed trace events whose payload is raw_json
+            let count = count_raw_json_events(trace_jsonl);
+            let passed = count <= *limit; // the limit is inclusive
+            let message = if passed {
+                None
+            } else {
+                Some(format!("Command count {} exceeds limit {}", count, limit))
+            };
+            CheckResult {
+                check_name: "max_command_count".to_string(),
+                passed,
+                message,
+            }
+        }
+    }
+}
+
+/// True only if every result passed (vacuously true when empty). NOTE(review): `required` is not available here, so optional checks also fail the suite.
+pub fn suite_passes(results: &[CheckResult]) -> bool {
+    results.iter().all(|r| r.passed)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::path::Path;
+    use tempfile::TempDir;
+
+    #[test]
+    fn test_trigger_expectation_passes_when_pattern_found() {
+        let check = CheckDefinition::TriggerExpectation {
+            pattern: "fastskill".to_string(),
+            expected: true,
+            required: true,
+        };
+        let results = run_checks(&[check], "fastskill triggered", "", Path::new("/tmp")); // working dir is unused by this check
+        assert!(results[0].passed);
+    }
+
+    #[test]
+    fn test_trigger_expectation_fails_when_pattern_missing() {
+        let check = CheckDefinition::TriggerExpectation {
+            pattern: "fastskill".to_string(),
+            expected: true,
+            required: true,
+        };
+        let results = run_checks(&[check], "nothing here", "", Path::new("/tmp"));
+        assert!(!results[0].passed);
+    }
+
+    #[test]
+    fn test_trigger_expectation_negative_passes_when_pattern_absent() {
+        let check = CheckDefinition::TriggerExpectation {
+            pattern: "fastskill".to_string(),
+            expected: false,
+            required: true,
+        };
+        let results = run_checks(&[check], "no match", "", Path::new("/tmp"));
+        assert!(results[0].passed);
+    }
+
+    #[test]
+    fn test_file_exists_check_passes() {
+        let dir = TempDir::new().unwrap();
+        let file_path = dir.path().join("output.txt");
+        std::fs::write(&file_path, "content").unwrap();
+
+        let check = CheckDefinition::FileExists {
+            path: PathBuf::from("output.txt"),
+            required: true,
+        };
+        let results = run_checks(&[check], "", "", dir.path()); // relative path resolves against the temp dir
+        assert!(results[0].passed);
+    }
+
+    #[test]
+    fn test_file_exists_check_fails() {
+        let dir = TempDir::new().unwrap();
+        let check = CheckDefinition::FileExists {
+            path: PathBuf::from("missing.txt"),
+            required: true,
+        };
+        let results = run_checks(&[check], "", "", dir.path());
+        assert!(!results[0].passed);
+    }
+
+    #[test]
+    fn test_max_command_count_passes() {
+        let check = CheckDefinition::MaxCommandCount {
+            limit: 5,
+            required: true,
+        };
+        let trace = r#"{"seq":0,"payload":{"type":"raw_json","data":{"cmd":"a"}}}
+{"seq":1,"payload":{"type":"raw_json","data":{"cmd":"b"}}}
+{"seq":2,"payload":{"type":"raw_line","line":"ok"}}"#;
+        let results = run_checks(&[check], "", trace, Path::new("/tmp")); // two raw_json events, limit 5
+        assert!(results[0].passed);
+    }
+
+    #[test]
+    fn test_max_command_count_fails() {
+        let check = CheckDefinition::MaxCommandCount {
+            limit: 1,
+            required: true,
+        };
+        let trace = r#"{"seq":0,"payload":{"type":"raw_json","data":{"cmd":"a"}}}
+{"seq":1,"payload":{"type":"raw_json","data":{"cmd":"b"}}}
+{"seq":2,"payload":{"type":"raw_json","data":{"cmd":"c"}}}"#;
+        let results = run_checks(&[check], "", trace, Path::new("/tmp")); // three raw_json events, limit 1
+        assert!(!results[0].passed);
+    }
+
+    #[test]
+    fn test_count_raw_json_events_ignores_substring_only() {
+        let trace = r#"{"seq":0,"payload":{"type":"raw_line","line":"mentions raw_json text"}}
+{"seq":1,"payload":{"type":"raw_json","data":{"cmd":"x"}}}"#;
+        assert_eq!(count_raw_json_events(trace), 1); // only the parsed payload type counts, not the text inside a line
+    }
+
+    #[test]
+    fn test_suite_passes_all_passed() {
+        let results = vec![
+            CheckResult {
+                check_name: "a".to_string(),
+                passed: true,
+                message: None,
+            },
+            CheckResult {
+                check_name: "b".to_string(),
+                passed: true,
+                message: None,
+            },
+        ];
+        assert!(suite_passes(&results));
+    }
+
+    #[test]
+    fn test_suite_passes_any_failed() {
+        let results = vec![
+            CheckResult {
+                check_name: "a".to_string(),
+                passed: true,
+                message: None,
+            },
+            CheckResult {
+                check_name: "b".to_string(),
+                passed: false,
+                message: Some("failed".to_string()),
+            },
+        ];
+        assert!(!suite_passes(&results)); // a single failing result fails the suite
+    }
+}
diff --git a/src/eval/config.rs b/src/eval/config.rs
new file mode 100644
index 0000000..ecb6e5b
--- /dev/null
+++ b/src/eval/config.rs
@@ -0,0 +1,140 @@
+//! Eval configuration resolution
+
+use crate::core::manifest::{EvalConfigToml, SkillProjectToml};
+use std::path::{Path, PathBuf};
+use thiserror::Error;
+
+/// Resolved eval configuration (relative paths already joined onto the project root)
+#[derive(Debug, Clone)]
+pub struct EvalConfig {
+    /// Path to the prompts CSV: used as configured if absolute, otherwise joined onto `project_root`
+    pub prompts_path: PathBuf,
+    /// Path to the checks TOML file (optional), resolved the same way as `prompts_path`
+    pub checks_path: Option<PathBuf>,
+    /// Timeout in seconds for each case
+    pub timeout_seconds: u64,
+    /// Whether to fail fast if agent is not available
+    pub fail_on_missing_agent: bool,
+    /// Skill project root directory
+    pub project_root: PathBuf,
+}
+
+/// Errors during eval configuration resolution (only the first two variants carry an `EVAL_*` code prefix)
+#[derive(Debug, Error)]
+pub enum EvalConfigError {
+    #[error("EVAL_CONFIG_MISSING: No [tool.fastskill.eval] section found in skill-project.toml")]
+    ConfigMissing,
+    #[error("EVAL_PROMPTS_NOT_FOUND: Prompts CSV not found: {0}")]
+    PromptsNotFound(PathBuf),
+    #[error("Failed to read skill-project.toml: {0}")]
+    Io(#[from] std::io::Error),
+    #[error("Failed to parse skill-project.toml: {0}")]
+    Parse(String), // holds the stringified TOML parse error
+}
+
+/// Read and parse `project_file`, then resolve its `[tool.fastskill.eval]` section against `project_root`
+pub fn resolve_eval_config(
+    project_file: &Path,
+    project_root: &Path,
+) -> Result<EvalConfig, EvalConfigError> {
+    let content = std::fs::read_to_string(project_file)?;
+    let toml: SkillProjectToml =
+        toml::from_str(&content).map_err(|e| EvalConfigError::Parse(e.to_string()))?;
+
+    let eval_config = toml // a missing `tool`, `fastskill` or `eval` level all map to ConfigMissing
+        .tool
+        .as_ref()
+        .and_then(|t| t.fastskill.as_ref())
+        .and_then(|f| f.eval.as_ref())
+        .ok_or(EvalConfigError::ConfigMissing)?;
+
+    resolve_from_toml(eval_config, project_root)
+}
+
+/// Resolve an already-parsed eval section: join relative paths onto `project_root` and require the prompts file to exist
+pub fn resolve_from_toml(
+    config: &EvalConfigToml,
+    project_root: &Path,
+) -> Result<EvalConfig, EvalConfigError> {
+    let prompts_path = if config.prompts.is_absolute() {
+        config.prompts.clone()
+    } else {
+        project_root.join(&config.prompts)
+    };
+
+    if !prompts_path.exists() {
+        return Err(EvalConfigError::PromptsNotFound(prompts_path));
+    }
+
+    let checks_path = config.checks.as_ref().map(|p| { // unlike prompts, existence of the checks file is not verified here
+        if p.is_absolute() {
+            p.clone()
+        } else {
+            project_root.join(p)
+        }
+    });
+
+    Ok(EvalConfig {
+        prompts_path,
+        checks_path,
+        timeout_seconds: config.timeout_seconds,
+        fail_on_missing_agent: config.fail_on_missing_agent,
+        project_root: project_root.to_path_buf(),
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    #[test]
+    fn test_resolve_eval_config_missing() {
+        let dir = TempDir::new().unwrap();
+        let project_file = dir.path().join("skill-project.toml");
+        std::fs::write(&project_file, "[metadata]\nid = \"test\"\n").unwrap(); // no [tool.fastskill.eval] section
+
+        let result = resolve_eval_config(&project_file, dir.path());
+        assert!(matches!(result, Err(EvalConfigError::ConfigMissing)));
+    }
+
+    #[test]
+    fn test_resolve_eval_config_prompts_not_found() {
+        let dir = TempDir::new().unwrap();
+        let project_file = dir.path().join("skill-project.toml");
+        std::fs::write(
+            &project_file,
+            "[metadata]\nid = \"test\"\n\n[tool.fastskill.eval]\nprompts = \"evals/prompts.csv\"\ntimeout_seconds = 900\nfail_on_missing_agent = true\n",
+        )
+        .unwrap();
+
+        let result = resolve_eval_config(&project_file, dir.path()); // evals/prompts.csv was never created
+        assert!(matches!(result, Err(EvalConfigError::PromptsNotFound(_))));
+    }
+
+    #[test]
+    fn test_resolve_eval_config_success() {
+        let dir = TempDir::new().unwrap();
+        let evals_dir = dir.path().join("evals");
+        std::fs::create_dir_all(&evals_dir).unwrap();
+        let prompts_file = evals_dir.join("prompts.csv");
+        std::fs::write(
+            &prompts_file,
+            "id,prompt,should_trigger\ntest-1,hello,true\n",
+        )
+        .unwrap();
+
+        let project_file = dir.path().join("skill-project.toml");
+        std::fs::write(
+            &project_file,
+            "[metadata]\nid = \"test\"\n\n[tool.fastskill.eval]\nprompts = \"evals/prompts.csv\"\ntimeout_seconds = 600\nfail_on_missing_agent = false\n",
+        )
+        .unwrap();
+
+        let result = resolve_eval_config(&project_file, dir.path());
+        assert!(result.is_ok());
+        let config = result.unwrap();
+        assert_eq!(config.timeout_seconds, 600); // values come straight from the TOML section
+        assert!(!config.fail_on_missing_agent);
+    }
+}
diff --git a/src/eval/mod.rs b/src/eval/mod.rs
new file mode 100644
index 0000000..33ac78a
--- /dev/null
+++ b/src/eval/mod.rs
@@ -0,0 +1,12 @@
+//! Evaluation domain types and services for skill quality assurance
+
+pub mod artifacts;
+pub mod checks;
+pub mod config;
+pub mod runner;
+pub mod suite;
+pub mod trace;
+
+pub use runner::{
+    run_eval_case, AikitEvalRunner, CaseRunOptions, CaseRunOutput, EvalRunner, RunnerError,
+};
diff --git a/src/eval/runner.rs b/src/eval/runner.rs
new file mode 100644
index 0000000..c5495c4
--- /dev/null
+++ b/src/eval/runner.rs
@@ -0,0 +1,300 @@
+//! Eval runner implementation using aikit-sdk
+
+use crate::eval::artifacts::{CaseResult, CaseStatus};
+use crate::eval::checks::{count_raw_json_events, run_checks, CheckDefinition};
+use crate::eval::suite::EvalCase;
+use crate::eval::trace::{agent_events_to_trace, trace_to_jsonl, TraceEvent, TracePayload};
+use aikit_sdk::{run_agent_events, AgentEvent, RunOptions};
+use async_trait::async_trait;
+use std::path::PathBuf;
+use std::time::Duration;
+use thiserror::Error;
+
+/// Options for running a single eval case (agent, model, working root, timeout)
+#[derive(Debug, Clone)]
+pub struct CaseRunOptions {
+    /// Agent key (e.g. "codex", "claude")
+    pub agent_key: String,
+    /// Optional model override; an empty or whitespace-only value is ignored by `AikitEvalRunner`
+    pub model: Option<String>,
+    /// Skill project root: the working directory, or the base a case's `workspace_subdir` is joined onto
+    pub project_root: PathBuf,
+    /// Timeout in seconds
+    pub timeout_seconds: u64,
+}
+
+/// Raw output from running a case: captured streams, exit code, and whether the run timed out
+#[derive(Debug)]
+pub struct CaseRunOutput {
+    pub stdout: Vec<u8>,
+    pub stderr: Vec<u8>,
+    pub exit_code: Option<i32>, // `AikitEvalRunner` sets None on timeout and on execution/spawn failure
+    pub timed_out: bool,
+}
+
+/// Errors during case execution. NOTE(review): `run_case` returns a tuple, and only this file's tests construct these — confirm the intended callers
+#[derive(Debug, Error)]
+pub enum RunnerError {
+    #[error("EVAL_AGENT_UNAVAILABLE: Agent '{0}' is not available")]
+    AgentUnavailable(String),
+    #[error("EVAL_CASE_TIMEOUT: Case timed out after {0}s")]
+    Timeout(u64),
+    #[error("Execution failed: {0}")]
+    ExecutionFailed(String),
+}
+
+/// Abstraction over eval case execution (default: aikit-backed); implementors must be `Send + Sync`.
+#[async_trait]
+pub trait EvalRunner: Send + Sync {
+    /// Run one case; returns (raw stdout/stderr capture, scored result, canonical trace JSONL).
+    async fn run_case(
+        &self,
+        case: &EvalCase,
+        opts: &CaseRunOptions,
+        checks: &[CheckDefinition],
+    ) -> (CaseRunOutput, CaseResult, String);
+}
+
+/// Default runner (stateless unit struct): `aikit_sdk::run_agent_events` inside `spawn_blocking` with SDK timeout/cwd.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct AikitEvalRunner;
+
+/// What the `spawn_blocking` closure returns: the SDK run result plus every event seen by the callback
+struct AgentExecutionResult {
+    result: Result<aikit_sdk::RunResult, aikit_sdk::RunError>,
+    events: Vec<AgentEvent>,
+}
+
+#[async_trait]
+impl EvalRunner for AikitEvalRunner {
+    async fn run_case( // thin trait adapter: all work happens in `run_case_inner`
+        &self,
+        case: &EvalCase,
+        opts: &CaseRunOptions,
+        checks: &[CheckDefinition],
+    ) -> (CaseRunOutput, CaseResult, String) {
+        self.run_case_inner(case, opts, checks).await
+    }
+}
+
+impl AikitEvalRunner {
+    async fn run_case_inner( // run the agent, build the trace, run checks, derive the case status
+        &self,
+        case: &EvalCase,
+        opts: &CaseRunOptions,
+        checks: &[CheckDefinition],
+    ) -> (CaseRunOutput, CaseResult, String) {
+        let agent_key = opts.agent_key.clone(); // owned copies: the blocking closure below is `move`
+        let model = opts.model.clone();
+        let prompt = case.prompt.clone();
+        let timeout_secs = opts.timeout_seconds;
+
+        let working_dir = match &case.workspace_subdir { // the agent's cwd and the base for file_exists checks
+            Some(subdir) => opts.project_root.join(subdir),
+            None => opts.project_root.clone(),
+        };
+
+        let mut run_opts = RunOptions::new()
+            .with_yolo(true)
+            .with_stream(true)
+            .with_timeout(Duration::from_secs(timeout_secs))
+            .with_current_dir(working_dir.clone());
+        if let Some(model_name) = model {
+            if !model_name.trim().is_empty() { // an empty or whitespace-only model means no override
+                run_opts = run_opts.with_model(model_name);
+            }
+        }
+
+        let spawn_result = tokio::task::spawn_blocking(move || { // the SDK call runs on tokio's blocking thread pool
+            let mut events: Vec<AgentEvent> = Vec::new();
+            let result = run_agent_events(&agent_key, &prompt, run_opts, |ev| {
+                events.push(ev.clone());
+            });
+            AgentExecutionResult { result, events }
+        });
+
+        let (run_output, trace_events) = match spawn_result.await {
+            Ok(exec_result) => match exec_result.result {
+                Ok(run_result) => {
+                    let exit_code = run_result.exit_code();
+                    let output = CaseRunOutput {
+                        stdout: run_result.stdout,
+                        stderr: run_result.stderr,
+                        exit_code,
+                        timed_out: false,
+                    };
+                    let trace = agent_events_to_trace(&exec_result.events);
+                    (output, trace)
+                }
+                Err(aikit_sdk::RunError::TimedOut {
+                    timeout, stderr, ..
+                }) => {
+                    let mut trace = agent_events_to_trace(&exec_result.events);
+                    trace.push(TraceEvent { // append a synthetic Timeout marker after the collected events
+                        seq: trace.len(),
+                        payload: TracePayload::Timeout,
+                    });
+                    let output = CaseRunOutput {
+                        stdout: vec![], // NOTE(review): the error's other fields are dropped by `..`; stdout is reported empty
+                        stderr,
+                        exit_code: None,
+                        timed_out: true,
+                    };
+                    if output.stderr.is_empty() { // no stderr captured: substitute a fallback message
+                        let fallback = format!("Case timed out after {}s", timeout.as_secs());
+                        let output = CaseRunOutput { // shadows the value above; only `stderr` differs
+                            stdout: vec![],
+                            stderr: fallback.into_bytes(),
+                            exit_code: None,
+                            timed_out: true,
+                        };
+                        (output, trace)
+                    } else {
+                        (output, trace)
+                    }
+                }
+                Err(e) => { // any other SDK error: keep the events gathered so far, report the error on stderr
+                    let trace = agent_events_to_trace(&exec_result.events);
+                    let output = CaseRunOutput {
+                        stdout: vec![],
+                        stderr: format!("Agent execution failed: {}", e).into_bytes(),
+                        exit_code: None,
+                        timed_out: false,
+                    };
+                    (output, trace)
+                }
+            },
+            Err(e) => { // the blocking task itself failed to complete (JoinError); no events are available
+                let output = CaseRunOutput {
+                    stdout: vec![],
+                    stderr: format!("spawn_blocking failed: {}", e).into_bytes(),
+                    exit_code: None,
+                    timed_out: false,
+                };
+                (output, vec![])
+            }
+        };
+
+        let trace_jsonl = trace_to_jsonl(&trace_events);
+        let stdout_str = String::from_utf8_lossy(&run_output.stdout).to_string(); // invalid UTF-8 is replaced, not rejected
+        let command_count = count_raw_json_events(&trace_jsonl);
+        let check_results = run_checks(checks, &stdout_str, &trace_jsonl, &working_dir);
+        let all_passed = check_results.iter().all(|r| r.passed); // every check counts; `required` is not consulted
+
+        let status = if run_output.timed_out {
+            CaseStatus::Error
+        } else if checks.is_empty() { // no checks configured: fall back to the agent's exit code
+            if run_output.exit_code == Some(0) {
+                CaseStatus::Passed
+            } else {
+                CaseStatus::Failed
+            }
+        } else if all_passed { // NOTE(review): exit code is ignored once checks exist, so a failed agent run can still be Passed — confirm
+            CaseStatus::Passed
+        } else {
+            CaseStatus::Failed
+        };
+
+        let case_result = CaseResult {
+            id: case.id.clone(),
+            status,
+            command_count: Some(command_count),
+            input_tokens: None, // token usage is not populated by this runner
+            output_tokens: None,
+            check_results,
+            error_message: None, // NOTE(review): always None, even on timeout or execution failure (details only reach stderr)
+        };
+
+        (run_output, case_result, trace_jsonl)
+    }
+}
+
+/// Run a single eval case using the default [`AikitEvalRunner`]; returns (raw output, scored result, trace JSONL).
+///
+/// Sole agent execution path per spec (acceptance criterion 29).
+pub async fn run_eval_case(
+    case: &EvalCase,
+    opts: &CaseRunOptions,
+    checks: &[CheckDefinition],
+) -> (CaseRunOutput, CaseResult, String) {
+    AikitEvalRunner.run_case(case, opts, checks).await
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::eval::artifacts::CaseStatus;
+
+    /// Stub runner for trait wiring tests (no aikit): returns a fixed passing result and a one-line trace.
+    struct StubEvalRunner;
+
+    #[async_trait]
+    impl EvalRunner for StubEvalRunner {
+        async fn run_case(
+            &self,
+            case: &EvalCase,
+            _opts: &CaseRunOptions,
+            _checks: &[CheckDefinition],
+        ) -> (CaseRunOutput, CaseResult, String) {
+            let trace_jsonl =
+                r#"{"seq":0,"payload":{"type":"raw_line","line":"stub"}}"#.to_string();
+            let out = CaseRunOutput {
+                stdout: b"ok".to_vec(),
+                stderr: vec![],
+                exit_code: Some(0),
+                timed_out: false,
+            };
+            let result = CaseResult {
+                id: case.id.clone(),
+                status: CaseStatus::Passed,
+                command_count: Some(0),
+                input_tokens: None,
+                output_tokens: None,
+                check_results: vec![],
+                error_message: None,
+            };
+            (out, result, trace_jsonl)
+        }
+    }
+
+    #[tokio::test]
+    async fn test_eval_runner_trait_stub_returns_expected_trace() {
+        let case = EvalCase {
+            id: "c1".to_string(),
+            prompt: "p".to_string(),
+            should_trigger: true,
+            tags: vec![],
+            workspace_subdir: None,
+        };
+        let opts = CaseRunOptions {
+            agent_key: "agent".to_string(),
+            model: None,
+            project_root: PathBuf::from("/tmp"),
+            timeout_seconds: 1,
+        };
+        let runner = StubEvalRunner;
+        let (out, res, trace) = runner.run_case(&case, &opts, &[]).await; // exercises the trait method, not the aikit runner
+        assert_eq!(out.exit_code, Some(0));
+        assert_eq!(res.id, "c1");
+        assert!(trace.contains("raw_line"));
+    }
+
+    #[test]
+    fn test_case_run_options_builder() { // plain struct-literal construction; there is no builder type
+        let opts = CaseRunOptions {
+            agent_key: "codex".to_string(),
+            model: Some("gpt-4".to_string()),
+            project_root: PathBuf::from("/tmp"),
+            timeout_seconds: 300,
+        };
+        assert_eq!(opts.agent_key, "codex");
+        assert_eq!(opts.model, Some("gpt-4".to_string()));
+    }
+
+    #[test]
+    fn test_runner_error_display() {
+        let err = RunnerError::AgentUnavailable("codex".to_string());
+        assert!(err.to_string().contains("codex"));
+        assert!(err.to_string().contains("EVAL_AGENT_UNAVAILABLE"));
+    }
+}
diff --git a/src/eval/suite.rs b/src/eval/suite.rs
new file mode 100644
index 0000000..8d4df58
--- /dev/null
+++ b/src/eval/suite.rs
@@ -0,0 +1,277 @@
+//! Eval suite loading and CSV parsing
+
+use std::path::{Path, PathBuf};
+use thiserror::Error;
+
+/// One eval case: a single data row of the prompts CSV after parsing
+#[derive(Debug, Clone)]
+pub struct EvalCase {
+    /// Case identifier (alphanumeric + hyphens by convention; the CSV parser only rejects empty ids)
+    pub id: String,
+    /// Prompt text to send to the agent
+    pub prompt: String,
+    /// Whether the skill should trigger (documentation-only; checks.toml is authoritative for pass/fail)
+    pub should_trigger: bool,
+    /// Tags for filtering; `EvalSuite::filter_by_tag` compares them by exact string equality
+    pub tags: Vec<String>,
+    /// Optional workspace subdirectory (relative to skill project root)
+    pub workspace_subdir: Option<PathBuf>,
+}
+
+/// A collection of eval cases, held in the order they were supplied
+#[derive(Debug, Default)]
+pub struct EvalSuite {
+    pub cases: Vec<EvalCase>, // the filter methods build new suites and leave this list untouched
+}
+
+impl EvalSuite {
+    /// Wrap an already-built list of cases in a suite.
+    pub fn new(cases: Vec<EvalCase>) -> Self {
+        EvalSuite { cases }
+    }
+
+    /// Return a new suite holding clones of the cases whose `id` equals `id` exactly.
+    pub fn filter_by_id(&self, id: &str) -> EvalSuite {
+        let mut selected = Vec::new();
+        for case in &self.cases {
+            if case.id == id {
+                selected.push(case.clone());
+            }
+        }
+        EvalSuite::new(selected)
+    }
+
+    /// Return a new suite holding clones of the cases that carry `tag` (exact match).
+    pub fn filter_by_tag(&self, tag: &str) -> EvalSuite {
+        let has_tag = |case: &&EvalCase| case.tags.iter().any(|candidate| candidate == tag);
+        let selected: Vec<EvalCase> = self.cases.iter().filter(has_tag).cloned().collect();
+        EvalSuite::new(selected)
+    }
+}
+
+/// Errors that can occur when loading an eval suite; every message starts with an `EVAL_*` code
+#[derive(Debug, Error)]
+pub enum SuiteError {
+    #[error("EVAL_PROMPTS_NOT_FOUND: Prompts CSV file not found: {0}")]
+    PromptsNotFound(PathBuf), // carries the path that was looked up
+    #[error("EVAL_INVALID_CSV: {0}")]
+    InvalidCsv(String), // carries a description of what is wrong with the content
+    #[error("EVAL_INVALID_CSV: IO error: {0}")]
+    Io(#[from] std::io::Error), // note: read failures are reported under the EVAL_INVALID_CSV code too
+}
+
+/// Load an eval suite from a CSV file (columns: id,prompt,should_trigger,tags,workspace_subdir).
+///
+/// Errors: `PromptsNotFound` (file missing), `Io` (other read failure), `InvalidCsv` (bad content).
+pub fn load_suite(prompts_path: &Path) -> Result<EvalSuite, SuiteError> {
+    // Read first and classify the failure: no `exists()` check that could go stale before the read.
+    let content = std::fs::read_to_string(prompts_path).map_err(|err| match err.kind() {
+        std::io::ErrorKind::NotFound => SuiteError::PromptsNotFound(prompts_path.to_path_buf()),
+        _ => SuiteError::Io(err),
+    })?;
+    parse_prompts_csv(&content)
+}
+
+/// Parse prompts CSV content into an [`EvalSuite`].
+///
+/// The first line is the header. `id`, `prompt` and `should_trigger` are required columns;
+/// `tags` and `workspace_subdir` are optional. Column names are matched after trimming.
+/// Blank rows are skipped, and a leading UTF-8 byte-order mark is ignored.
+///
+/// Limitation: every physical line is one record, so a quoted field cannot contain a newline.
+///
+/// Returns `SuiteError::InvalidCsv` when the input is empty, a required column is missing,
+/// or a row has no id (or no prompt cell at all).
+fn parse_prompts_csv(content: &str) -> Result<EvalSuite, SuiteError> {
+    // Spreadsheet exports often start with a UTF-8 BOM, which would otherwise hide the `id` header.
+    let content = content.strip_prefix('\u{feff}').unwrap_or(content);
+    let mut lines = content.lines();
+
+    let header = lines
+        .next()
+        .ok_or_else(|| SuiteError::InvalidCsv("CSV is empty".to_string()))?;
+    let headers = parse_csv_line(header);
+
+    // Required columns fail fast; optional ones use the same lookup but may be absent.
+    let id_idx = find_col(&headers, "id")?;
+    let prompt_idx = find_col(&headers, "prompt")?;
+    let should_trigger_idx = find_col(&headers, "should_trigger")?;
+    let tags_idx = find_col(&headers, "tags").ok();
+    let workspace_subdir_idx = find_col(&headers, "workspace_subdir").ok();
+
+    let mut cases = Vec::new();
+
+    for (row, line) in lines.enumerate() {
+        let line = line.trim();
+        if line.is_empty() {
+            continue;
+        }
+        // 1-based line number for messages: +1 for the zero-based `row`, +1 for the header line.
+        let line_no = row + 2;
+        let cols = parse_csv_line(line);
+
+        let id = cols
+            .get(id_idx)
+            .map(|s| s.trim())
+            .filter(|s| !s.is_empty())
+            .ok_or_else(|| SuiteError::InvalidCsv(format!("Missing id at line {}", line_no)))?
+            .to_string();
+
+        // `parse_csv_line` has already removed the enclosing quotes; any quote still present is
+        // literal prompt text (written as `""` in the CSV) and must not be stripped again.
+        let prompt = cols
+            .get(prompt_idx)
+            .map(|s| s.trim().to_string())
+            .ok_or_else(|| SuiteError::InvalidCsv(format!("Missing prompt at line {}", line_no)))?;
+
+        // Anything other than `true`/`1` (case-insensitive), including a missing cell, is false.
+        let should_trigger = cols
+            .get(should_trigger_idx)
+            .map(|s| matches!(s.trim().to_lowercase().as_str(), "true" | "1"))
+            .unwrap_or(false);
+
+        // Tags are a comma-separated list inside one (normally quoted) cell; empty entries drop out.
+        let tags: Vec<String> = tags_idx
+            .and_then(|idx| cols.get(idx))
+            .map(|s| {
+                s.split(',')
+                    .map(|t| t.trim().to_string())
+                    .filter(|t| !t.is_empty())
+                    .collect()
+            })
+            .unwrap_or_default();
+
+        // An empty or absent cell means the case has no workspace subdirectory.
+        let workspace_subdir = workspace_subdir_idx
+            .and_then(|idx| cols.get(idx))
+            .map(|s| s.trim())
+            .filter(|s| !s.is_empty())
+            .map(PathBuf::from);
+
+        cases.push(EvalCase {
+            id,
+            prompt,
+            should_trigger,
+            tags,
+            workspace_subdir,
+        });
+    }
+
+    Ok(EvalSuite::new(cases))
+}
+
+/// Index of the header equal to `name` after trimming, or `InvalidCsv` naming the absent column.
+fn find_col(headers: &[String], name: &str) -> Result<usize, SuiteError> {
+    let missing = || SuiteError::InvalidCsv(format!("Missing required column: {}", name));
+    let matches_name = |header: &String| header.trim() == name;
+    headers.iter().position(matches_name).ok_or_else(missing)
+}
+
+/// Split one CSV record into its fields.
+///
+/// A `"` toggles quoted mode and is not copied to the output; inside quoted mode `""` yields a
+/// single literal quote and commas are ordinary characters. Whitespace is preserved as written.
+/// An unterminated quote simply runs to the end of the line.
+fn parse_csv_line(line: &str) -> Vec<String> {
+    let mut fields: Vec<String> = Vec::new();
+    let mut field = String::new();
+    let mut quoted = false;
+    let mut rest = line.chars().peekable();
+
+    while let Some(ch) = rest.next() {
+        if ch == '"' {
+            if !quoted {
+                quoted = true;
+            } else if rest.peek() == Some(&'"') {
+                // Doubled quote inside a quoted field: consume the second one, emit one quote.
+                rest.next();
+                field.push('"');
+            } else {
+                quoted = false;
+            }
+        } else if ch == ',' && !quoted {
+            // Field boundary: hand the finished field over and start a fresh one.
+            fields.push(std::mem::take(&mut field));
+        } else {
+            field.push(ch);
+        }
+    }
+
+    // Whatever is left is the last field (an empty string after a trailing comma).
+    fields.push(field);
+    fields
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Build a case for the filter tests; only `id` and `tags` matter to the filters.
+    fn case(id: &str, prompt: &str, should_trigger: bool, tags: &[&str]) -> EvalCase {
+        EvalCase {
+            id: id.to_string(),
+            prompt: prompt.to_string(),
+            should_trigger,
+            tags: tags.iter().map(|tag| tag.to_string()).collect(),
+            workspace_subdir: None,
+        }
+    }
+
+    /// The two-case suite shared by the filter tests: `a` and `b`, tagged as given.
+    fn two_cases(tags_a: &[&str], tags_b: &[&str]) -> EvalSuite {
+        EvalSuite::new(vec![
+            case("a", "p1", true, tags_a),
+            case("b", "p2", false, tags_b),
+        ])
+    }
+
+    #[test]
+    fn test_parse_prompts_csv_basic() {
+        // Header plus two data rows, each ending with an empty workspace_subdir cell.
+        let rows = [
+            "id,prompt,should_trigger,tags,workspace_subdir",
+            "test-1,\"Do something\",true,\"basic\",",
+            "test-2,\"Do nothing\",false,\"\",",
+        ];
+        let csv = format!("{}\n", rows.join("\n"));
+
+        // Compare ids and flags in one go; the cases must come back in file order.
+        let suite = parse_prompts_csv(&csv).unwrap();
+        let parsed: Vec<(&str, bool)> = suite
+            .cases
+            .iter()
+            .map(|entry| (entry.id.as_str(), entry.should_trigger))
+            .collect();
+        assert_eq!(parsed, vec![("test-1", true), ("test-2", false)]);
+    }
+
+    #[test]
+    fn test_parse_prompts_csv_missing_required_col() {
+        // `should_trigger` is absent from the header, so the error must name it.
+        let error = parse_prompts_csv("id,prompt\ntest-1,hello\n")
+            .expect_err("a header without should_trigger must be rejected");
+        assert!(error.to_string().contains("should_trigger"));
+    }
+
+    #[test]
+    fn test_filter_by_id() {
+        let suite = two_cases(&[], &[]);
+
+        let filtered = suite.filter_by_id("a");
+
+        // Only case `a` has the id that was asked for.
+        let ids: Vec<&str> = filtered.cases.iter().map(|entry| entry.id.as_str()).collect();
+        assert_eq!(ids, ["a"]);
+    }
+
+    #[test]
+    fn test_filter_by_tag() {
+        let suite = two_cases(&["foo", "bar"], &["baz"]);
+
+        let filtered = suite.filter_by_tag("foo");
+
+        // Only case `a` carries the `foo` tag.
+        let ids: Vec<&str> = filtered.cases.iter().map(|entry| entry.id.as_str()).collect();
+        assert_eq!(ids, ["a"]);
+    }
+}
diff --git a/src/eval/trace.rs b/src/eval/trace.rs
new file mode 100644
index 0000000..e028b1f
--- /dev/null
+++ b/src/eval/trace.rs
@@ -0,0 +1,127 @@
+//! Trace event types for eval case execution
+
+use aikit_sdk::{AgentEvent, AgentEventPayload};
+use serde::{Deserialize, Serialize};
+
+/// A single line in a trace.jsonl file (`trace_to_jsonl` writes one serialized event per line)
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct TraceEvent {
+    /// Sequence number (0-based; may have gaps, since `stdout_to_trace` numbers lines then skips blank ones)
+    pub seq: usize,
+    /// Event payload
+    pub payload: TracePayload,
+}
+
+/// Payload of a trace event; serialized with a `type` tag holding the snake_case variant name
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum TracePayload {
+    /// A line that was valid JSON, stored as the parsed value
+    RawJson { data: serde_json::Value },
+    /// A raw text line (in `stdout_to_trace`: any non-blank line that did not parse as JSON)
+    RawLine { line: String },
+    /// A raw bytes chunk (base64-encoded)
+    RawBytes { b64: String },
+    /// Execution error
+    Error { message: String },
+    /// Case timed out
+    Timeout,
+}
+
+/// Convert aikit-sdk agent events into trace events, keeping each event's sequence number.
+pub fn agent_events_to_trace(events: &[AgentEvent]) -> Vec<TraceEvent> {
+    use base64::{engine::general_purpose::STANDARD, Engine as _};
+    let mut trace = Vec::with_capacity(events.len());
+    for event in events {
+        // One trace payload variant per SDK payload variant; bytes become base64 text.
+        let payload = match &event.payload {
+            AgentEventPayload::RawBytes(bytes) => TracePayload::RawBytes {
+                b64: STANDARD.encode(bytes),
+            },
+            AgentEventPayload::RawLine(text) => TracePayload::RawLine {
+                line: text.clone(),
+            },
+            AgentEventPayload::JsonLine(json) => TracePayload::RawJson {
+                data: json.clone(),
+            },
+        };
+        trace.push(TraceEvent {
+            seq: event.seq as usize,
+            payload,
+        });
+    }
+    trace
+}
+
+/// Convert captured stdout into trace events, one per non-blank line.
+///
+/// Bytes are decoded as UTF-8 lossily. A line whose trimmed text parses as JSON becomes
+/// `RawJson`; any other line becomes `RawLine` carrying the untrimmed text. `seq` is the
+/// zero-based index of the line within `stdout`, so skipped blank lines leave gaps.
+pub fn stdout_to_trace(stdout: &[u8]) -> Vec<TraceEvent> {
+    let decoded = String::from_utf8_lossy(stdout);
+
+    decoded
+        .lines()
+        .enumerate()
+        // Lines are numbered before blank ones are dropped, which is why `seq` can skip values.
+        .filter(|(_, raw)| !raw.trim().is_empty())
+        .map(|(seq, raw)| {
+            // JSON is tried first; anything that does not parse is kept as plain text.
+            let payload = match serde_json::from_str::<serde_json::Value>(raw.trim()) {
+                Ok(data) => TracePayload::RawJson { data },
+                Err(_) => TracePayload::RawLine {
+                    line: raw.to_string(),
+                },
+            };
+            TraceEvent { seq, payload }
+        })
+        .collect()
+}
+
+/// Serialize trace events to JSONL (`\n`-separated, no trailing newline); unserializable events are skipped
+pub fn trace_to_jsonl(events: &[TraceEvent]) -> String {
+    let mut rendered: Vec<String> = Vec::new();
+    for event in events {
+        rendered.extend(serde_json::to_string(event).ok());
+    }
+    rendered.join("\n")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_stdout_to_trace_text_lines() {
+        // Two plain-text lines: both survive, and the first keeps sequence number 0.
+        let trace = stdout_to_trace(b"hello world\nfoo bar\n");
+        assert_eq!(trace.len(), 2);
+        assert_eq!(trace[0].seq, 0);
+        assert!(
+            matches!(&trace[0].payload, TracePayload::RawLine { line } if line == "hello world")
+        );
+    }
+
+    #[test]
+    fn test_stdout_to_trace_json_lines() {
+        // A JSON object line followed by a line that is not JSON.
+        let trace = stdout_to_trace(b"{\"key\": \"value\"}\nplain line\n");
+        assert_eq!(trace.len(), 2);
+        assert!(matches!(&trace[0].payload, TracePayload::RawJson { .. }));
+        assert!(matches!(&trace[1].payload, TracePayload::RawLine { .. }));
+    }
+
+    #[test]
+    fn test_trace_to_jsonl() {
+        let payload = TracePayload::RawLine {
+            line: String::from("test"),
+        };
+        let single = [TraceEvent { seq: 0, payload }];
+        // Check the sequence field and the snake_case type tag in the serialized text.
+        let serialized = trace_to_jsonl(&single);
+        for needle in ["\"seq\":0", "raw_line"] {
+            assert!(serialized.contains(needle));
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 6eef1a0..4dc9dc8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -49,6 +49,7 @@
 //! ```
 
 pub mod core;
+pub mod eval;
 pub mod events;
 pub mod execution;
 pub mod http;
diff --git a/tests/cli/eval_tests.rs b/tests/cli/eval_tests.rs
new file mode 100644
index 0000000..f98a08f
--- /dev/null
+++ b/tests/cli/eval_tests.rs
@@ -0,0 +1,301 @@
+//! CLI integration tests for eval commands
+
+#![allow(clippy::all, clippy::unwrap_used, clippy::expect_used)]
+
+use super::snapshot_helpers::{
+    assert_snapshot_with_settings, cli_snapshot_settings, run_fastskill_command,
+    run_fastskill_command_with_env,
+};
+
+#[test]
+fn test_eval_help() {
+    let help = run_fastskill_command(&["eval", "--help"], None);
+    assert!(help.success); // the run must report success before its output is compared
+    assert_snapshot_with_settings("eval_help", &help.stdout, &cli_snapshot_settings());
+}
+
+#[test]
+fn test_eval_validate_help() {
+    // The help text of `eval validate` is pinned by a snapshot.
+    let help = run_fastskill_command(&["eval", "validate", "--help"], None);
+    assert!(help.success);
+
+    let settings = cli_snapshot_settings();
+    let printed = &help.stdout;
+    assert_snapshot_with_settings("eval_validate_help", printed, &settings);
+}
+
+#[test]
+fn test_eval_run_help() {
+    let shown = run_fastskill_command(&["eval", "run", "--help"], None);
+    assert!(shown.success); // the run must report success before its output is compared
+    assert_snapshot_with_settings("eval_run_help", &shown.stdout, &cli_snapshot_settings());
+}
+
+#[test]
+fn test_eval_report_help() {
+    let usage = run_fastskill_command(&["eval", "report", "--help"], None);
+    assert!(usage.success); // the run must report success before its output is compared
+    assert_snapshot_with_settings("eval_report_help", &usage.stdout, &cli_snapshot_settings());
+}
+
+#[test]
+fn test_eval_score_help() {
+    let usage = run_fastskill_command(&["eval", "score", "--help"], None);
+    assert!(usage.success); // the run must report success before its output is compared
+    assert_snapshot_with_settings("eval_score_help", &usage.stdout, &cli_snapshot_settings());
+}
+
+#[test]
+fn test_eval_run_requires_agent() {
+    // `--agent` is deliberately omitted from the command line.
+    let run = run_fastskill_command(&["eval", "run", "--output-dir", "/tmp/evals"], None);
+    assert!(!run.success);
+
+    // The complaint may be printed on either stream, so search both.
+    let output = format!("{}{}", run.stdout, run.stderr);
+    let mentions_agent = ["--agent", "agent"].iter().any(|needle| output.contains(*needle));
+    assert!(mentions_agent, "Expected error about missing --agent, got: {}", output);
+}
+
+#[test]
+fn test_eval_run_requires_output_dir() {
+    // `--output-dir` is deliberately omitted from the command line.
+    let run = run_fastskill_command(&["eval", "run", "--agent", "codex"], None);
+    assert!(!run.success);
+
+    // Search both streams, accepting any of the spellings the message
+    // might use for the option name.
+    let output = format!("{}{}", run.stdout, run.stderr);
+    let spellings = ["--output-dir", "output-dir", "output_dir"];
+    let names_option = spellings.iter().any(|spelling| output.contains(*spelling));
+    assert!(names_option, "Expected error about missing --output-dir, got: {}", output);
+}
+
+#[test]
+fn test_eval_run_rejects_unsupported_agent() {
+    // A key the test expects the CLI to refuse as an agent name.
+    let bogus_agent = "unsupported-agent-xyz";
+
+    let args = [
+        "eval",
+        "run",
+        "--agent",
+        bogus_agent,
+        "--output-dir",
+        "/tmp/evals",
+    ];
+    let run = run_fastskill_command(&args, None);
+    assert!(!run.success);
+
+    // Either the offending key is echoed back or the generic wording appears.
+    let output = format!("{}{}", run.stdout, run.stderr);
+    let rejected = output.contains(bogus_agent) || output.contains("not a supported agent");
+    assert!(rejected, "Expected error about unsupported agent, got: {}", output);
+}
+
+#[test]
+fn test_eval_validate_no_project_file() {
+    // A freshly created temporary directory: nothing has written a skill-project.toml into it.
+    let empty_dir = tempfile::TempDir::new().unwrap();
+    let run = run_fastskill_command(&["eval", "validate"], Some(empty_dir.path()));
+    assert!(!run.success);
+
+    // The message may name the missing file or carry the error code, on either stream.
+    let output = format!("{}{}", run.stdout, run.stderr);
+    let expected = ["skill-project.toml", "EVAL_CONFIG_MISSING"];
+    let explained = expected.iter().any(|needle| output.contains(*needle));
+    assert!(explained, "Expected error about missing skill-project.toml, got: {}", output);
+}
+
+#[test]
+fn test_eval_validate_no_eval_config() {
+    use std::fs;
+    use tempfile::TempDir;
+
+    // A project file with metadata only: the text written below contains
+    // no `[tool.fastskill.eval]` table.
+    let project = TempDir::new().unwrap();
+    let manifest = project.path().join("skill-project.toml");
+    fs::write(manifest, "[metadata]\nid = \"test-skill\"\n").unwrap();
+
+    // Validation is expected to fail and to say so on one of the two streams.
+    let run = run_fastskill_command(&["eval", "validate"], Some(project.path()));
+    assert!(!run.success);
+
+    let output = format!("{}{}", run.stdout, run.stderr);
+    // NOTE(review): the "eval" alternative is very loose; output that merely mentions the
+    // subcommand name would satisfy it, so this mostly checks the failure itself.
+    let reported = output.contains("EVAL_CONFIG_MISSING") || output.contains("eval");
+    assert!(reported, "Expected EVAL_CONFIG_MISSING error, got: {}", output);
+}
+
+#[test]
+fn test_eval_validate_with_eval_config() {
+    use std::fs;
+    use tempfile::TempDir;
+
+    let project = TempDir::new().unwrap();
+    let root = project.path();
+    // The prompts file lives in its own `evals` directory, which has to exist first.
+    fs::create_dir_all(root.join("evals")).unwrap();
+
+    // Fixture files, as (path relative to the project root, file content):
+    // the prompts CSV, a SKILL.md so it's detected as skill context,
+    // and a skill-project.toml whose eval table points at the CSV.
+    let fixtures = [
+        (
+            "evals/prompts.csv",
+            "id,prompt,should_trigger,tags,workspace_subdir\ntest-1,\"Test prompt\",true,\"basic\",\n",
+        ),
+        ("SKILL.md", "# Test Skill\n"),
+        (
+            "skill-project.toml",
+            "[metadata]\nid = \"test-skill\"\n\n[tool.fastskill.eval]\nprompts = \"evals/prompts.csv\"\ntimeout_seconds = 300\nfail_on_missing_agent = false\n",
+        ),
+    ];
+    for (relative, content) in fixtures {
+        fs::write(root.join(relative), content).unwrap();
+    }
+
+    let run = run_fastskill_command(&["eval", "validate"], Some(root));
+    let (stdout, stderr) = (&run.stdout, &run.stderr);
+    assert!(
+        run.success,
+        "Expected eval validate to succeed, got stdout: {}, stderr: {}",
+        stdout, stderr
+    );
+    let looks_valid = stdout.contains("valid") || stdout.contains("prompts");
+    assert!(looks_valid, "Expected valid output, got: {}", stdout);
+}
+
+#[test]
+fn test_eval_report_requires_run_dir() {
+    // `eval report` is invoked without its `--run-dir` option.
+    let run = run_fastskill_command(&["eval", "report"], None);
+    assert!(!run.success);
+
+    // The option name may show up on stdout or stderr, with or without the dashes.
+    let output = format!("{}{}", run.stdout, run.stderr);
+    let names_option = output.contains("--run-dir") || output.contains("run-dir");
+    assert!(names_option, "Expected error about missing --run-dir, got: {}", output);
+}
+
+#[test]
+fn test_eval_score_requires_run_dir() {
+    // `eval score` is invoked without its `--run-dir` option.
+    let run = run_fastskill_command(&["eval", "score"], None);
+    assert!(!run.success);
+
+    // The option name may show up on stdout or stderr, with or without the dashes.
+    let output = format!("{}{}", run.stdout, run.stderr);
+    let names_option = output.contains("--run-dir") || output.contains("run-dir");
+    assert!(names_option, "Expected error about missing --run-dir, got: {}", output);
+}
+
+#[test]
+fn test_eval_report_nonexistent_run_dir() {
+    // A run directory path that is expected not to exist on the machine running the test.
+    let missing_dir = "/tmp/nonexistent-fastskill-eval-dir-xyz123";
+    let args = ["eval", "report", "--run-dir", missing_dir];
+    let run = run_fastskill_command(&args, None);
+    assert!(!run.success);
+
+    // Either the error code or the plain-language wording is accepted, on either stream.
+    let output = format!("{}{}", run.stdout, run.stderr);
+    let explained = ["EVAL_ARTIFACTS_CORRUPT", "not exist"]
+        .iter()
+        .any(|needle| output.contains(*needle));
+    assert!(
+        explained,
+        "Expected error about nonexistent dir, got: {}",
+        output
+    );
+}
+
+#[test]
+#[cfg(unix)] // needs a bash script, the executable bit and a `:`-separated PATH
+fn test_eval_run_persists_event_trace_jsonl() {
+    use serde_json::Value;
+    use std::env;
+    use std::fs;
+    use std::os::unix::fs::PermissionsExt;
+    use tempfile::TempDir;
+
+    let dir = TempDir::new().unwrap();
+    let evals_dir = dir.path().join("evals");
+    fs::create_dir_all(&evals_dir).unwrap();
+    fs::write(
+        evals_dir.join("prompts.csv"),
+        "id,prompt,should_trigger,tags,workspace_subdir\ntrace-case,\"test prompt\",true,\"basic\",\n",
+    )
+    .unwrap();
+    fs::write(dir.path().join("SKILL.md"), "# Test Skill\n").unwrap();
+    fs::write(
+        dir.path().join("skill-project.toml"),
+        "[metadata]\nid = \"test-skill\"\n\n[tool.fastskill.eval]\nprompts = \"evals/prompts.csv\"\ntimeout_seconds = 30\nfail_on_missing_agent = true\n",
+    )
+    .unwrap();
+
+    // Create a fake `agent` executable so aikit_sdk::is_agent_available("agent") succeeds.
+    let bin_dir = dir.path().join("bin");
+    fs::create_dir_all(&bin_dir).unwrap();
+    let agent_path = bin_dir.join("agent");
+    fs::write(
+        &agent_path,
+        "#!/usr/bin/env bash\nif [[ \"$1\" == \"--version\" ]]; then echo \"agent 0.1\"; exit 0; fi\necho '{\"event\":\"ok\"}'\nexit 0\n",
+    )
+    .unwrap();
+    let mut perms = fs::metadata(&agent_path).unwrap().permissions();
+    perms.set_mode(0o755);
+    fs::set_permissions(&agent_path, perms).unwrap();
+
+    // Put the fake agent's directory first in the PATH value handed to the command.
+    let output_dir = dir.path().join("out");
+    let path = env::var("PATH").unwrap_or_default();
+    let merged_path = format!("{}:{}", bin_dir.display(), path);
+    let env_vars = vec![("PATH", merged_path.as_str())];
+
+    let result = run_fastskill_command_with_env(
+        &[
+            "eval",
+            "run",
+            "--agent",
+            "agent",
+            "--output-dir",
+            output_dir.to_str().unwrap(),
+            "--case",
+            "trace-case",
+            "--json",
+        ],
+        &env_vars,
+        Some(dir.path()),
+    );
+    assert!(
+        result.success,
+        "Expected eval run to succeed, got stdout: {}, stderr: {}",
+        result.stdout, result.stderr
+    );
+
+    // Parse the summary starting at the first `{` in stdout, skipping anything printed before it.
+    let json_start = result.stdout.find('{').unwrap();
+    let summary: Value = serde_json::from_str(&result.stdout[json_start..]).unwrap();
+    let run_dir = summary["run_dir"].as_str().unwrap();
+    let trace_path = std::path::Path::new(run_dir)
+        .join("trace-case")
+        .join("trace.jsonl");
+    let trace_jsonl = fs::read_to_string(&trace_path).unwrap();
+
+    assert!(
+        trace_jsonl.contains("\"type\":\"raw_json\""),
+        "expected persisted trace.jsonl to contain raw_json event, got: {}",
+        trace_jsonl
+    );
+
+    let result_path = std::path::Path::new(run_dir)
+        .join("trace-case")
+        .join("result.json");
+    let case_result: Value =
+        serde_json::from_str(&fs::read_to_string(result_path).unwrap()).unwrap();
+    assert_eq!(case_result["command_count"], 1);
+}
diff --git a/tests/cli/mod.rs b/tests/cli/mod.rs
index 50d76de..cda8814 100644
--- a/tests/cli/mod.rs
+++ b/tests/cli/mod.rs
@@ -7,6 +7,7 @@ pub mod add_tests;
 pub mod analyze_cluster_tests;
 pub mod auth_e2e_tests;
 pub mod config_tests;
+pub mod eval_tests;
 pub mod example_tests;
 pub mod help_tests;
 pub mod init_tests;
diff --git a/tests/cli/repository_tests.rs b/tests/cli/repository_tests.rs
index 820a2f6..be64a6d 100644
--- a/tests/cli/repository_tests.rs
+++ b/tests/cli/repository_tests.rs
@@ -85,6 +85,7 @@ web-scraper = "1.0.0"
         server: None,
         install_depth: 5,
         skip_transitive: false,
+        eval: None,
     });
     let repos = fastskill_config.repositories.get_or_insert_with(Vec::new);
 
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_directory_walking.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_directory_walking.snap
index b5125c6..5d169ee 100644
--- a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_directory_walking.snap
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_directory_walking.snap
@@ -24,6 +24,7 @@ Commands:
   analyze      Diagnostic and analysis commands
   auth         Manage authentication for registries
   disable      Disable skills by ID
+  eval         Evaluation commands for skill quality assurance
   init         Initialize skill-project.toml in current skill directory
   install      Apply manifest: install skills from skill-project.toml [dependencies] (canonical command for manifest-driven workflow)
   list         List installed skills and reconcile with skill-project.toml and skills.lock (shows name, description, and flags by default; use --details for full info)
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_finds_skills_in_current_dir.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_finds_skills_in_current_dir.snap
index b5125c6..5d169ee 100644
--- a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_finds_skills_in_current_dir.snap
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_finds_skills_in_current_dir.snap
@@ -24,6 +24,7 @@ Commands:
   analyze      Diagnostic and analysis commands
   auth         Manage authentication for registries
   disable      Disable skills by ID
+  eval         Evaluation commands for skill quality assurance
   init         Initialize skill-project.toml in current skill directory
   install      Apply manifest: install skills from skill-project.toml [dependencies] (canonical command for manifest-driven workflow)
   list         List installed skills and reconcile with skill-project.toml and skills.lock (shows name, description, and flags by default; use --details for full info)
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_invalid_repositories_path.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_invalid_repositories_path.snap
index b5125c6..5d169ee 100644
--- a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_invalid_repositories_path.snap
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_invalid_repositories_path.snap
@@ -24,6 +24,7 @@ Commands:
   analyze      Diagnostic and analysis commands
   auth         Manage authentication for registries
   disable      Disable skills by ID
+  eval         Evaluation commands for skill quality assurance
   init         Initialize skill-project.toml in current skill directory
   install      Apply manifest: install skills from skill-project.toml [dependencies] (canonical command for manifest-driven workflow)
   list         List installed skills and reconcile with skill-project.toml and skills.lock (shows name, description, and flags by default; use --details for full info)
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_repositories_path_argument.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_repositories_path_argument.snap
index b5125c6..5d169ee 100644
--- a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_repositories_path_argument.snap
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_repositories_path_argument.snap
@@ -24,6 +24,7 @@ Commands:
   analyze      Diagnostic and analysis commands
   auth         Manage authentication for registries
   disable      Disable skills by ID
+  eval         Evaluation commands for skill quality assurance
   init         Initialize skill-project.toml in current skill directory
   install      Apply manifest: install skills from skill-project.toml [dependencies] (canonical command for manifest-driven workflow)
   list         List installed skills and reconcile with skill-project.toml and skills.lock (shows name, description, and flags by default; use --details for full info)
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_with_env_var.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_with_env_var.snap
index b5125c6..5d169ee 100644
--- a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_with_env_var.snap
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__cli_with_env_var.snap
@@ -24,6 +24,7 @@ Commands:
   analyze      Diagnostic and analysis commands
   auth         Manage authentication for registries
   disable      Disable skills by ID
+  eval         Evaluation commands for skill quality assurance
   init         Initialize skill-project.toml in current skill directory
   install      Apply manifest: install skills from skill-project.toml [dependencies] (canonical command for manifest-driven workflow)
   list         List installed skills and reconcile with skill-project.toml and skills.lock (shows name, description, and flags by default; use --details for full info)
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_help.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_help.snap
new file mode 100644
index 0000000..5b3baaa
--- /dev/null
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_help.snap
@@ -0,0 +1,21 @@
+---
+source: tests/cli/snapshot_helpers.rs
+expression: normalized
+---
+Evaluation commands for skill quality assurance
+
+Usage: fastskill eval <COMMAND>
+
+Commands:
+  validate  Validate eval configuration and files
+  run       Run eval cases against an agent
+  report    Show a report for a completed eval run
+  score     Re-score saved eval artifacts without running the agent again
+  help      Print this message or the help of the given subcommand(s)
+
+Options:
+  -h, --help  Print help
+
+Examples:
+  fastskill eval validate
+  fastskill eval run --agent codex --output-dir [TEMP_DIR]
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_report_help.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_report_help.snap
new file mode 100644
index 0000000..3afaf53
--- /dev/null
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_report_help.snap
@@ -0,0 +1,16 @@
+---
+source: tests/cli/snapshot_helpers.rs
+expression: normalized
+---
+Show a report for a completed eval run
+
+Usage: fastskill eval report [OPTIONS] --run-dir <RUN_DIR>
+
+Options:
+      --run-dir <RUN_DIR>  Path to the specific run directory
+      --format <FORMAT>    Output format: table, json, grid, xml
+      --json               Shorthand for --format json
+  -h, --help               Print help
+
+Examples:
+  fastskill eval report --run-dir [TEMP_DIR]
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_run_help.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_run_help.snap
new file mode 100644
index 0000000..4bf1775
--- /dev/null
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_run_help.snap
@@ -0,0 +1,21 @@
+---
+source: tests/cli/snapshot_helpers.rs
+expression: normalized
+---
+Run eval cases against an agent
+
+Usage: fastskill eval run [OPTIONS] --agent <AGENT> --output-dir <OUTPUT_DIR>
+
+Options:
+      --agent <AGENT>            Agent to use for execution (required)
+      --output-dir <OUTPUT_DIR>  Output directory for artifacts (required)
+      --model <MODEL>            Optional model override forwarded to the agent
+      --case <CASE>              Filter: run only the case with this ID
+      --tag <TAG>                Filter: run only cases with this tag
+      --format <FORMAT>          Output format: table, json, grid, xml
+      --json                     Shorthand for --format json
+      --no-fail                  Do not fail with non-zero exit code on suite failure
+  -h, --help                     Print help
+
+Examples:
+  fastskill eval run --agent codex --output-dir [TEMP_DIR]
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_score_help.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_score_help.snap
new file mode 100644
index 0000000..252d919
--- /dev/null
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_score_help.snap
@@ -0,0 +1,17 @@
+---
+source: tests/cli/snapshot_helpers.rs
+expression: normalized
+---
+Re-score saved eval artifacts without running the agent again
+
+Usage: fastskill eval score [OPTIONS] --run-dir <RUN_DIR>
+
+Options:
+      --run-dir <RUN_DIR>  Path to the run directory to re-score
+      --format <FORMAT>    Output format: table, json, grid, xml
+      --json               Shorthand for --format json
+      --no-fail            Do not fail with non-zero exit code on suite failure
+  -h, --help               Print help
+
+Examples:
+  fastskill eval score --run-dir [TEMP_DIR]
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_validate_help.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_validate_help.snap
new file mode 100644
index 0000000..9f0f890
--- /dev/null
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__eval_validate_help.snap
@@ -0,0 +1,17 @@
+---
+source: tests/cli/snapshot_helpers.rs
+expression: normalized
+---
+Validate eval configuration and files
+
+Usage: fastskill eval validate [OPTIONS]
+
+Options:
+      --agent <AGENT>    Check agent availability for the specified agent key
+      --format <FORMAT>  Output format: table, json, grid, xml
+      --json             Shorthand for --format json
+  -h, --help             Print help
+
+Examples:
+  fastskill eval validate
+  fastskill eval validate --agent codex
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__help_flag.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__help_flag.snap
index b5125c6..5d169ee 100644
--- a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__help_flag.snap
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__help_flag.snap
@@ -24,6 +24,7 @@ Commands:
   analyze      Diagnostic and analysis commands
   auth         Manage authentication for registries
   disable      Disable skills by ID
+  eval         Evaluation commands for skill quality assurance
   init         Initialize skill-project.toml in current skill directory
   install      Apply manifest: install skills from skill-project.toml [dependencies] (canonical command for manifest-driven workflow)
   list         List installed skills and reconcile with skill-project.toml and skills.lock (shows name, description, and flags by default; use --details for full info)
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__help_no_args.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__help_no_args.snap
index 89ab80e..37023f2 100644
--- a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__help_no_args.snap
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__help_no_args.snap
@@ -11,6 +11,7 @@ Commands:
   analyze      Diagnostic and analysis commands
   auth         Manage authentication for registries
   disable      Disable skills by ID
+  eval         Evaluation commands for skill quality assurance
   init         Initialize skill-project.toml in current skill directory
   install      Apply manifest: install skills from skill-project.toml [dependencies] (canonical command for manifest-driven workflow)
   list         List installed skills and reconcile with skill-project.toml and skills.lock (shows name, description, and flags by default; use --details for full info)
@@ -47,6 +48,7 @@ Commands:
   analyze      Diagnostic and analysis commands
   auth         Manage authentication for registries
   disable      Disable skills by ID
+  eval         Evaluation commands for skill quality assurance
   init         Initialize skill-project.toml in current skill directory
   install      Apply manifest: install skills from skill-project.toml [dependencies] (canonical command for manifest-driven workflow)
   list         List installed skills and reconcile with skill-project.toml and skills.lock (shows name, description, and flags by default; use --details for full info)
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__help_short_flag.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__help_short_flag.snap
index a8ba135..611645c 100644
--- a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__help_short_flag.snap
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__help_short_flag.snap
@@ -11,6 +11,7 @@ Commands:
   analyze      Diagnostic and analysis commands
   auth         Manage authentication for registries
   disable      Disable skills by ID
+  eval         Evaluation commands for skill quality assurance
   init         Initialize skill-project.toml in current skill directory
   install      Apply manifest: install skills from skill-project.toml [dependencies] (canonical command for manifest-driven workflow)
   list         List installed skills and reconcile with skill-project.toml and skills.lock (shows name, description, and flags by default; use --details for full info)
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__serve_invalid_port.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__serve_invalid_port.snap
index 7b21dee..f78dbd2 100644
--- a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__serve_invalid_port.snap
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__serve_invalid_port.snap
@@ -11,6 +11,7 @@ Commands:
   analyze      Diagnostic and analysis commands
   auth         Manage authentication for registries
   disable      Disable skills by ID
+  eval         Evaluation commands for skill quality assurance
   init         Initialize skill-project.toml in current skill directory
   install      Apply manifest: install skills from skill-project.toml [dependencies] (canonical command for manifest-driven workflow)
   list         List installed skills and reconcile with skill-project.toml and skills.lock (shows name, description, and flags by default; use --details for full info)
diff --git a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__sources_add_missing_url.snap b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__sources_add_missing_url.snap
index c6bbaf4..cdf953d 100644
--- a/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__sources_add_missing_url.snap
+++ b/tests/cli/snapshots/cli_tests__cli__snapshot_helpers__sources_add_missing_url.snap
@@ -11,6 +11,7 @@ Commands:
   analyze      Diagnostic and analysis commands
   auth         Manage authentication for registries
   disable      Disable skills by ID
+  eval         Evaluation commands for skill quality assurance
   init         Initialize skill-project.toml in current skill directory
   install      Apply manifest: install skills from skill-project.toml [dependencies] (canonical command for manifest-driven workflow)
   list         List installed skills and reconcile with skill-project.toml and skills.lock (shows name, description, and flags by default; use --details for full info)
diff --git a/tests/core_manifest_tests.rs b/tests/core_manifest_tests.rs
index f40d939..18f2cf3 100644
--- a/tests/core_manifest_tests.rs
+++ b/tests/core_manifest_tests.rs
@@ -170,6 +170,7 @@ fn test_validation_project_level() {
                 server: None,
                 install_depth: 5,
                 skip_transitive: false,
+                eval: None,
             }),
         }),
     };