From d03797b7f6eb75bc0fc9961ddc1f9a1f578350eb Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Thu, 19 Feb 2026 17:27:11 -0300 Subject: [PATCH 01/18] feat: add scaffold for aiken vulnerability detection --- .../003-ai-aiken-vulnerability-scaffolding.md | 177 ++++++++++++++++++ design/003-assets/README.md | 44 +++++ design/003-assets/c4-component.puml | 23 +++ design/003-assets/c4-container.puml | 28 +++ design/003-assets/c4-context.puml | 13 ++ .../vulnerabilities/001-state-transition.md | 5 + .../vulnerabilities/002-authz-boundaries.md | 5 + src/cli.rs | 4 + src/commands/aiken/analyze.rs | 25 +++ src/commands/aiken/mod.rs | 43 +++++ src/commands/aiken/model.rs | 64 +++++++ src/commands/mod.rs | 1 + src/main.rs | 1 + src/telemetry/mod.rs | 1 + templates/aiken/permission_prompt.md | 12 ++ templates/aiken/report.md | 12 ++ tests/e2e/edge_cases.rs | 14 ++ tests/e2e/happy_path.rs | 16 ++ tests/e2e/smoke.rs | 20 ++ 19 files changed, 508 insertions(+) create mode 100644 design/003-ai-aiken-vulnerability-scaffolding.md create mode 100644 design/003-assets/README.md create mode 100644 design/003-assets/c4-component.puml create mode 100644 design/003-assets/c4-container.puml create mode 100644 design/003-assets/c4-context.puml create mode 100644 skills/vulnerabilities/001-state-transition.md create mode 100644 skills/vulnerabilities/002-authz-boundaries.md create mode 100644 src/commands/aiken/analyze.rs create mode 100644 src/commands/aiken/mod.rs create mode 100644 src/commands/aiken/model.rs create mode 100644 templates/aiken/permission_prompt.md create mode 100644 templates/aiken/report.md diff --git a/design/003-ai-aiken-vulnerability-scaffolding.md b/design/003-ai-aiken-vulnerability-scaffolding.md new file mode 100644 index 0000000..f5db7d2 --- /dev/null +++ b/design/003-ai-aiken-vulnerability-scaffolding.md @@ -0,0 +1,177 @@ +# AI Aiken Vulnerability Scaffolding + +## Overview + +This document defines the **Milestone 1 scaffolding** for an AI-assisted Aiken vulnerability analysis command in Trix. + +Scope for this milestone is intentionally limited to: +- CLI wiring for a new scoped command: `trix aiken` +- Contracts for iterative skill-by-skill analysis state (JSON) +- Contracts for permission prompt generation and final vulnerability report generation +- Local-first execution boundaries and security assumptions +- C4 architecture diagrams in PlantUML + +Out of scope for this milestone: +- Real LLM integration implementation +- Actual command execution orchestration against an AI provider +- Deep prompt engineering and remediation automation + +## Goals + +1. Establish a stable command surface for future implementation. +2. Define the analysis loop model that processes vulnerability skills one by one. +3. Persist progress incrementally in JSON after each skill iteration. +4. Produce a final vulnerability document contract (Markdown output path + structure). +5. Define a local command permission prompt contract (e.g. `grep`, `cat`) for constrained auto-execution. + +## CLI Surface (Scaffolding) + +`trix aiken` is a **scoped** command and requires a project context (`trix.toml`). + +**⚠️ EXPERIMENTAL**: This command requires the `unstable` feature to be enabled. Build with: +```bash +cargo build --features unstable +``` + +### Command Structure + +The command follows a subcommand pattern for extensibility: + +```bash +trix aiken [options] +``` + +### Available Subcommands (Milestone 1) + +#### `trix aiken analyze` + +Analyzes Aiken code for vulnerabilities using AI-assisted detection. 
+ +**Arguments:** +- `--state-out` (default: `.tx3/aiken-analysis/state.json`) - Path where the incremental analysis state JSON will be written +- `--report-out` (default: `.tx3/aiken-analysis/vulnerabilities.md`) - Path where the final vulnerability report markdown will be written +- `--skills-dir` (default: `skills/vulnerabilities`) - Path to vulnerability skill definitions + +**Example:** +```bash +trix aiken analyze +trix aiken analyze --state-out ./custom/state.json +``` + +## Skill-by-skill Loop Contract + +The analysis process is modeled as an iterative loop: + +1. Load one vulnerability skill definition. +2. Build a focused mini-prompt for that single skill. +3. Execute analysis (future milestone, provider-backed). +4. Append iteration result to JSON state. +5. Continue with next skill until all skills are processed. +6. Render final vulnerability report document from aggregate findings. + +This loop enables narrow prompts per skill, improving precision and traceability. + +## State and Output Contracts + +### Incremental JSON state + +Defined by `AnalysisStateJson` and related structures in: +- `src/commands/aiken/model.rs` + +Key sections: +- Target metadata and provider spec +- Permission prompt spec (allowed local commands, scope rules) +- Ordered list of `SkillIterationResult` + +### Final report + +Defined by `VulnerabilityReportSpec` and a Markdown template scaffold: +- `templates/aiken/report.md` + +### Permission prompt + +Template scaffold: +- `templates/aiken/permission_prompt.md` + +This prompt is intended to grant **explicit, bounded, local** command execution rights to the analysis agent. + +## External AI Service Note + +Future milestones may integrate external AI services (for example, **Anthropic**) behind a provider adapter boundary. + +For this milestone, provider integration is represented as a contract only (`ProviderSpec`) and no network behavior is implemented. + +## Local Execution and Safety Boundaries + +The architecture assumes: +- Analysis runs locally from the developer machine. +- Only an allowlist of read-oriented commands is permitted by policy prompt (e.g. `grep`, `cat`, `find`, `ls`). +- Writes are limited to designated output artifacts (state JSON and report document). +- Scope rules constrain path access to project roots. + +## C4 Architecture Diagrams + +C4 diagrams are maintained as separate PlantUML files in [`003-assets/`](003-assets/) for easier image generation and version control. 
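For concreteness, the incremental state contract described above might serialize to something like the sketch below after the first skill iteration. The field shape follows `AnalysisStateJson` and its related structs in `src/commands/aiken/model.rs`; every concrete value shown (target path, provider name, status, prompt text) is purely illustrative and not produced by any implemented provider.

```json
{
  "version": "1",
  "target_path": "main.tx3",
  "provider": {
    "name": "scaffold",
    "model": null,
    "notes": "No external AI calls are performed in this milestone."
  },
  "permission_prompt": {
    "shell": "bash",
    "allowed_commands": ["grep", "cat", "find", "ls"],
    "scope_rules": ["Only execute commands within the current project root."]
  },
  "iterations": [
    {
      "skill_id": "state-transition-001",
      "status": "scaffolded",
      "findings": [],
      "next_prompt": {
        "skill_id": "state-transition-001",
        "text": "Illustrative follow-up prompt for the state-transition skill."
      }
    }
  ]
}
```
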
+ +### Generating Diagrams + +To generate PNG/SVG images from PlantUML source: + +```bash +# Using PlantUML CLI +plantuml design/003-assets/*.puml + +# Or using Docker +docker run --rm -v $(pwd)/design/003-assets:/data plantuml/plantuml:latest *.puml +``` + +### C4 - Context Diagram + +**Source:** [c4-context.puml](003-assets/c4-context.puml) + +![C4 Context Diagram](https://www.plantuml.com/plantuml/svg/LP71SXen343l-nKg9peDxA4vfPSmj9b9XpGCQNibnXP1nMjx97c1lw-zcsc7qwwifoTBlXSrhBdpiBpTBcDGFEjsGKSClxCFpGSArcU7S51DSjUsR4xpDz93tcL1jhKWwDp6hatUX2gQYJfFktPvErlNerzFgxOpeiZj_nRpLCYcMIDB35E7_GrClcAFFYRaIGasEGZyP3e31J3WepKU4iS_Q7NoiNcv564trG8KUE2MgyT9_Jz_XcJiqRmXT2QK3__Zjz_EEZLKgFA378foyGXhZZsviUrpKehrV9yrCq9wpu5aUm0Zi0Xwu9Z8NaltKefNmoKVo2ECgMfSZtS6YPajte0SoVj2nwmfgnHDiq4_hc0SPvpzjGPWwaky5gWoOtfejK7Z16nq29-WppuWcnrvqijxWozmOb8K-Fxo_0D4ZIAOuE39yD75Km3655clpCDc7eR1xq0mE1bbCqbeQK-shgDBAeE9qIBrM0FZBqB3AFUbghxiNm00) + +*System context showing developer interaction with Trix CLI, local filesystem, and future external AI provider integration.* + +### C4 - Container Diagram + +**Source:** [c4-container.puml](003-assets/c4-container.puml) + +![C4 Container Diagram](https://www.plantuml.com/plantuml/svg/RLJ1KXin3BtlLwWvXMaX5qwS22dCf1DQ9j3so3Yx8cZYjLrB3cGw_VVAQpRDXbvYIthIa-INlMGnKL9hONR-EFDE33cCZrFQeFkHhAi-aAjjQX2UHG9VJ2RHl9ojIPxJAZ76MbFGoLdjsqcmnkLIfooDfTLaTdvwbrtVRnUJrh1Wd1oId0N5LjKTHlPktE3s4um-unQj3nX79zNzJbFQnokVN6FYRYoHNXNoe0VC5lFH2Vok0FRbnhKb3NUTgxTjZMj6wbWc5lW8Csj2DZKJwX85T19tmPCJoAQ9wzIY6izgMN9hRUAGm7eVjCBzXgo5XHfmxTOQwLdoEHUCHf1XcwpJwueioGuuvt37aaBJ0LZqE688qRT1jCnTTybZ1CyO3-Pum3RON23xhXBPXg4bHwSbjt04Z2qnatTGt4Sy7NSU9P_mCv8CEBxSV_kQpVmqn5fJDH0nhyPOhM_iZkd_0qGCFagkr5tUbxurSTFu5pUSGyTjKeqCJwJLZtPNQhxVr9OQZ5rokSAqCK7q7DyaIH5FzhGI3GciteIKtTCLgxCM1dpLb4mwdGDZt5ATP_fJzVAxVfNn4slAmjUAkY6BK48AEy2OBBxhWsBxjlfs55yjqHQb5x7EtTPla3MI0vsl5vcOADmlkqUKu5ubI_oLa8MBl0Oo62JyUsuVaO4wZMPng9Ni4RYiknTN_oJwsv1Ryrjua77m5cNK2p2yqKmVZmReXTSjjDF6cuoAv0gcC0n1RI-R0NqtQuzSecdwJJoHnV9FaBtC-fI-LqFxVi6XlFYuWTfOcv7L9RecVvt-0W00) + +*Container-level view of Trix CLI internals: command entrypoint, skill loop engine, prompt composer, state/report writers, and provider adapter boundary.* + +### C4 - Component Diagram + +**Source:** [c4-component.puml](003-assets/c4-component.puml) + +![C4 Component Diagram](https://www.plantuml.com/plantuml/svg/TPFFRjim3CRlVWekfnPhuqjFFJNDQKMp1Issgw58J29KVpIAQXeCU_T9jc4dNdeBAVBtCVpeXyY2E5a3oxjcwMtm3Xqt2_s6xbSohlgYdJH98UoPGxolQnA7-KxpFcrIH9BUSMwOIs_hO8GhgDl8okXDlRoxNPVKpzLpRKLafFfKP1voRLKjCq5eXzGy-kIKeEELAEetC6kdWDnWXLEmyYePd5t3dmfWe5n9gt8f9NSbQ1Tl84-qrRiR2EItsk3rBBzQh-OKowzNwGCga3qp0-WOl7OykpwZnZTjJ6EyK4YbhOJmt8O5iqsS2C5s8-L4UOgGU-4z6OSaDjfeFaBB0S4WWBN5YOhLJZU1SioPiSeHhDi87fCsdscrRn6sNgQ82hh-71B8H9IvdUf4Ao1P7QFsRbHS3xaBkems6C48nMCIaa-qZPV235iSqVqmqsLonZJMApHbboM2LObwRz3qxYoSCCUeOrlo53FlAsX64fpPmnpn8Zwo9npmhoMwkBqNp4xL1yhS2Df9GfiXmi3Jwu_lqB8cVCBWYHj2fv2GMaR9m4kV-NHRNK6St8GLzARymKqD8l-kStaZb0ZSwgpx52nAwkmSieRcRcUy9n-qh3tlaQ3746Z7kEjNj-a_hIpvjwfUq7IVpUJA2NydZ1p1uSTmH6Fr-RLkkai8IBTjvM3zs33UnRIj6mWLu0VAL4Qyq7v-DYbY74zWMZVwdqMZBdy2XTo3yJwC9tJQzR-hrBkrrKwO_lsqOshGOCPz_pSG5jz02cFAUDL3nYLhgdy0) + +*Component-level detail of the Aiken command module: skill loader, prompt builders, state model, storage, report renderer, and provider adapter.* + +## Skills Source Convention + +Vulnerability skills live under: +- `skills/vulnerabilities/` + +One file per skill, designed for 1:1 loop processing. 
+ +Suggested frontmatter-like fields per file: +- `id` +- `title` +- `severity` +- `description` +- `prompt_fragment` + +## Milestone 1 Acceptance Criteria + +- `trix aiken` command exists as a scoped command with subcommand structure. +- Command is gated behind `unstable` feature flag (following `publish` pattern). +- `trix aiken analyze` subcommand is implemented as scaffold. +- `src/commands/aiken/mod.rs` routes subcommands following the `profile` pattern. +- `src/commands/aiken/analyze.rs` contains the analysis scaffold implementation. +- `src/commands/aiken/model.rs` defines scaffolding contracts for state, findings, and prompts. +- Templates for report and permission prompt exist in `templates/aiken/`. +- `skills/vulnerabilities/` exists with seed skill files. +- This design document includes C4 diagrams as separate PlantUML files in `003-assets/`. +- E2E scaffold tests verify command visibility and baseline behavior for `analyze` subcommand (tests run with `--features unstable`). diff --git a/design/003-assets/README.md b/design/003-assets/README.md new file mode 100644 index 0000000..0f3e61b --- /dev/null +++ b/design/003-assets/README.md @@ -0,0 +1,44 @@ +# Design 003 Assets + +This folder contains PlantUML C4 architecture diagrams for the AI Aiken Vulnerability Scaffolding design. + +## Files + +- `c4-context.puml` - System context diagram +- `c4-container.puml` - Container-level architecture +- `c4-component.puml` - Component-level details + +## Generating Images + +### Using PlantUML CLI + +Install PlantUML and run: + +```bash +plantuml c4-*.puml +``` + +This will generate PNG files in the same directory. + +### Using Docker + +```bash +docker run --rm -v $(pwd):/data plantuml/plantuml:latest c4-*.puml +``` + +### Using Online Editor + +1. Copy the content of any `.puml` file +2. Go to https://www.plantuml.com/plantuml/uml/ +3. Paste and generate + +### Using VS Code + +Install the PlantUML extension: +- Extension ID: `jebbs.plantuml` +- Right-click on `.puml` file → "Preview Current Diagram" +- Export as PNG/SVG + +## Output + +Generated images (`*.png`, `*.svg`) should be committed to this directory so they render in the markdown document on GitHub and other viewers. 
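If SVG output is preferred over the default PNG (both are mentioned above as committable artifacts), the standard PlantUML output-format flag should work, though it has not been verified against this repository's setup:

```bash
plantuml -tsvg c4-*.puml
```
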
diff --git a/design/003-assets/c4-component.puml b/design/003-assets/c4-component.puml new file mode 100644 index 0000000..aa996a6 --- /dev/null +++ b/design/003-assets/c4-component.puml @@ -0,0 +1,23 @@ +@startuml C4_Component_AikenVuln +!include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Component.puml + +Container_Boundary(aiken, "Aiken Command Module") { + Component(cmd, "run(args, config, profile)", "mod.rs", "Scoped command entrypoint") + Component(skill_loader, "Skill Loader", "future module", "Loads one vulnerability skill at a time") + Component(mini_prompt, "Mini Prompt Builder", "future module", "Builds focused prompt for current skill") + Component(permission_prompt, "Permission Prompt Builder", "template contract", "Builds local command permission prompt") + Component(state_model, "State Model", "model.rs", "AnalysisStateJson + iteration contracts") + Component(state_store, "State Store", "future module", "Reads/writes incremental JSON state") + Component(report_renderer, "Report Renderer", "template contract", "Renders vulnerability markdown") + Component(provider_adapter, "Provider Adapter", "future trait", "Anthropic/other provider integration boundary") +} + +Rel(cmd, skill_loader, "requests next skill") +Rel(cmd, mini_prompt, "builds per-skill prompt") +Rel(cmd, permission_prompt, "builds bounded execution prompt") +Rel(cmd, state_model, "uses contracts") +Rel(cmd, state_store, "persists each loop iteration") +Rel(cmd, report_renderer, "renders final report") +Rel(cmd, provider_adapter, "future: execute AI calls") + +@enduml diff --git a/design/003-assets/c4-container.puml b/design/003-assets/c4-container.puml new file mode 100644 index 0000000..b50301c --- /dev/null +++ b/design/003-assets/c4-container.puml @@ -0,0 +1,28 @@ +@startuml C4_Container_AikenVuln +!include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Container.puml + +Person(dev, "Developer") +System_Boundary(trix, "Trix CLI") { + Container(cli, "Aiken Command", "Rust + Clap", "CLI command entrypoint and argument handling") + Container(loop, "Skill Loop Engine", "Rust", "Iterates vulnerability skills and updates state") + Container(prompt, "Prompt Composer", "Rust + Templates", "Builds mini-prompts and permission prompt") + Container(state, "State Writer", "Rust + JSON", "Persists incremental analysis state") + Container(report, "Report Writer", "Rust + Markdown Templates", "Produces final vulnerability report") + Container(provider, "Provider Adapter (Future)", "Rust trait boundary", "Abstracts external AI service") +} + +System_Ext(fs, "Local File System") +System_Ext(ai, "External AI Provider (Future)") + +Rel(dev, cli, "Invokes") +Rel(cli, loop, "Starts analysis") +Rel(loop, prompt, "Requests skill mini-prompts") +Rel(loop, state, "Stores iteration results") +Rel(loop, report, "Builds final findings report") +Rel(loop, provider, "Future: asks for analysis") +Rel(state, fs, "Writes state JSON") +Rel(report, fs, "Writes markdown report") +Rel(prompt, fs, "Reads skill files and templates") +Rel(provider, ai, "Future network calls") + +@enduml diff --git a/design/003-assets/c4-context.puml b/design/003-assets/c4-context.puml new file mode 100644 index 0000000..bb17137 --- /dev/null +++ b/design/003-assets/c4-context.puml @@ -0,0 +1,13 @@ +@startuml C4_Context_AikenVuln +!include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Context.puml + +Person(dev, "Developer", "Runs Trix in a local project") +System(trix, "Trix CLI", "Tx3 
package manager") +System_Ext(ai, "External AI Provider", "Optional future provider such as Anthropic") +System_Ext(fs, "Local File System", "Project source, skills, outputs") + +Rel(dev, trix, "Runs `trix aiken`") +Rel(trix, fs, "Reads code + vulnerability skills; writes JSON state and Markdown report") +Rel(trix, ai, "Future: sends skill-specific prompts and receives analysis") + +@enduml diff --git a/skills/vulnerabilities/001-state-transition.md b/skills/vulnerabilities/001-state-transition.md new file mode 100644 index 0000000..014296b --- /dev/null +++ b/skills/vulnerabilities/001-state-transition.md @@ -0,0 +1,5 @@ +id: state-transition-001 +title: Unsafe state transition validation +severity: high +description: Ensure transitions are fully guarded by explicit preconditions and invariants. +prompt_fragment: Review all state transition paths and identify missing or bypassable validation checks. diff --git a/skills/vulnerabilities/002-authz-boundaries.md b/skills/vulnerabilities/002-authz-boundaries.md new file mode 100644 index 0000000..966bee4 --- /dev/null +++ b/skills/vulnerabilities/002-authz-boundaries.md @@ -0,0 +1,5 @@ +id: authz-boundaries-002 +title: Authorization boundary bypass +severity: high +description: Validate signer and role checks for every sensitive branch. +prompt_fragment: Find code paths where authorization assumptions are implicit or can be bypassed. diff --git a/src/cli.rs b/src/cli.rs index d9b1c92..a877294 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -54,6 +54,10 @@ pub enum Commands { /// Inspect and manage profiles Profile(commands::profile::Args), + /// Run Aiken vulnerability analysis scaffolding (UNSTABLE - This feature is experimental and may change) + #[command(hide = true)] + Aiken(commands::aiken::Args), + /// Publish a Tx3 package into the registry (UNSTABLE - This feature is experimental and may change) #[command(hide = true)] Publish(commands::publish::Args), diff --git a/src/commands/aiken/analyze.rs b/src/commands/aiken/analyze.rs new file mode 100644 index 0000000..d985fe7 --- /dev/null +++ b/src/commands/aiken/analyze.rs @@ -0,0 +1,25 @@ +use clap::Args as ClapArgs; +use miette::Result; + +use crate::config::{ProfileConfig, RootConfig}; + +#[derive(ClapArgs)] +pub struct Args { + /// Path where the incremental analysis state JSON will be written. + #[arg(long, default_value = ".tx3/aiken-analysis/state.json")] + pub state_out: String, + + /// Path where the final vulnerability report markdown will be written. + #[arg(long, default_value = ".tx3/aiken-analysis/vulnerabilities.md")] + pub report_out: String, + + /// Path to vulnerability skill definitions. 
+ #[arg(long, default_value = "skills/vulnerabilities")] + pub skills_dir: String, +} + +pub fn run(_args: Args, _config: &RootConfig, _profile: &ProfileConfig) -> Result<()> { + println!("⚠️ EXPERIMENTAL: Aiken vulnerability analysis scaffolding is not implemented yet."); + println!("See design/003-ai-aiken-vulnerability-scaffolding.md for architecture and contracts."); + Ok(()) +} diff --git a/src/commands/aiken/mod.rs b/src/commands/aiken/mod.rs new file mode 100644 index 0000000..807a0b1 --- /dev/null +++ b/src/commands/aiken/mod.rs @@ -0,0 +1,43 @@ +use clap::{Args as ClapArgs, Subcommand}; +use miette::Result; + +use crate::config::{ProfileConfig, RootConfig}; + +pub mod analyze; +pub mod model; + +pub use analyze::run as run_analyze; + +#[derive(Subcommand)] +pub enum Command { + /// Analyze Aiken code for vulnerabilities using AI-assisted detection + Analyze(analyze::Args), +} + +#[derive(ClapArgs)] +pub struct Args { + #[clap(subcommand)] + pub command: Command, +} + +#[allow(unused_variables)] +pub fn run(args: Args, config: &RootConfig, profile: &ProfileConfig) -> Result<()> { + #[cfg(feature = "unstable")] + { + _run(args, config, profile) + } + #[cfg(not(feature = "unstable"))] + { + let _ = config; + let _ = profile; + Err(miette::miette!( + "The aiken command is currently unstable and requires the `unstable` feature to be enabled." + )) + } +} + +fn _run(args: Args, config: &RootConfig, profile: &ProfileConfig) -> Result<()> { + match args.command { + Command::Analyze(args) => run_analyze(args, config, profile), + } +} diff --git a/src/commands/aiken/model.rs b/src/commands/aiken/model.rs new file mode 100644 index 0000000..219c326 --- /dev/null +++ b/src/commands/aiken/model.rs @@ -0,0 +1,64 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VulnerabilitySkill { + pub id: String, + pub title: String, + pub severity: String, + pub description: String, + pub prompt_fragment: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MiniPrompt { + pub skill_id: String, + pub text: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SkillIterationResult { + pub skill_id: String, + pub status: String, + pub findings: Vec, + pub next_prompt: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VulnerabilityFinding { + pub title: String, + pub severity: String, + pub summary: String, + pub evidence: Vec, + pub recommendation: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AnalysisStateJson { + pub version: String, + pub target_path: String, + pub provider: ProviderSpec, + pub permission_prompt: PermissionPromptSpec, + pub iterations: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ProviderSpec { + pub name: String, + pub model: Option, + pub notes: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PermissionPromptSpec { + pub shell: String, + pub allowed_commands: Vec, + pub scope_rules: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VulnerabilityReportSpec { + pub title: String, + pub generated_at: String, + pub target: String, + pub findings: Vec, +} diff --git a/src/commands/mod.rs b/src/commands/mod.rs index c8729a6..596ae5e 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,3 +1,4 @@ +pub mod aiken; pub mod build; pub mod check; pub mod codegen; diff --git a/src/main.rs b/src/main.rs index b21dabd..106874e 100644 --- a/src/main.rs +++ b/src/main.rs @@ 
-48,6 +48,7 @@ async fn run_scoped_command(cli: Cli, config: RootConfig) -> Result<()> { Commands::Build(args) => cmds::build::run(args, &config, &profile), Commands::Identities(args) => cmds::identities::run(args, &config, &profile), Commands::Profile(args) => cmds::profile::run(args, &config, &profile), + Commands::Aiken(args) => cmds::aiken::run(args, &config, &profile), Commands::Publish(args) => cmds::publish::run(args, &config), Commands::Telemetry(args) => cmds::telemetry::run(args), }; diff --git a/src/telemetry/mod.rs b/src/telemetry/mod.rs index c90ba7b..34c0e13 100644 --- a/src/telemetry/mod.rs +++ b/src/telemetry/mod.rs @@ -40,6 +40,7 @@ impl From<&Cli> for Option { Commands::Inspect(_) => Some(CommandMetric::new("inspect")), Commands::Test(_) => Some(CommandMetric::new("test")), Commands::Identities(_) => Some(CommandMetric::new("identities")), + Commands::Aiken(_) => Some(CommandMetric::new("aiken")), Commands::Publish(_) => Some(CommandMetric::new("publish")), _ => None, } diff --git a/templates/aiken/permission_prompt.md b/templates/aiken/permission_prompt.md new file mode 100644 index 0000000..0e47b6e --- /dev/null +++ b/templates/aiken/permission_prompt.md @@ -0,0 +1,12 @@ +You are analyzing local Aiken code for vulnerabilities. + +Execution constraints: +- Execute commands only in the allowed project scope. +- Use read-oriented commands only unless explicitly granted. +- Record evidence paths for every finding. + +Allowed commands: +{{ allowed_commands }} + +Scope rules: +{{ scope_rules }} diff --git a/templates/aiken/report.md b/templates/aiken/report.md new file mode 100644 index 0000000..51af1bc --- /dev/null +++ b/templates/aiken/report.md @@ -0,0 +1,12 @@ +# Aiken Vulnerability Report + +- Target: {{ target }} +- Generated at: {{ generated_at }} + +## Summary + +This document is generated from iterative skill-by-skill vulnerability analysis. 
+ +## Findings + +{{ findings_markdown }} diff --git a/tests/e2e/edge_cases.rs b/tests/e2e/edge_cases.rs index 58bdd8b..cd12f67 100644 --- a/tests/e2e/edge_cases.rs +++ b/tests/e2e/edge_cases.rs @@ -40,3 +40,17 @@ fn init_preserves_existing_test_file() { ctx.assert_file_contains("tests/basic.toml", "# Custom test file"); ctx.assert_file_contains("tests/basic.toml", "name = \"custom\""); } + +#[test] +#[cfg(feature = "unstable")] +fn aiken_analyze_fails_without_trix_config() { + let ctx = TestContext::new(); + let result = ctx.run_trix(&["aiken", "analyze"]); + + assert!(!result.success(), "aiken analyze should fail outside scoped project"); + assert!( + result.stderr.contains("No trix.toml found in current directory"), + "Expected missing trix.toml error, got stderr: {}", + result.stderr + ); +} diff --git a/tests/e2e/happy_path.rs b/tests/e2e/happy_path.rs index 812a838..f4435c7 100644 --- a/tests/e2e/happy_path.rs +++ b/tests/e2e/happy_path.rs @@ -144,3 +144,19 @@ fn devnet_starts_and_cshell_connects() { .args(["-f", "dolos"]) .output(); } + +#[test] +#[cfg(feature = "unstable")] +fn aiken_analyze_runs_in_initialized_project() { + let ctx = TestContext::new(); + let init_result = ctx.run_trix(&["init", "--yes"]); + assert_success(&init_result); + + let result = ctx.run_trix(&["aiken", "analyze"]); + + assert_success(&result); + assert_output_contains( + &result, + "EXPERIMENTAL", + ); +} diff --git a/tests/e2e/smoke.rs b/tests/e2e/smoke.rs index 374ab3d..d87df97 100644 --- a/tests/e2e/smoke.rs +++ b/tests/e2e/smoke.rs @@ -8,3 +8,23 @@ fn init_runs_without_error() { assert_success(&result); ctx.assert_file_exists("trix.toml"); } + +#[test] +#[cfg(feature = "unstable")] +fn aiken_help_runs_without_error() { + let ctx = TestContext::new(); + let result = ctx.run_trix(&["aiken", "--help"]); + + assert_success(&result); + assert_output_contains(&result, "analyze"); +} + +#[test] +#[cfg(feature = "unstable")] +fn aiken_analyze_help_runs_without_error() { + let ctx = TestContext::new(); + let result = ctx.run_trix(&["aiken", "analyze", "--help"]); + + assert_success(&result); + assert_output_contains(&result, "vulnerability"); +} From 77742523ea2b7b283cf4101e07ac1d38c5507e17 Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Fri, 20 Feb 2026 16:01:30 -0300 Subject: [PATCH 02/18] feat: move analyze to audit + add more info to scaffold --- .../003-ai-aiken-vulnerability-scaffolding.md | 24 +- design/003-assets/c4-component.puml | 2 +- design/003-assets/c4-container.puml | 4 +- design/003-assets/c4-context.puml | 2 +- src/commands/aiken/analyze.rs | 25 -- src/commands/aiken/audit.rs | 301 ++++++++++++++++++ src/commands/aiken/mod.rs | 11 +- src/commands/aiken/provider.rs | 45 +++ src/commands/devnet/copy.rs | 33 +- src/commands/profile/list.rs | 4 +- src/commands/profile/show.rs | 2 +- src/commands/test.rs | 2 +- src/devnet/mod.rs | 2 +- src/main.rs | 5 +- src/spawn/cshell.rs | 4 +- src/spawn/tx3c.rs | 2 +- src/telemetry/mod.rs | 5 +- src/wallet.rs | 2 +- tests/e2e/edge_cases.rs | 33 +- tests/e2e/happy_path.rs | 23 +- tests/e2e/smoke.rs | 6 +- 21 files changed, 450 insertions(+), 87 deletions(-) delete mode 100644 src/commands/aiken/analyze.rs create mode 100644 src/commands/aiken/audit.rs create mode 100644 src/commands/aiken/provider.rs diff --git a/design/003-ai-aiken-vulnerability-scaffolding.md b/design/003-ai-aiken-vulnerability-scaffolding.md index f5db7d2..a7dd750 100644 --- a/design/003-ai-aiken-vulnerability-scaffolding.md +++ b/design/003-ai-aiken-vulnerability-scaffolding.md @@ 
-43,19 +43,19 @@ trix aiken [options] ### Available Subcommands (Milestone 1) -#### `trix aiken analyze` +#### `trix aiken audit` -Analyzes Aiken code for vulnerabilities using AI-assisted detection. +Audits Aiken code for vulnerabilities using AI-assisted detection. **Arguments:** -- `--state-out` (default: `.tx3/aiken-analysis/state.json`) - Path where the incremental analysis state JSON will be written -- `--report-out` (default: `.tx3/aiken-analysis/vulnerabilities.md`) - Path where the final vulnerability report markdown will be written +- `--state-out` (default: `.tx3/aiken-audit/state.json`) - Path where the incremental analysis state JSON will be written +- `--report-out` (default: `.tx3/aiken-audit/vulnerabilities.md`) - Path where the final vulnerability report markdown will be written - `--skills-dir` (default: `skills/vulnerabilities`) - Path to vulnerability skill definitions **Example:** ```bash -trix aiken analyze -trix aiken analyze --state-out ./custom/state.json +trix aiken audit +trix aiken audit --state-out ./custom/state.json ``` ## Skill-by-skill Loop Contract @@ -129,7 +129,7 @@ docker run --rm -v $(pwd)/design/003-assets:/data plantuml/plantuml:latest *.pum **Source:** [c4-context.puml](003-assets/c4-context.puml) -![C4 Context Diagram](https://www.plantuml.com/plantuml/svg/LP71SXen343l-nKg9peDxA4vfPSmj9b9XpGCQNibnXP1nMjx97c1lw-zcsc7qwwifoTBlXSrhBdpiBpTBcDGFEjsGKSClxCFpGSArcU7S51DSjUsR4xpDz93tcL1jhKWwDp6hatUX2gQYJfFktPvErlNerzFgxOpeiZj_nRpLCYcMIDB35E7_GrClcAFFYRaIGasEGZyP3e31J3WepKU4iS_Q7NoiNcv564trG8KUE2MgyT9_Jz_XcJiqRmXT2QK3__Zjz_EEZLKgFA378foyGXhZZsviUrpKehrV9yrCq9wpu5aUm0Zi0Xwu9Z8NaltKefNmoKVo2ECgMfSZtS6YPajte0SoVj2nwmfgnHDiq4_hc0SPvpzjGPWwaky5gWoOtfejK7Z16nq29-WppuWcnrvqijxWozmOb8K-Fxo_0D4ZIAOuE39yD75Km3655clpCDc7eR1xq0mE1bbCqbeQK-shgDBAeE9qIBrM0FZBqB3AFUbghxiNm00) +![C4 Context Diagram](https://www.plantuml.com/plantuml/svg/LP71SXen343l-nKg9peDxA4vfPSmj9b9XpGCQNibnXP1nMjx97c1lw-zcsc7qwxifoTBkXSrhBdpiBpTBcDGFEjsGKSClxCFpGSArcU7S51DSjUsR4xpDz93tcL1jZKXwDp6hatUX2gQYJfFktPvErlNerzFgxOpeiZj_nBpLCYcMIDB35E7_GrClcAFFYRaIGasEGZyP3e31J3WepKU4iS_Q7NoiNcv564trG8KUE2MgyT9_Jz_XcJiqRmXT2QK3__Zjz_EEZLKgFA378foyGXhZZsviUnpKehrV9yrCq9wlmF9zW16O17qm36HlPRkfIIlXaS-a4SOhwfnFRuP96QsU0Do9EyB7REch58qpGRzE8AndR7trng0gPi0antfQq4hEK93mGqQ9s2ZG_W4VTaNijcH9xsyb_a29oP5WUylppz0r2Y22GwU31zTF0LWJ97roZpCvs5e_mu4WvDHDfE4blPawr2wf3AO62rIZpKm_Y8qD7HVikf-_m80) *System context showing developer interaction with Trix CLI, local filesystem, and future external AI provider integration.* @@ -137,7 +137,7 @@ docker run --rm -v $(pwd)/design/003-assets:/data plantuml/plantuml:latest *.pum **Source:** [c4-container.puml](003-assets/c4-container.puml) -![C4 Container 
Diagram](https://www.plantuml.com/plantuml/svg/RLJ1KXin3BtlLwWvXMaX5qwS22dCf1DQ9j3so3Yx8cZYjLrB3cGw_VVAQpRDXbvYIthIa-INlMGnKL9hONR-EFDE33cCZrFQeFkHhAi-aAjjQX2UHG9VJ2RHl9ojIPxJAZ76MbFGoLdjsqcmnkLIfooDfTLaTdvwbrtVRnUJrh1Wd1oId0N5LjKTHlPktE3s4um-unQj3nX79zNzJbFQnokVN6FYRYoHNXNoe0VC5lFH2Vok0FRbnhKb3NUTgxTjZMj6wbWc5lW8Csj2DZKJwX85T19tmPCJoAQ9wzIY6izgMN9hRUAGm7eVjCBzXgo5XHfmxTOQwLdoEHUCHf1XcwpJwueioGuuvt37aaBJ0LZqE688qRT1jCnTTybZ1CyO3-Pum3RON23xhXBPXg4bHwSbjt04Z2qnatTGt4Sy7NSU9P_mCv8CEBxSV_kQpVmqn5fJDH0nhyPOhM_iZkd_0qGCFagkr5tUbxurSTFu5pUSGyTjKeqCJwJLZtPNQhxVr9OQZ5rokSAqCK7q7DyaIH5FzhGI3GciteIKtTCLgxCM1dpLb4mwdGDZt5ATP_fJzVAxVfNn4slAmjUAkY6BK48AEy2OBBxhWsBxjlfs55yjqHQb5x7EtTPla3MI0vsl5vcOADmlkqUKu5ubI_oLa8MBl0Oo62JyUsuVaO4wZMPng9Ni4RYiknTN_oJwsv1Ryrjua77m5cNK2p2yqKmVZmReXTSjjDF6cuoAv0gcC0n1RI-R0NqtQuzSecdwJJoHnV9FaBtC-fI-LqFxVi6XlFYuWTfOcv7L9RecVvt-0W00) +![C4 Container Diagram](https://www.plantuml.com/plantuml/svg/RLHDKnin3Btlhr1p2jD2Bfmu4LAOIIUqJA3jaN5sHT34QxkM7CXq-jyhhzbj4dg9hG-z9yalUybYegJMmkpySUQT678O7wUqGVSZMLTz85VRr20yYmI-c4oYUJbRapodLMACjPQWaxFQjvDWZSjGfooDfTLaTdvwbrtVRnUJrh1WdEoJd0NDhQexZEpTkC7j9nXznYrQ7p2EJghxdPQqZrS-kSR4tLWYlMhAWnumMyn79_2x0XZWnhKb3NUTmpGr91fhM-EQaRgNYGK-mioQa4sjXxf40Pt4NV3a1B9fuZgrgCQpMfRSMblPfx7U1qMutv2ri503hjrQ8prBFkU2qGWoR7M5UbsH9Ta1vnhkM59ekWGMFOuOGlHj48MvovTkZE0PuzuS3zW6cm6oxogHRHXQSdHQQWj7mDWICta7nNt4qt7dKV89FoF90SUN-szVivaVY5WnLGaHysgCLNnZTqp_7o1Yy54oKdTv3trhugRnB-vm3XstIJKoF96Y7-skOBxVr9OQZAMukyAqCK7q7DyaIH5F1bg9XWHMRtBAxkcArLaB0xvgIIQTpe4nRgdEC_sfUX5UlyhuYRLauMlDkY6BK4AQjfTZiWIlzy3wjePsDBzQeYrwBvATkwtV86ia1pfVBp8nCJzcLlDRk4G6YIpnLq8MBjewqC11mR-tzX4Pg1DeLePQoHubbrqFoYhlaNyRScikuUD-XeSeSrw0uOrM-ZX8fHJqgkkssGdZJKD5RoMdC1mcjRrc3kYxMVlC9PgctyGJMIn_0-arrfSClHeQ-WM7ykBZ1lHvRSwiBj4r-GFr5m00) *Container-level view of Trix CLI internals: command entrypoint, skill loop engine, prompt composer, state/report writers, and provider adapter boundary.* @@ -145,7 +145,7 @@ docker run --rm -v $(pwd)/design/003-assets:/data plantuml/plantuml:latest *.pum **Source:** [c4-component.puml](003-assets/c4-component.puml) -![C4 Component Diagram](https://www.plantuml.com/plantuml/svg/TPFFRjim3CRlVWekfnPhuqjFFJNDQKMp1Issgw58J29KVpIAQXeCU_T9jc4dNdeBAVBtCVpeXyY2E5a3oxjcwMtm3Xqt2_s6xbSohlgYdJH98UoPGxolQnA7-KxpFcrIH9BUSMwOIs_hO8GhgDl8okXDlRoxNPVKpzLpRKLafFfKP1voRLKjCq5eXzGy-kIKeEELAEetC6kdWDnWXLEmyYePd5t3dmfWe5n9gt8f9NSbQ1Tl84-qrRiR2EItsk3rBBzQh-OKowzNwGCga3qp0-WOl7OykpwZnZTjJ6EyK4YbhOJmt8O5iqsS2C5s8-L4UOgGU-4z6OSaDjfeFaBB0S4WWBN5YOhLJZU1SioPiSeHhDi87fCsdscrRn6sNgQ82hh-71B8H9IvdUf4Ao1P7QFsRbHS3xaBkems6C48nMCIaa-qZPV235iSqVqmqsLonZJMApHbboM2LObwRz3qxYoSCCUeOrlo53FlAsX64fpPmnpn8Zwo9npmhoMwkBqNp4xL1yhS2Df9GfiXmi3Jwu_lqB8cVCBWYHj2fv2GMaR9m4kV-NHRNK6St8GLzARymKqD8l-kStaZb0ZSwgpx52nAwkmSieRcRcUy9n-qh3tlaQ3746Z7kEjNj-a_hIpvjwfUq7IVpUJA2NydZ1p1uSTmH6Fr-RLkkai8IBTjvM3zs33UnRIj6mWLu0VAL4Qyq7v-DYbY74zWMZVwdqMZBdy2XTo3yJwC9tJQzR-hrBkrrKwO_lsqOshGOCPz_pSG5jz02cFAUDL3nYLhgdy0) +![C4 Component 
Diagram](https://www.plantuml.com/plantuml/svg/TPFFRjim3CRlVWekfnPhuqjFFJNDQKMp1Issgw58J29KVpIAQXeCU_T9jc4dNdeBANw_Zo7fXyY2E5a3oxjcwMtm3Xqt2_s6xbSohlgYdJH98UoPGxolQnA7-KxpFcrIH9BUSHRCfRTrCC8Lr6raPVIcNjxThalgv-gvjY8oKdrgCW-vjggMcI2qGseUVN9AqF5A5FSRcBLTm28fpP1bLZW5Aw-ImTar_Aa01jQLj2eBABahGRju0xclhTxTG22_rGQlP_dLUZMdM7wzIXzGWUoP6K37uBNZsVKPDRvfOnhZXK8giXB2SniMp3Pn8WJRjPGJvIb2ri9xCWv9RBJHV8IM0u9106kB4nUhdMu2vPWpOvKZMBSHF2PjFlDgto9ilKmH5NJw71B8H9IvdUl4Ao1P7QFsRdHS3xaBlems6C48PTCa99zu6o-56QOueljcfiFaZ6ciLsZAB4i4gnBrkq7JRsVXX3b67TkIfvXvNq4pak3C7lg85_6HFU60VotHnUcze52nFb3cHj1E4jeC4GQUNdzyXvOruHC6JzmGEeM4L1Xj1bxwpAVJwWhYv2Qie3Vb3sww8FBlEfStGed0hS-wJy6YfCxE8NjexcR6U_91ozhp7Wbw385sZBj-T9l-0ykMVwlg1KttsKokdF1tmiWH77wCHpHKdr_hfhk4W7JRLewh7mJJanjbNKEe03zGfjBY1VdvsQI8SJo1QTsu_smQTVaJAEGUZFTXFA5JrVzRfNvRM-s4wTzF5TC53Skux_y6mk8RI65CQQzwoBXaJVKF) *Component-level detail of the Aiken command module: skill loader, prompt builders, state model, storage, report renderer, and provider adapter.* @@ -167,11 +167,11 @@ Suggested frontmatter-like fields per file: - `trix aiken` command exists as a scoped command with subcommand structure. - Command is gated behind `unstable` feature flag (following `publish` pattern). -- `trix aiken analyze` subcommand is implemented as scaffold. +- `trix aiken audit` subcommand is implemented as scaffold. - `src/commands/aiken/mod.rs` routes subcommands following the `profile` pattern. -- `src/commands/aiken/analyze.rs` contains the analysis scaffold implementation. +- `src/commands/aiken/audit.rs` contains the audit scaffold implementation. - `src/commands/aiken/model.rs` defines scaffolding contracts for state, findings, and prompts. - Templates for report and permission prompt exist in `templates/aiken/`. - `skills/vulnerabilities/` exists with seed skill files. - This design document includes C4 diagrams as separate PlantUML files in `003-assets/`. -- E2E scaffold tests verify command visibility and baseline behavior for `analyze` subcommand (tests run with `--features unstable`). +- E2E scaffold tests verify command visibility and baseline behavior for `audit` subcommand (tests run with `--features unstable`). 
diff --git a/design/003-assets/c4-component.puml b/design/003-assets/c4-component.puml index aa996a6..9dc99ad 100644 --- a/design/003-assets/c4-component.puml +++ b/design/003-assets/c4-component.puml @@ -1,7 +1,7 @@ @startuml C4_Component_AikenVuln !include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Component.puml -Container_Boundary(aiken, "Aiken Command Module") { +Container_Boundary(aiken, "Aiken Audit Command Module") { Component(cmd, "run(args, config, profile)", "mod.rs", "Scoped command entrypoint") Component(skill_loader, "Skill Loader", "future module", "Loads one vulnerability skill at a time") Component(mini_prompt, "Mini Prompt Builder", "future module", "Builds focused prompt for current skill") diff --git a/design/003-assets/c4-container.puml b/design/003-assets/c4-container.puml index b50301c..d3cb30a 100644 --- a/design/003-assets/c4-container.puml +++ b/design/003-assets/c4-container.puml @@ -3,7 +3,7 @@ Person(dev, "Developer") System_Boundary(trix, "Trix CLI") { - Container(cli, "Aiken Command", "Rust + Clap", "CLI command entrypoint and argument handling") + Container(cli, "Aiken Audit Command", "Rust + Clap", "CLI command entrypoint and argument handling") Container(loop, "Skill Loop Engine", "Rust", "Iterates vulnerability skills and updates state") Container(prompt, "Prompt Composer", "Rust + Templates", "Builds mini-prompts and permission prompt") Container(state, "State Writer", "Rust + JSON", "Persists incremental analysis state") @@ -15,7 +15,7 @@ System_Ext(fs, "Local File System") System_Ext(ai, "External AI Provider (Future)") Rel(dev, cli, "Invokes") -Rel(cli, loop, "Starts analysis") +Rel(cli, loop, "Starts audit") Rel(loop, prompt, "Requests skill mini-prompts") Rel(loop, state, "Stores iteration results") Rel(loop, report, "Builds final findings report") diff --git a/design/003-assets/c4-context.puml b/design/003-assets/c4-context.puml index bb17137..2812191 100644 --- a/design/003-assets/c4-context.puml +++ b/design/003-assets/c4-context.puml @@ -6,7 +6,7 @@ System(trix, "Trix CLI", "Tx3 package manager") System_Ext(ai, "External AI Provider", "Optional future provider such as Anthropic") System_Ext(fs, "Local File System", "Project source, skills, outputs") -Rel(dev, trix, "Runs `trix aiken`") +Rel(dev, trix, "Runs `trix aiken audit`") Rel(trix, fs, "Reads code + vulnerability skills; writes JSON state and Markdown report") Rel(trix, ai, "Future: sends skill-specific prompts and receives analysis") diff --git a/src/commands/aiken/analyze.rs b/src/commands/aiken/analyze.rs deleted file mode 100644 index d985fe7..0000000 --- a/src/commands/aiken/analyze.rs +++ /dev/null @@ -1,25 +0,0 @@ -use clap::Args as ClapArgs; -use miette::Result; - -use crate::config::{ProfileConfig, RootConfig}; - -#[derive(ClapArgs)] -pub struct Args { - /// Path where the incremental analysis state JSON will be written. - #[arg(long, default_value = ".tx3/aiken-analysis/state.json")] - pub state_out: String, - - /// Path where the final vulnerability report markdown will be written. - #[arg(long, default_value = ".tx3/aiken-analysis/vulnerabilities.md")] - pub report_out: String, - - /// Path to vulnerability skill definitions. 
- #[arg(long, default_value = "skills/vulnerabilities")] - pub skills_dir: String, -} - -pub fn run(_args: Args, _config: &RootConfig, _profile: &ProfileConfig) -> Result<()> { - println!("⚠️ EXPERIMENTAL: Aiken vulnerability analysis scaffolding is not implemented yet."); - println!("See design/003-ai-aiken-vulnerability-scaffolding.md for architecture and contracts."); - Ok(()) -} diff --git a/src/commands/aiken/audit.rs b/src/commands/aiken/audit.rs new file mode 100644 index 0000000..7aa5097 --- /dev/null +++ b/src/commands/aiken/audit.rs @@ -0,0 +1,301 @@ +use clap::Args as ClapArgs; +use miette::{Context, IntoDiagnostic, Result}; +use std::path::{Path, PathBuf}; + +use crate::config::{ProfileConfig, RootConfig}; + +use super::model::{ + AnalysisStateJson, MiniPrompt, PermissionPromptSpec, SkillIterationResult, + VulnerabilityFinding, VulnerabilityReportSpec, VulnerabilitySkill, +}; +use super::provider::{AnalysisProvider, ScaffoldProvider}; + +const DEFAULT_SKILLS_DIR: &str = "skills/vulnerabilities"; + +#[derive(ClapArgs)] +pub struct Args { + /// Path where the incremental analysis state JSON will be written. + #[arg(long, default_value = ".tx3/aiken-audit/state.json")] + pub state_out: String, + + /// Path where the final vulnerability report markdown will be written. + #[arg(long, default_value = ".tx3/aiken-audit/vulnerabilities.md")] + pub report_out: String, + + /// Path to vulnerability skill definitions. + #[arg(long, default_value = "skills/vulnerabilities")] + pub skills_dir: String, +} + +pub fn run(args: Args, config: &RootConfig, _profile: &ProfileConfig) -> Result<()> { + run_scaffold_analysis(args, config, &ScaffoldProvider) +} + +fn run_scaffold_analysis( + args: Args, + config: &RootConfig, + provider: &dyn AnalysisProvider, +) -> Result<()> { + let skills_dir = PathBuf::from(&args.skills_dir); + let state_out = PathBuf::from(&args.state_out); + let report_out = PathBuf::from(&args.report_out); + let target_path = config.protocol.main.display().to_string(); + + let permission_prompt = build_permission_prompt_spec(); + let skills = load_skills(&skills_dir, &args.skills_dir)?; + + let mut state = AnalysisStateJson { + version: "1".to_string(), + target_path: target_path.clone(), + provider: provider.provider_spec(), + permission_prompt: permission_prompt.clone(), + iterations: vec![], + }; + + write_state(&state_out, &state)?; + + run_skill_loop(&skills, provider, &mut state, &state_out)?; + + let report = build_report(&state); + let report_markdown = render_report_markdown(&report); + write_text_file(&report_out, &report_markdown)?; + + println!( + "⚠️ EXPERIMENTAL: Aiken audit scaffold complete. 
Skills processed: {}", + state.iterations.len() + ); + println!("State written to: {}", state_out.display()); + println!("Report written to: {}", report_out.display()); + + Ok(()) +} + +fn run_skill_loop( + skills: &[VulnerabilitySkill], + provider: &dyn AnalysisProvider, + state: &mut AnalysisStateJson, + state_out: &Path, +) -> Result<()> { + for skill in skills { + let prompt = build_mini_prompt(skill); + let iteration = provider.analyze_skill(skill, &prompt)?; + append_iteration(state, iteration); + write_state(state_out, state)?; + } + + Ok(()) +} + +fn append_iteration(state: &mut AnalysisStateJson, iteration: SkillIterationResult) { + state.iterations.push(iteration); +} + +fn build_mini_prompt(skill: &VulnerabilitySkill) -> MiniPrompt { + MiniPrompt { + skill_id: skill.id.clone(), + text: format!( + "[{}:{}] {}", + skill.severity, skill.title, skill.prompt_fragment + ), + } +} + +fn build_permission_prompt_spec() -> PermissionPromptSpec { + PermissionPromptSpec { + shell: "bash".to_string(), + allowed_commands: vec![ + "grep".to_string(), + "cat".to_string(), + "find".to_string(), + "ls".to_string(), + ], + scope_rules: vec![ + "Only execute commands within the current project root.".to_string(), + "Do not write outside designated output artifacts.".to_string(), + ], + } +} + +fn build_report(state: &AnalysisStateJson) -> VulnerabilityReportSpec { + let findings = state + .iterations + .iter() + .flat_map(|iteration| iteration.findings.iter().cloned()) + .collect::>(); + + VulnerabilityReportSpec { + title: "Aiken Vulnerability Report".to_string(), + generated_at: chrono::Utc::now().to_rfc3339(), + target: state.target_path.clone(), + findings, + } +} + +fn load_skills(skills_dir: &Path, skills_dir_arg: &str) -> Result> { + if !skills_dir.exists() { + if skills_dir_arg == DEFAULT_SKILLS_DIR { + return load_embedded_seed_skills(); + } + + return Err(miette::miette!( + "Aiken skills directory not found: {}", + skills_dir.display() + )); + } + + let mut entries = std::fs::read_dir(skills_dir) + .into_diagnostic() + .context("Failed to read skills directory")? 
+ .filter_map(|entry| entry.ok().map(|value| value.path())) + .filter(|path| path.is_file()) + .collect::>(); + + entries.sort(); + + let skills = entries + .iter() + .map(|path| load_skill_from_file(path)) + .collect::>>()?; + + if skills.is_empty() { + return Err(miette::miette!( + "No vulnerability skills found in {}", + skills_dir.display() + )); + } + + Ok(skills) +} + +fn load_embedded_seed_skills() -> Result> { + let seed_files = [ + ( + Path::new("skills/vulnerabilities/001-state-transition.md"), + include_str!("../../../skills/vulnerabilities/001-state-transition.md"), + ), + ( + Path::new("skills/vulnerabilities/002-authz-boundaries.md"), + include_str!("../../../skills/vulnerabilities/002-authz-boundaries.md"), + ), + ]; + + seed_files + .iter() + .map(|(path, content)| parse_skill_content(path, content)) + .collect::>>() +} + +fn load_skill_from_file(path: &Path) -> Result { + let content = std::fs::read_to_string(path) + .into_diagnostic() + .with_context(|| format!("Failed to read vulnerability skill file {}", path.display()))?; + + parse_skill_content(path, &content) +} + +fn parse_skill_content(path: &Path, content: &str) -> Result { + let mut id = None; + let mut title = None; + let mut severity = None; + let mut description = None; + let mut prompt_fragment = None; + + for line in content + .lines() + .map(str::trim) + .filter(|line| !line.is_empty()) + { + let Some((key, value)) = line.split_once(':') else { + continue; + }; + + let key = key.trim(); + let value = value.trim().to_string(); + + match key { + "id" => id = Some(value), + "title" => title = Some(value), + "severity" => severity = Some(value), + "description" => description = Some(value), + "prompt_fragment" => prompt_fragment = Some(value), + _ => {} + } + } + + Ok(VulnerabilitySkill { + id: id.ok_or_else(|| { + miette::miette!( + "Missing `id` field in vulnerability skill file {}", + path.display() + ) + })?, + title: title.ok_or_else(|| { + miette::miette!( + "Missing `title` field in vulnerability skill file {}", + path.display() + ) + })?, + severity: severity.ok_or_else(|| { + miette::miette!( + "Missing `severity` field in vulnerability skill file {}", + path.display() + ) + })?, + description: description.ok_or_else(|| { + miette::miette!( + "Missing `description` field in vulnerability skill file {}", + path.display() + ) + })?, + prompt_fragment: prompt_fragment.ok_or_else(|| { + miette::miette!( + "Missing `prompt_fragment` field in vulnerability skill file {}", + path.display() + ) + })?, + }) +} + +fn write_state(path: &Path, state: &AnalysisStateJson) -> Result<()> { + let serialized = serde_json::to_string_pretty(state).into_diagnostic()?; + write_text_file(path, &serialized) +} + +fn write_text_file(path: &Path, content: &str) -> Result<()> { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent) + .into_diagnostic() + .with_context(|| format!("Failed to create output directory {}", parent.display()))?; + } + + std::fs::write(path, content) + .into_diagnostic() + .with_context(|| format!("Failed to write file {}", path.display())) +} + +fn render_report_markdown(report: &VulnerabilityReportSpec) -> String { + let template = include_str!("../../../templates/aiken/report.md"); + let findings_markdown = render_findings_markdown(&report.findings); + + template + .replace("{{ target }}", &report.target) + .replace("{{ generated_at }}", &report.generated_at) + .replace("{{ findings_markdown }}", &findings_markdown) +} + +fn render_findings_markdown(findings: 
&[VulnerabilityFinding]) -> String { + if findings.is_empty() { + return "- *(none)*".to_string(); + } + + findings + .iter() + .map(|finding| { + format!( + "- **{}** (`{}`)\n - Summary: {}\n - Recommendation: {}", + finding.title, finding.severity, finding.summary, finding.recommendation + ) + }) + .collect::>() + .join("\n") +} diff --git a/src/commands/aiken/mod.rs b/src/commands/aiken/mod.rs index 807a0b1..abe9901 100644 --- a/src/commands/aiken/mod.rs +++ b/src/commands/aiken/mod.rs @@ -3,15 +3,16 @@ use miette::Result; use crate::config::{ProfileConfig, RootConfig}; -pub mod analyze; +pub mod audit; pub mod model; +pub mod provider; -pub use analyze::run as run_analyze; +pub use audit::run as run_audit; #[derive(Subcommand)] pub enum Command { - /// Analyze Aiken code for vulnerabilities using AI-assisted detection - Analyze(analyze::Args), + /// Audit Aiken code for vulnerabilities using AI-assisted detection + Audit(audit::Args), } #[derive(ClapArgs)] @@ -38,6 +39,6 @@ pub fn run(args: Args, config: &RootConfig, profile: &ProfileConfig) -> Result<( fn _run(args: Args, config: &RootConfig, profile: &ProfileConfig) -> Result<()> { match args.command { - Command::Analyze(args) => run_analyze(args, config, profile), + Command::Audit(args) => run_audit(args, config, profile), } } diff --git a/src/commands/aiken/provider.rs b/src/commands/aiken/provider.rs new file mode 100644 index 0000000..be40cd1 --- /dev/null +++ b/src/commands/aiken/provider.rs @@ -0,0 +1,45 @@ +use miette::Result; + +use super::model::{MiniPrompt, ProviderSpec, SkillIterationResult, VulnerabilitySkill}; + +pub trait AnalysisProvider { + fn provider_spec(&self) -> ProviderSpec; + + fn analyze_skill( + &self, + skill: &VulnerabilitySkill, + prompt: &MiniPrompt, + ) -> Result; +} + +#[derive(Debug, Default)] +pub struct ScaffoldProvider; + +impl AnalysisProvider for ScaffoldProvider { + fn provider_spec(&self) -> ProviderSpec { + ProviderSpec { + name: "scaffold".to_string(), + model: None, + notes: "Scaffolding-only provider. 
No external AI calls are performed.".to_string(), + } + } + + fn analyze_skill( + &self, + skill: &VulnerabilitySkill, + prompt: &MiniPrompt, + ) -> Result { + Ok(SkillIterationResult { + skill_id: skill.id.clone(), + status: "scaffolded".to_string(), + findings: vec![], + next_prompt: Some(MiniPrompt { + skill_id: skill.id.clone(), + text: format!( + "Scaffold follow-up placeholder for skill '{}' based on prompt '{}'.", + skill.id, prompt.text + ), + }), + }) + } +} diff --git a/src/commands/devnet/copy.rs b/src/commands/devnet/copy.rs index 6c0eda6..3f26dd3 100644 --- a/src/commands/devnet/copy.rs +++ b/src/commands/devnet/copy.rs @@ -77,22 +77,23 @@ async fn fetch_utxo_deps( .into_diagnostic()?; if let Some(tx) = tx - && let Some(tx) = tx.parsed { - let utxos = client - .read_utxos( - tx.inputs - .iter() - .map(|r| TxoRef { - hash: r.tx_hash.clone(), - index: r.output_index, - }) - .collect(), - ) - .await - .into_diagnostic()?; - - return Ok(utxos); - } + && let Some(tx) = tx.parsed + { + let utxos = client + .read_utxos( + tx.inputs + .iter() + .map(|r| TxoRef { + hash: r.tx_hash.clone(), + index: r.output_index, + }) + .collect(), + ) + .await + .into_diagnostic()?; + + return Ok(utxos); + } Ok(vec![]) } diff --git a/src/commands/profile/list.rs b/src/commands/profile/list.rs index e0a0885..d60da5c 100644 --- a/src/commands/profile/list.rs +++ b/src/commands/profile/list.rs @@ -4,8 +4,8 @@ use termimad::MadSkin; use crate::config::RootConfig; use super::{ - resolve_network_source, resolve_profile_source, NetworkListItem, ProfileListItem, - ProfileListView, + NetworkListItem, ProfileListItem, ProfileListView, resolve_network_source, + resolve_profile_source, }; // ============================================================================ diff --git a/src/commands/profile/show.rs b/src/commands/profile/show.rs index ba2bdbb..76465e8 100644 --- a/src/commands/profile/show.rs +++ b/src/commands/profile/show.rs @@ -4,8 +4,8 @@ use termimad::MadSkin; use crate::config::{NetworkConfig, ProfileConfig, RootConfig}; use super::{ - load_and_mask_env_vars, mask_value, resolve_network_source, resolve_profile_source, ConfigSource, EndpointView, EnvFileStatus, EnvFileView, IdentityView, NetworkView, ProfileView, + load_and_mask_env_vars, mask_value, resolve_network_source, resolve_profile_source, }; // ============================================================================ diff --git a/src/commands/test.rs b/src/commands/test.rs index 0c75066..d53e16a 100644 --- a/src/commands/test.rs +++ b/src/commands/test.rs @@ -6,7 +6,7 @@ use std::{ }; use clap::Args as ClapArgs; -use miette::{bail, Context as _, IntoDiagnostic, Result}; +use miette::{Context as _, IntoDiagnostic, Result, bail}; use serde::{Deserialize, Serialize}; use crate::{ diff --git a/src/devnet/mod.rs b/src/devnet/mod.rs index b9ab7f4..fa498d5 100644 --- a/src/devnet/mod.rs +++ b/src/devnet/mod.rs @@ -9,7 +9,7 @@ use std::{ use miette::{Diagnostic, IntoDiagnostic as _}; use serde::{Deserialize, Serialize}; -use serde_with::{serde_as, DisplayFromStr}; +use serde_with::{DisplayFromStr, serde_as}; use thiserror::Error; use crate::wallet::WalletProxy; diff --git a/src/main.rs b/src/main.rs index 106874e..045e9bc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,12 @@ use clap::Parser; +use miette::{IntoDiagnostic as _, Result}; use trix::{ - builder, cli::{Cli, Commands}, commands as cmds, config::RootConfig, - devnet, dirs, global, home, spawn, telemetry, updates, wallet, + global, telemetry, updates, }; -use 
miette::{IntoDiagnostic as _, Result}; pub fn load_config() -> Result> { let current_dir = std::env::current_dir().into_diagnostic()?; diff --git a/src/spawn/cshell.rs b/src/spawn/cshell.rs index 62261bf..7e9f6c5 100644 --- a/src/spawn/cshell.rs +++ b/src/spawn/cshell.rs @@ -6,8 +6,8 @@ use std::{ use askama::Template; -use miette::{bail, Context as _, IntoDiagnostic as _}; -use serde::{de, Deserialize, Deserializer, Serialize}; +use miette::{Context as _, IntoDiagnostic as _, bail}; +use serde::{Deserialize, Deserializer, Serialize, de}; use crate::config::{TrpConfig, U5cConfig}; diff --git a/src/spawn/tx3c.rs b/src/spawn/tx3c.rs index eb7c079..ce288ca 100644 --- a/src/spawn/tx3c.rs +++ b/src/spawn/tx3c.rs @@ -1,6 +1,6 @@ use std::{path::Path, process::Command}; -use miette::{bail, Context as _, IntoDiagnostic as _}; +use miette::{Context as _, IntoDiagnostic as _, bail}; use crate::config::RootConfig; diff --git a/src/telemetry/mod.rs b/src/telemetry/mod.rs index 34c0e13..75e65b4 100644 --- a/src/telemetry/mod.rs +++ b/src/telemetry/mod.rs @@ -1,7 +1,10 @@ use tokio::{sync::OnceCell, task::JoinHandle}; use tracing::debug; -use crate::{cli::{Cli, Commands}, global::TelemetryConfig}; +use crate::{ + cli::{Cli, Commands}, + global::TelemetryConfig, +}; mod client; mod fingerprint; diff --git a/src/wallet.rs b/src/wallet.rs index 3bc90b5..37ffc3d 100644 --- a/src/wallet.rs +++ b/src/wallet.rs @@ -6,7 +6,7 @@ use std::{ use askama::Template as _; use bip39::Mnemonic; use cryptoxide::{digest::Digest, sha2::Sha256}; -use miette::{bail, Context, IntoDiagnostic as _, Result}; +use miette::{Context, IntoDiagnostic as _, Result, bail}; use crate::{ config::{IdentityConfig, NetworkConfig, ProfileConfig, RootConfig}, diff --git a/tests/e2e/edge_cases.rs b/tests/e2e/edge_cases.rs index cd12f67..2ef1d5a 100644 --- a/tests/e2e/edge_cases.rs +++ b/tests/e2e/edge_cases.rs @@ -43,14 +43,39 @@ fn init_preserves_existing_test_file() { #[test] #[cfg(feature = "unstable")] -fn aiken_analyze_fails_without_trix_config() { +fn aiken_audit_fails_without_trix_config() { let ctx = TestContext::new(); - let result = ctx.run_trix(&["aiken", "analyze"]); + let result = ctx.run_trix(&["aiken", "audit"]); - assert!(!result.success(), "aiken analyze should fail outside scoped project"); assert!( - result.stderr.contains("No trix.toml found in current directory"), + !result.success(), + "aiken audit should fail outside scoped project" + ); + assert!( + result + .stderr + .contains("No trix.toml found in current directory"), "Expected missing trix.toml error, got stderr: {}", result.stderr ); } + +#[test] +#[cfg(feature = "unstable")] +fn aiken_audit_fails_with_missing_skills_dir() { + let ctx = TestContext::new(); + let init_result = ctx.run_trix(&["init", "--yes"]); + assert_success(&init_result); + + let result = ctx.run_trix(&["aiken", "audit", "--skills-dir", "skills/does-not-exist"]); + + assert!( + !result.success(), + "aiken audit should fail with invalid skills dir" + ); + assert!( + result.stderr.contains("Aiken skills directory not found"), + "Expected missing skills directory error, got stderr: {}", + result.stderr + ); +} diff --git a/tests/e2e/happy_path.rs b/tests/e2e/happy_path.rs index f4435c7..2ea98b0 100644 --- a/tests/e2e/happy_path.rs +++ b/tests/e2e/happy_path.rs @@ -1,5 +1,7 @@ use super::*; use std::path::PathBuf; +#[cfg(feature = "unstable")] +use trix::commands::aiken::model::AnalysisStateJson; use trix::config::KnownLedgerFamily; #[test] @@ -147,16 +149,27 @@ fn devnet_starts_and_cshell_connects() 
{ #[test] #[cfg(feature = "unstable")] -fn aiken_analyze_runs_in_initialized_project() { +fn aiken_audit_runs_in_initialized_project() { let ctx = TestContext::new(); let init_result = ctx.run_trix(&["init", "--yes"]); assert_success(&init_result); - let result = ctx.run_trix(&["aiken", "analyze"]); + let result = ctx.run_trix(&["aiken", "audit"]); assert_success(&result); - assert_output_contains( - &result, - "EXPERIMENTAL", + assert_output_contains(&result, "EXPERIMENTAL"); + + ctx.assert_file_exists(".tx3/aiken-audit/state.json"); + ctx.assert_file_exists(".tx3/aiken-audit/vulnerabilities.md"); + + let state_content = ctx.read_file(".tx3/aiken-audit/state.json"); + let state: AnalysisStateJson = + serde_json::from_str(&state_content).expect("state.json should be valid AnalysisStateJson"); + + assert_eq!(state.version, "1"); + assert_eq!( + state.iterations.len(), + 2, + "expected one iteration per seed skill" ); } diff --git a/tests/e2e/smoke.rs b/tests/e2e/smoke.rs index d87df97..aaadaad 100644 --- a/tests/e2e/smoke.rs +++ b/tests/e2e/smoke.rs @@ -16,14 +16,14 @@ fn aiken_help_runs_without_error() { let result = ctx.run_trix(&["aiken", "--help"]); assert_success(&result); - assert_output_contains(&result, "analyze"); + assert_output_contains(&result, "audit"); } #[test] #[cfg(feature = "unstable")] -fn aiken_analyze_help_runs_without_error() { +fn aiken_audit_help_runs_without_error() { let ctx = TestContext::new(); - let result = ctx.run_trix(&["aiken", "analyze", "--help"]); + let result = ctx.run_trix(&["aiken", "audit", "--help"]); assert_success(&result); assert_output_contains(&result, "vulnerability"); From afe6dda06f38891d0beb0830ae8221997d7149a2 Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Mon, 23 Feb 2026 17:49:40 -0300 Subject: [PATCH 03/18] feat: improve skills structure --- Cargo.lock | 20 ++ Cargo.toml | 1 + .../003-ai-aiken-vulnerability-scaffolding.md | 17 +- .../vulnerabilities/001-state-transition.md | 30 +- .../vulnerabilities/002-authz-boundaries.md | 30 +- .../003-strict-value-equality.md | 35 +++ src/commands/aiken/audit.rs | 265 ++++++++++++++---- src/commands/aiken/model.rs | 8 +- 8 files changed, 340 insertions(+), 66 deletions(-) create mode 100644 skills/vulnerabilities/003-strict-value-equality.md diff --git a/Cargo.lock b/Cargo.lock index c3b21f7..99640c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3630,6 +3630,19 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_yaml_ng" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4db627b98b36d4203a7b458cf3573730f2bb591b28871d916dfa9efabfd41f" +dependencies = [ + "indexmap 2.10.0", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "sha1" version = "0.10.6" @@ -4402,6 +4415,7 @@ dependencies = [ "serde", "serde_json", "serde_with", + "serde_yaml_ng", "tempfile", "termimad", "thiserror 2.0.17", @@ -4514,6 +4528,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "untrusted" version = "0.9.0" diff --git a/Cargo.toml b/Cargo.toml index 5ed8a6c..1062b64 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,7 @@ ed25519-bip32 = "0.4.1" bip39 = "2.1.0" octocrab = "0.44" serde_with = "3.14.0" 
+serde_yaml_ng = "0.10" askama = "0.14.0" prost = "0.13" tracing = "0.1" diff --git a/design/003-ai-aiken-vulnerability-scaffolding.md b/design/003-ai-aiken-vulnerability-scaffolding.md index a7dd750..6d4c816 100644 --- a/design/003-ai-aiken-vulnerability-scaffolding.md +++ b/design/003-ai-aiken-vulnerability-scaffolding.md @@ -156,13 +156,24 @@ Vulnerability skills live under: One file per skill, designed for 1:1 loop processing. -Suggested frontmatter-like fields per file: +Each skill file uses YAML frontmatter plus optional markdown guidance body: + +Required frontmatter fields: - `id` -- `title` -- `severity` +- `name` +- `severity` (`low` | `medium` | `high` | `critical`) - `description` - `prompt_fragment` +Optional frontmatter fields: +- `examples` (list) +- `false_positives` (list) +- `references` (list) +- `tags` (list) +- `confidence_hint` + +The markdown body can include richer instructions, rationale, and examples and is passed as guidance context to the prompt builder. + ## Milestone 1 Acceptance Criteria - `trix aiken` command exists as a scoped command with subcommand structure. diff --git a/skills/vulnerabilities/001-state-transition.md b/skills/vulnerabilities/001-state-transition.md index 014296b..9c678de 100644 --- a/skills/vulnerabilities/001-state-transition.md +++ b/skills/vulnerabilities/001-state-transition.md @@ -1,5 +1,33 @@ +--- id: state-transition-001 -title: Unsafe state transition validation +name: Unsafe state transition validation severity: high description: Ensure transitions are fully guarded by explicit preconditions and invariants. prompt_fragment: Review all state transition paths and identify missing or bypassable validation checks. +examples: + - A transition branch updates state without validating current state/version. + - A fallback branch bypasses checks that are present in the main path. +false_positives: + - Branches that are unreachable due to upstream exhaustive pattern matching. +references: + - https://plutus.cardano.intersectmbo.org/ +tags: + - state-machine + - invariants +confidence_hint: medium +--- + +# When to use + +Use this skill when auditing validators or state machines that evolve datum/state across transactions. + +# Detection instructions + +1. Enumerate every possible transition branch. +2. Verify explicit preconditions for each branch (state shape, signer set, timing/value gates). +3. Check for bypasses where validation exists in one branch but not in another. +4. Confirm invariants are preserved before and after transitions. + +# Reporting guidance + +Prefer findings with concrete branch/path evidence and explain why a transition can be bypassed or made inconsistent. diff --git a/skills/vulnerabilities/002-authz-boundaries.md b/skills/vulnerabilities/002-authz-boundaries.md index 966bee4..050d009 100644 --- a/skills/vulnerabilities/002-authz-boundaries.md +++ b/skills/vulnerabilities/002-authz-boundaries.md @@ -1,5 +1,33 @@ +--- id: authz-boundaries-002 -title: Authorization boundary bypass +name: Authorization boundary bypass severity: high description: Validate signer and role checks for every sensitive branch. prompt_fragment: Find code paths where authorization assumptions are implicit or can be bypassed. +examples: + - Sensitive branch checks datum fields but does not verify signer identity. + - A role check exists only in one constructor case and not in another. +false_positives: + - Purely read-only branches that cannot trigger state/value changes. 
+references: + - https://plutus.cardano.intersectmbo.org/ +tags: + - authz + - signers +confidence_hint: medium +--- + +# When to use + +Use this skill for any validator path that can move value, mutate state, or grant privileges. + +# Detection instructions + +1. List all privileged operations and their entry branches. +2. Verify signer checks and role assertions are explicit in each branch. +3. Identify implicit assumptions (e.g., relying on script purpose without signer validation). +4. Ensure negative paths cannot reach privileged effects. + +# Reporting guidance + +Include the exact branch/function where authz is missing and a realistic abuse scenario. diff --git a/skills/vulnerabilities/003-strict-value-equality.md b/skills/vulnerabilities/003-strict-value-equality.md new file mode 100644 index 0000000..5ecd80b --- /dev/null +++ b/skills/vulnerabilities/003-strict-value-equality.md @@ -0,0 +1,35 @@ +--- +id: strict-value-equality-003 +name: Strict value equality on ADA or full Value +severity: high +description: Detect unsatisfiable validator constraints caused by exact equality checks on ADA or complete output values. +prompt_fragment: Read validator scripts and flag strict equality checks on ADA or full output values; treat comparisons using without_lovelace() as acceptable and not strict ADA equality. +examples: + - output.value == expected_value + - output.value.lovelace == exact_amount +false_positives: + - Comparisons using without_lovelace() to ignore ADA component. + - Checks that enforce minimum lovelace instead of exact equality. +references: + - https://plutus.cardano.intersectmbo.org/ +tags: + - value + - lovelace + - constraints +confidence_hint: medium +--- + +# When to use + +Use this skill whenever validators compare output values or ADA amounts for equality. + +# Detection instructions + +1. Find equality checks on full values and lovelace amounts. +2. Flag exact equality constraints that can become unsatisfiable due to fees/min-ADA variability. +3. Accept checks using `without_lovelace()` as intentional ADA-agnostic comparisons. +4. Prefer invariants based on lower bounds for ADA, unless a strict invariant is explicitly justified. + +# Reporting guidance + +Include the equality expression and explain why it can fail in realistic transaction construction. 
\ No newline at end of file diff --git a/src/commands/aiken/audit.rs b/src/commands/aiken/audit.rs index 7aa5097..a67d4a7 100644 --- a/src/commands/aiken/audit.rs +++ b/src/commands/aiken/audit.rs @@ -1,5 +1,6 @@ use clap::Args as ClapArgs; use miette::{Context, IntoDiagnostic, Result}; +use serde::Deserialize; use std::path::{Path, PathBuf}; use crate::config::{ProfileConfig, RootConfig}; @@ -91,13 +92,51 @@ fn append_iteration(state: &mut AnalysisStateJson, iteration: SkillIterationResu } fn build_mini_prompt(skill: &VulnerabilitySkill) -> MiniPrompt { + let text = compose_skill_prompt(skill); + MiniPrompt { skill_id: skill.id.clone(), - text: format!( - "[{}:{}] {}", - skill.severity, skill.title, skill.prompt_fragment - ), + text, + } +} + +fn compose_skill_prompt(skill: &VulnerabilitySkill) -> String { + let mut sections = vec![ + format!("Skill ID: {}", skill.id), + format!("Name: {}", skill.name), + format!("Severity: {}", skill.severity), + format!("Description: {}", skill.description), + format!("Prompt Fragment: {}", skill.prompt_fragment), + ]; + + if !skill.tags.is_empty() { + sections.push(format!("Tags: {}", skill.tags.join(", "))); + } + + if let Some(hint) = &skill.confidence_hint { + sections.push(format!("Confidence Hint: {}", hint)); + } + + if !skill.examples.is_empty() { + sections.push(format!("Examples:\n- {}", skill.examples.join("\n- "))); + } + + if !skill.false_positives.is_empty() { + sections.push(format!( + "False Positives To Avoid:\n- {}", + skill.false_positives.join("\n- ") + )); + } + + if !skill.references.is_empty() { + sections.push(format!("References:\n- {}", skill.references.join("\n- "))); + } + + if !skill.guidance_markdown.trim().is_empty() { + sections.push(format!("Guidance:\n{}", skill.guidance_markdown.trim())); } + + sections.join("\n\n") } fn build_permission_prompt_spec() -> PermissionPromptSpec { @@ -177,6 +216,10 @@ fn load_embedded_seed_skills() -> Result> { Path::new("skills/vulnerabilities/002-authz-boundaries.md"), include_str!("../../../skills/vulnerabilities/002-authz-boundaries.md"), ), + ( + Path::new("skills/vulnerabilities/003-strict-value-equality.md"), + include_str!("../../../skills/vulnerabilities/003-strict-value-equality.md"), + ), ]; seed_files @@ -194,68 +237,170 @@ fn load_skill_from_file(path: &Path) -> Result { } fn parse_skill_content(path: &Path, content: &str) -> Result { - let mut id = None; - let mut title = None; - let mut severity = None; - let mut description = None; - let mut prompt_fragment = None; - - for line in content - .lines() - .map(str::trim) - .filter(|line| !line.is_empty()) - { - let Some((key, value)) = line.split_once(':') else { - continue; - }; - - let key = key.trim(); - let value = value.trim().to_string(); - - match key { - "id" => id = Some(value), - "title" => title = Some(value), - "severity" => severity = Some(value), - "description" => description = Some(value), - "prompt_fragment" => prompt_fragment = Some(value), - _ => {} - } + let (frontmatter, body) = split_frontmatter(content).with_context(|| { + format!( + "Failed to parse frontmatter from vulnerability skill file {}", + path.display() + ) + })?; + + let parsed: SkillFrontmatter = serde_yaml_ng::from_str(&frontmatter) + .into_diagnostic() + .with_context(|| { + format!( + "Invalid YAML frontmatter in vulnerability skill file {}", + path.display() + ) + })?; + + let severity = parsed.severity.trim().to_ascii_lowercase(); + if !matches!(severity.as_str(), "low" | "medium" | "high" | "critical") { + return Err(miette::miette!( + 
"Invalid `severity` value '{}' in vulnerability skill file {}. Expected one of: low, medium, high, critical", + parsed.severity, + path.display() + )); } Ok(VulnerabilitySkill { - id: id.ok_or_else(|| { - miette::miette!( - "Missing `id` field in vulnerability skill file {}", - path.display() - ) - })?, - title: title.ok_or_else(|| { - miette::miette!( - "Missing `title` field in vulnerability skill file {}", - path.display() - ) - })?, - severity: severity.ok_or_else(|| { - miette::miette!( - "Missing `severity` field in vulnerability skill file {}", - path.display() - ) - })?, - description: description.ok_or_else(|| { - miette::miette!( - "Missing `description` field in vulnerability skill file {}", - path.display() - ) - })?, - prompt_fragment: prompt_fragment.ok_or_else(|| { - miette::miette!( - "Missing `prompt_fragment` field in vulnerability skill file {}", - path.display() - ) - })?, + id: require_non_empty("id", path, parsed.id)?, + name: require_non_empty("name", path, parsed.name)?, + severity, + description: require_non_empty("description", path, parsed.description)?, + prompt_fragment: require_non_empty("prompt_fragment", path, parsed.prompt_fragment)?, + examples: parsed.examples, + false_positives: parsed.false_positives, + references: parsed.references, + tags: parsed.tags, + confidence_hint: parsed.confidence_hint.filter(|value| !value.trim().is_empty()), + guidance_markdown: body.trim().to_string(), }) } +fn split_frontmatter(content: &str) -> Result<(String, String)> { + let content = content.trim_start_matches('\u{feff}'); + let mut lines = content.lines(); + + let Some(first_line) = lines.next() else { + return Err(miette::miette!("Skill file is empty")); + }; + + if first_line.trim() != "---" { + return Err(miette::miette!( + "Missing frontmatter start delimiter `---`" + )); + } + + let mut frontmatter_lines = Vec::new(); + let mut found_end = false; + + for line in lines.by_ref() { + if line.trim() == "---" { + found_end = true; + break; + } + frontmatter_lines.push(line); + } + + if !found_end { + return Err(miette::miette!( + "Missing frontmatter end delimiter `---`" + )); + } + + let body_lines = lines.collect::>(); + + Ok((frontmatter_lines.join("\n"), body_lines.join("\n"))) +} + +fn require_non_empty(field: &str, path: &Path, value: String) -> Result { + let trimmed = value.trim(); + if trimmed.is_empty() { + return Err(miette::miette!( + "Field `{}` must be non-empty in vulnerability skill file {}", + field, + path.display() + )); + } + + Ok(trimmed.to_string()) +} + +#[derive(Debug, Deserialize)] +#[serde(deny_unknown_fields)] +struct SkillFrontmatter { + id: String, + name: String, + severity: String, + description: String, + prompt_fragment: String, + #[serde(default)] + examples: Vec, + #[serde(default)] + false_positives: Vec, + #[serde(default)] + references: Vec, + #[serde(default)] + tags: Vec, + confidence_hint: Option, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_skill_content_reads_frontmatter_and_guidance() { + let content = r#"--- +id: strict-value-equality-003 +name: Strict value equality +severity: high +description: Detect strict equality checks for ADA. +prompt_fragment: Find strict equality on ADA or full values. +examples: + - output.value == expected +tags: + - plutus-v2 +confidence_hint: medium +--- +# Instructions + +Check validator outputs and avoid false positives for without_lovelace(). 
+"#; + + let skill = parse_skill_content(Path::new("skill.md"), content).expect("should parse"); + + assert_eq!(skill.id, "strict-value-equality-003"); + assert_eq!(skill.name, "Strict value equality"); + assert_eq!(skill.severity, "high"); + assert_eq!(skill.examples.len(), 1); + assert!(skill.guidance_markdown.contains("# Instructions")); + } + + #[test] + fn parse_skill_content_requires_frontmatter() { + let content = "id: foo"; + let error = parse_skill_content(Path::new("skill.md"), content).expect_err("should fail"); + assert!(error.to_string().contains("frontmatter")); + } + + #[test] + fn parse_skill_content_rejects_invalid_severity() { + let content = r#"--- +id: skill-1 +name: Test skill +severity: urgent +description: desc +prompt_fragment: prompt +--- +body +"#; + + let error = parse_skill_content(Path::new("skill.md"), content).expect_err("should fail"); + assert!(error.to_string().contains("Invalid `severity` value")); + } +} + fn write_state(path: &Path, state: &AnalysisStateJson) -> Result<()> { let serialized = serde_json::to_string_pretty(state).into_diagnostic()?; write_text_file(path, &serialized) diff --git a/src/commands/aiken/model.rs b/src/commands/aiken/model.rs index 219c326..b3a44ac 100644 --- a/src/commands/aiken/model.rs +++ b/src/commands/aiken/model.rs @@ -3,10 +3,16 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VulnerabilitySkill { pub id: String, - pub title: String, + pub name: String, pub severity: String, pub description: String, pub prompt_fragment: String, + pub examples: Vec, + pub false_positives: Vec, + pub references: Vec, + pub tags: Vec, + pub confidence_hint: Option, + pub guidance_markdown: String, } #[derive(Debug, Clone, Serialize, Deserialize)] From c8481e2c26fd8393db03b05e05b9b44f8a969e14 Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Tue, 24 Feb 2026 12:16:23 -0300 Subject: [PATCH 04/18] feat: enhance Aiken vulnerability detection with multiple analysis providers and improved file discovery --- src/commands/aiken/audit.rs | 239 ++++++++++++++++++++++++++++-- src/commands/aiken/model.rs | 2 + src/commands/aiken/provider.rs | 263 ++++++++++++++++++++++++++++++++- tests/e2e/happy_path.rs | 2 +- 4 files changed, 491 insertions(+), 15 deletions(-) diff --git a/src/commands/aiken/audit.rs b/src/commands/aiken/audit.rs index a67d4a7..17800cb 100644 --- a/src/commands/aiken/audit.rs +++ b/src/commands/aiken/audit.rs @@ -9,9 +9,18 @@ use super::model::{ AnalysisStateJson, MiniPrompt, PermissionPromptSpec, SkillIterationResult, VulnerabilityFinding, VulnerabilityReportSpec, VulnerabilitySkill, }; -use super::provider::{AnalysisProvider, ScaffoldProvider}; +use super::provider::{AnalysisProvider, AnthropicProvider, OpenAiProvider, ScaffoldProvider}; const DEFAULT_SKILLS_DIR: &str = "skills/vulnerabilities"; +const DEFAULT_AI_ENDPOINT: &str = "https://api.openai.com/v1/chat/completions"; +const DEFAULT_AI_MODEL: &str = "gpt-4.1-mini"; +const DEFAULT_AI_API_KEY_ENV: &str = "OPENAI_API_KEY"; +const DEFAULT_ANTHROPIC_ENDPOINT: &str = "https://api.anthropic.com/v1/messages"; +const DEFAULT_ANTHROPIC_MODEL: &str = "claude-3-5-haiku-latest"; +const DEFAULT_ANTHROPIC_API_KEY_ENV: &str = "ANTHROPIC_API_KEY"; +const DEFAULT_ANTHROPIC_VERSION: &str = "2023-06-01"; +const DEFAULT_OLLAMA_ENDPOINT: &str = "http://localhost:11434/v1/chat/completions"; +const DEFAULT_OLLAMA_MODEL: &str = "llama3.1"; #[derive(ClapArgs)] pub struct Args { @@ -26,13 +35,30 @@ pub struct Args { /// Path to vulnerability skill 
definitions. #[arg(long, default_value = "skills/vulnerabilities")] pub skills_dir: String, + + /// Analysis provider: scaffold | openai | anthropic | ollama + #[arg(long, default_value = "scaffold")] + pub provider: String, + + /// API endpoint override. Default depends on --provider. + #[arg(long)] + pub endpoint: Option, + + /// Model override. Default depends on --provider. + #[arg(long)] + pub model: Option, + + /// API key environment variable override. Default depends on --provider. + #[arg(long)] + pub api_key_env: Option, } pub fn run(args: Args, config: &RootConfig, _profile: &ProfileConfig) -> Result<()> { - run_scaffold_analysis(args, config, &ScaffoldProvider) + let provider = build_provider(&args)?; + run_analysis(args, config, provider.as_ref()) } -fn run_scaffold_analysis( +fn run_analysis( args: Args, config: &RootConfig, provider: &dyn AnalysisProvider, @@ -41,6 +67,13 @@ fn run_scaffold_analysis( let state_out = PathBuf::from(&args.state_out); let report_out = PathBuf::from(&args.report_out); let target_path = config.protocol.main.display().to_string(); + let project_root = std::env::current_dir().into_diagnostic()?; + let source_files = discover_aiken_source_files(&project_root)?; + let source_files = if source_files.is_empty() { + vec![config.protocol.main.clone()] + } else { + source_files + }; let permission_prompt = build_permission_prompt_spec(); let skills = load_skills(&skills_dir, &args.skills_dir)?; @@ -48,23 +81,28 @@ fn run_scaffold_analysis( let mut state = AnalysisStateJson { version: "1".to_string(), target_path: target_path.clone(), + source_files: source_files + .iter() + .map(|path| path.display().to_string()) + .collect(), provider: provider.provider_spec(), - permission_prompt: permission_prompt.clone(), + permission_prompt, iterations: vec![], }; write_state(&state_out, &state)?; - run_skill_loop(&skills, provider, &mut state, &state_out)?; + run_skill_loop(&skills, &source_files, provider, &mut state, &state_out)?; let report = build_report(&state); let report_markdown = render_report_markdown(&report); write_text_file(&report_out, &report_markdown)?; println!( - "⚠️ EXPERIMENTAL: Aiken audit scaffold complete. Skills processed: {}", + "⚠️ EXPERIMENTAL: Aiken audit complete. 
Iterations processed: {}", state.iterations.len() ); + println!("Aiken source files analyzed: {}", state.source_files.len()); println!("State written to: {}", state_out.display()); println!("Report written to: {}", report_out.display()); @@ -73,20 +111,148 @@ fn run_scaffold_analysis( fn run_skill_loop( skills: &[VulnerabilitySkill], + source_files: &[PathBuf], provider: &dyn AnalysisProvider, state: &mut AnalysisStateJson, state_out: &Path, ) -> Result<()> { - for skill in skills { - let prompt = build_mini_prompt(skill); - let iteration = provider.analyze_skill(skill, &prompt)?; - append_iteration(state, iteration); - write_state(state_out, state)?; + for source_file in source_files { + let source_code = std::fs::read_to_string(source_file) + .into_diagnostic() + .with_context(|| format!("Failed to read source file {}", source_file.display()))?; + let target_path = source_file.display().to_string(); + + for skill in skills { + let prompt = build_mini_prompt(skill); + let iteration = provider.analyze_skill(skill, &prompt, &target_path, &source_code)?; + append_iteration(state, iteration); + write_state(state_out, state)?; + } } Ok(()) } +fn discover_aiken_source_files(project_root: &Path) -> Result> { + let mut files = Vec::new(); + let mut to_visit = vec![project_root.to_path_buf()]; + + while let Some(dir) = to_visit.pop() { + let entries = std::fs::read_dir(&dir) + .into_diagnostic() + .with_context(|| format!("Failed to read directory {}", dir.display()))?; + + for entry in entries { + let entry = entry.into_diagnostic()?; + let path = entry.path(); + + if path.is_dir() { + let skip = path + .file_name() + .and_then(|name| name.to_str()) + .map(|name| matches!(name, ".git" | "target" | ".tx3" | "build")) + .unwrap_or(false); + + if !skip { + to_visit.push(path); + } + continue; + } + + let is_aiken_source = path + .extension() + .and_then(|ext| ext.to_str()) + .map(|ext| ext.eq_ignore_ascii_case("ak")) + .unwrap_or(false); + + if is_aiken_source { + files.push(path); + } + } + } + + files.sort(); + Ok(files) +} + +fn build_provider(args: &Args) -> Result> { + match args.provider.to_ascii_lowercase().as_str() { + "scaffold" => Ok(Box::new(ScaffoldProvider)), + "openai" => { + let endpoint = args + .endpoint + .clone() + .unwrap_or_else(|| DEFAULT_AI_ENDPOINT.to_string()); + let model = args + .model + .clone() + .unwrap_or_else(|| DEFAULT_AI_MODEL.to_string()); + let api_key_env = args + .api_key_env + .as_deref() + .unwrap_or(DEFAULT_AI_API_KEY_ENV); + + let api_key = std::env::var(api_key_env).into_diagnostic().with_context(|| { + format!( + "Missing API key environment variable '{}'. Set it before running with --provider openai.", + api_key_env + ) + })?; + + Ok(Box::new(OpenAiProvider { + endpoint, + api_key, + model, + })) + } + "anthropic" => { + let endpoint = args + .endpoint + .clone() + .unwrap_or_else(|| DEFAULT_ANTHROPIC_ENDPOINT.to_string()); + let model = args + .model + .clone() + .unwrap_or_else(|| DEFAULT_ANTHROPIC_MODEL.to_string()); + let api_key_env = args + .api_key_env + .as_deref() + .unwrap_or(DEFAULT_ANTHROPIC_API_KEY_ENV); + + let api_key = std::env::var(api_key_env) + .into_diagnostic() + .with_context(|| { + format!( + "Missing API key environment variable '{}'. 
Set it before running with --provider anthropic.", + api_key_env + ) + })?; + + Ok(Box::new(AnthropicProvider { + endpoint, + api_key, + model, + version: DEFAULT_ANTHROPIC_VERSION.to_string(), + })) + } + "ollama" => Ok(Box::new(OpenAiProvider { + endpoint: args + .endpoint + .clone() + .unwrap_or_else(|| DEFAULT_OLLAMA_ENDPOINT.to_string()), + api_key: "ollama".to_string(), + model: args + .model + .clone() + .unwrap_or_else(|| DEFAULT_OLLAMA_MODEL.to_string()), + })), + value => Err(miette::miette!( + "Unsupported provider '{}'. Expected one of: scaffold, openai, anthropic, ollama", + value + )), + } +} + fn append_iteration(state: &mut AnalysisStateJson, iteration: SkillIterationResult) { state.iterations.push(iteration); } @@ -244,7 +410,9 @@ fn parse_skill_content(path: &Path, content: &str) -> Result ) })?; - let parsed: SkillFrontmatter = serde_yaml_ng::from_str(&frontmatter) + let normalized_frontmatter = normalize_yaml_indentation(&frontmatter); + + let parsed: SkillFrontmatter = serde_yaml_ng::from_str(&normalized_frontmatter) .into_diagnostic() .with_context(|| { format!( @@ -277,6 +445,10 @@ fn parse_skill_content(path: &Path, content: &str) -> Result }) } +fn normalize_yaml_indentation(input: &str) -> String { + input.replace('\t', " ") +} + fn split_frontmatter(content: &str) -> Result<(String, String)> { let content = content.trim_start_matches('\u{feff}'); let mut lines = content.lines(); @@ -348,6 +520,7 @@ struct SkillFrontmatter { #[cfg(test)] mod tests { use super::*; + use std::fs; #[test] fn parse_skill_content_reads_frontmatter_and_guidance() { @@ -399,6 +572,48 @@ body let error = parse_skill_content(Path::new("skill.md"), content).expect_err("should fail"); assert!(error.to_string().contains("Invalid `severity` value")); } + + #[test] + fn discover_aiken_source_files_finds_ak_files_recursively() { + let temp = tempfile::tempdir().expect("temp dir"); + let root = temp.path(); + let validators = root.join("onchain/validators"); + + fs::create_dir_all(&validators).expect("create validators dir"); + fs::write(validators.join("spend.ak"), "validator spend {}").expect("write ak file"); + fs::write(validators.join("readme.md"), "# ignore").expect("write non-ak file"); + + let files = discover_aiken_source_files(root).expect("should discover files"); + + assert_eq!(files.len(), 1); + assert!(files[0].ends_with("onchain/validators/spend.ak")); + } + + #[test] + fn discover_aiken_source_files_skips_target_tx3_and_build_dirs() { + let temp = tempfile::tempdir().expect("temp dir"); + let root = temp.path(); + + let normal_dir = root.join("contracts"); + let target_dir = root.join("target/generated"); + let tx3_dir = root.join(".tx3/tmp"); + let build_dir = root.join("build/output"); + + fs::create_dir_all(&normal_dir).expect("create normal dir"); + fs::create_dir_all(&target_dir).expect("create target dir"); + fs::create_dir_all(&tx3_dir).expect("create tx3 dir"); + fs::create_dir_all(&build_dir).expect("create build dir"); + + fs::write(normal_dir.join("ok.ak"), "validator ok {}").expect("write ak"); + fs::write(target_dir.join("skip.ak"), "validator skip {}").expect("write ak in target"); + fs::write(tx3_dir.join("skip2.ak"), "validator skip2 {}").expect("write ak in tx3"); + fs::write(build_dir.join("skip3.ak"), "validator skip3 {}").expect("write ak in build"); + + let files = discover_aiken_source_files(root).expect("should discover files"); + + assert_eq!(files.len(), 1); + assert!(files[0].ends_with("contracts/ok.ak")); + } } fn write_state(path: &Path, state: 
&AnalysisStateJson) -> Result<()> { diff --git a/src/commands/aiken/model.rs b/src/commands/aiken/model.rs index b3a44ac..55c44ea 100644 --- a/src/commands/aiken/model.rs +++ b/src/commands/aiken/model.rs @@ -24,6 +24,7 @@ pub struct MiniPrompt { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SkillIterationResult { pub skill_id: String, + pub target_path: String, pub status: String, pub findings: Vec, pub next_prompt: Option, @@ -42,6 +43,7 @@ pub struct VulnerabilityFinding { pub struct AnalysisStateJson { pub version: String, pub target_path: String, + pub source_files: Vec, pub provider: ProviderSpec, pub permission_prompt: PermissionPromptSpec, pub iterations: Vec, diff --git a/src/commands/aiken/provider.rs b/src/commands/aiken/provider.rs index be40cd1..797d67e 100644 --- a/src/commands/aiken/provider.rs +++ b/src/commands/aiken/provider.rs @@ -1,6 +1,10 @@ -use miette::Result; +use miette::{IntoDiagnostic, Result}; +use serde_json::Value; +use tokio::runtime::Handle; -use super::model::{MiniPrompt, ProviderSpec, SkillIterationResult, VulnerabilitySkill}; +use super::model::{ + MiniPrompt, ProviderSpec, SkillIterationResult, VulnerabilityFinding, VulnerabilitySkill, +}; pub trait AnalysisProvider { fn provider_spec(&self) -> ProviderSpec; @@ -9,6 +13,8 @@ pub trait AnalysisProvider { &self, skill: &VulnerabilitySkill, prompt: &MiniPrompt, + target_path: &str, + source_code: &str, ) -> Result; } @@ -28,9 +34,12 @@ impl AnalysisProvider for ScaffoldProvider { &self, skill: &VulnerabilitySkill, prompt: &MiniPrompt, + target_path: &str, + _source_code: &str, ) -> Result { Ok(SkillIterationResult { skill_id: skill.id.clone(), + target_path: target_path.to_string(), status: "scaffolded".to_string(), findings: vec![], next_prompt: Some(MiniPrompt { @@ -43,3 +52,253 @@ impl AnalysisProvider for ScaffoldProvider { }) } } + +#[derive(Debug, Clone)] +pub struct OpenAiProvider { + pub endpoint: String, + pub api_key: String, + pub model: String, +} + +impl AnalysisProvider for OpenAiProvider { + fn provider_spec(&self) -> ProviderSpec { + ProviderSpec { + name: "openai-compatible".to_string(), + model: Some(self.model.clone()), + notes: format!("Endpoint: {}", self.endpoint), + } + } + + fn analyze_skill( + &self, + skill: &VulnerabilitySkill, + prompt: &MiniPrompt, + target_path: &str, + source_code: &str, + ) -> Result { + let system_prompt = "You are a security auditor specialized in Aiken smart contracts. 
Return JSON only with shape: {\"status\": string, \"findings\": [{\"title\": string, \"severity\": string, \"summary\": string, \"evidence\": [string], \"recommendation\": string}], \"next_prompt\": string|null}."; + let user_prompt = format!( + "Analyze the following Aiken source file for a single vulnerability skill.\n\nTarget path: {}\n\nSkill:\n{}\n\nSource code:\n{}", + target_path, prompt.text, source_code + ); + + let payload = serde_json::json!({ + "model": self.model, + "messages": [ + { + "role": "system", + "content": system_prompt + }, + { + "role": "user", + "content": user_prompt + } + ], + "response_format": { + "type": "json_object" + } + }); + + let response_json = block_on_runtime_aware(async { + let client = reqwest::Client::new(); + let response = client + .post(&self.endpoint) + .bearer_auth(&self.api_key) + .json(&payload) + .send() + .await + .into_diagnostic()?; + + let response = response.error_for_status().into_diagnostic()?; + response.json::().await.into_diagnostic() + })?; + + let content = response_json + .pointer("/choices/0/message/content") + .and_then(Value::as_str) + .ok_or_else(|| { + miette::miette!("AI provider returned an unexpected response payload") + })?; + + let parsed = parse_structured_content(content)?; + + Ok(iteration_from_parsed(skill, target_path, parsed)) + } +} + +#[derive(Debug, Clone)] +pub struct AnthropicProvider { + pub endpoint: String, + pub api_key: String, + pub model: String, + pub version: String, +} + +impl AnalysisProvider for AnthropicProvider { + fn provider_spec(&self) -> ProviderSpec { + ProviderSpec { + name: "anthropic".to_string(), + model: Some(self.model.clone()), + notes: format!("Endpoint: {}", self.endpoint), + } + } + + fn analyze_skill( + &self, + skill: &VulnerabilitySkill, + prompt: &MiniPrompt, + target_path: &str, + source_code: &str, + ) -> Result { + let system_prompt = "You are a security auditor specialized in Aiken smart contracts. 
Return JSON only with shape: {\"status\": string, \"findings\": [{\"title\": string, \"severity\": string, \"summary\": string, \"evidence\": [string], \"recommendation\": string}], \"next_prompt\": string|null}."; + let user_prompt = format!( + "Analyze the following Aiken source file for a single vulnerability skill.\n\nTarget path: {}\n\nSkill:\n{}\n\nSource code:\n{}", + target_path, prompt.text, source_code + ); + + let payload = serde_json::json!({ + "model": self.model, + "max_tokens": 1200, + "system": system_prompt, + "messages": [ + { + "role": "user", + "content": user_prompt + } + ] + }); + + let response_json = block_on_runtime_aware(async { + let client = reqwest::Client::new(); + let response = client + .post(&self.endpoint) + .header("x-api-key", &self.api_key) + .header("anthropic-version", &self.version) + .json(&payload) + .send() + .await + .into_diagnostic()?; + + let response = response.error_for_status().into_diagnostic()?; + response.json::().await.into_diagnostic() + })?; + + let content = response_json + .pointer("/content/0/text") + .and_then(Value::as_str) + .ok_or_else(|| { + miette::miette!("Anthropic provider returned an unexpected response payload") + })?; + + let parsed = parse_structured_content(content)?; + + Ok(iteration_from_parsed(skill, target_path, parsed)) + } +} + +fn parse_structured_content(content: &str) -> Result { + if let Ok(parsed) = serde_json::from_str::(content) { + return Ok(parsed); + } + + let trimmed = content.trim(); + let fenced = trimmed + .strip_prefix("```json") + .or_else(|| trimmed.strip_prefix("```")) + .map(str::trim); + + if let Some(fenced_content) = fenced { + let fenced_content = fenced_content.strip_suffix("```").unwrap_or(fenced_content); + if let Ok(parsed) = serde_json::from_str::(fenced_content.trim()) { + return Ok(parsed); + } + } + + Err(miette::miette!( + "AI provider response is not valid JSON for structured findings" + )) +} + +fn block_on_runtime_aware(future: F) -> Result +where + F: std::future::Future>, +{ + match Handle::try_current() { + Ok(handle) => tokio::task::block_in_place(|| handle.block_on(future)), + Err(_) => { + let runtime = tokio::runtime::Runtime::new().into_diagnostic()?; + runtime.block_on(future) + } + } +} + +fn iteration_from_parsed( + skill: &VulnerabilitySkill, + target_path: &str, + parsed: Value, +) -> SkillIterationResult { + let findings = parsed + .get("findings") + .and_then(Value::as_array) + .map(|items| { + items + .iter() + .map(|item| VulnerabilityFinding { + title: item + .get("title") + .and_then(Value::as_str) + .unwrap_or("Untitled finding") + .to_string(), + severity: item + .get("severity") + .and_then(Value::as_str) + .unwrap_or(&skill.severity) + .to_string(), + summary: item + .get("summary") + .and_then(Value::as_str) + .unwrap_or("") + .to_string(), + evidence: item + .get("evidence") + .and_then(Value::as_array) + .map(|e| { + e.iter() + .filter_map(Value::as_str) + .map(ToString::to_string) + .collect::>() + }) + .unwrap_or_default(), + recommendation: item + .get("recommendation") + .and_then(Value::as_str) + .unwrap_or("") + .to_string(), + }) + .collect::>() + }) + .unwrap_or_default(); + + let status = parsed + .get("status") + .and_then(Value::as_str) + .unwrap_or("completed") + .to_string(); + + let next_prompt = parsed + .get("next_prompt") + .and_then(Value::as_str) + .filter(|value| !value.trim().is_empty()) + .map(|text| MiniPrompt { + skill_id: skill.id.clone(), + text: text.to_string(), + }); + + SkillIterationResult { + skill_id: skill.id.clone(), 
+ target_path: target_path.to_string(), + status, + findings, + next_prompt, + } +} diff --git a/tests/e2e/happy_path.rs b/tests/e2e/happy_path.rs index 2ea98b0..4ef7360 100644 --- a/tests/e2e/happy_path.rs +++ b/tests/e2e/happy_path.rs @@ -169,7 +169,7 @@ fn aiken_audit_runs_in_initialized_project() { assert_eq!(state.version, "1"); assert_eq!( state.iterations.len(), - 2, + 3, "expected one iteration per seed skill" ); } From 25d8db1f161f559da6db63a6f43ae22617892f5e Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Tue, 24 Feb 2026 12:27:30 -0300 Subject: [PATCH 05/18] feat: rename Aiken command to Audit and update related documentation and tests --- .../003-ai-aiken-vulnerability-scaffolding.md | 46 ++++++++-------- design/003-assets/c4-component.puml | 2 +- design/003-assets/c4-container.puml | 2 +- design/003-assets/c4-context.puml | 2 +- src/cli.rs | 4 +- src/commands/aiken/mod.rs | 44 --------------- src/commands/{aiken/audit.rs => audit/mod.rs} | 55 +++++++++++++------ src/commands/{aiken => audit}/model.rs | 0 src/commands/{aiken => audit}/provider.rs | 0 src/commands/mod.rs | 2 +- src/main.rs | 2 +- src/telemetry/mod.rs | 2 +- tests/e2e/edge_cases.rs | 10 ++-- tests/e2e/happy_path.rs | 10 ++-- tests/e2e/smoke.rs | 12 ++-- 15 files changed, 86 insertions(+), 107 deletions(-) delete mode 100644 src/commands/aiken/mod.rs rename src/commands/{aiken/audit.rs => audit/mod.rs} (92%) rename src/commands/{aiken => audit}/model.rs (100%) rename src/commands/{aiken => audit}/provider.rs (100%) diff --git a/design/003-ai-aiken-vulnerability-scaffolding.md b/design/003-ai-aiken-vulnerability-scaffolding.md index 6d4c816..a99006c 100644 --- a/design/003-ai-aiken-vulnerability-scaffolding.md +++ b/design/003-ai-aiken-vulnerability-scaffolding.md @@ -1,11 +1,11 @@ -# AI Aiken Vulnerability Scaffolding +# AI Vulnerability Scaffolding ## Overview -This document defines the **Milestone 1 scaffolding** for an AI-assisted Aiken vulnerability analysis command in Trix. +This document defines the **Milestone 1 scaffolding** for an AI-assisted vulnerability analysis command in Trix, initially focused on Aiken smart contracts. Scope for this milestone is intentionally limited to: -- CLI wiring for a new scoped command: `trix aiken` +- CLI wiring for a new scoped command: `trix audit` - Contracts for iterative skill-by-skill analysis state (JSON) - Contracts for permission prompt generation and final vulnerability report generation - Local-first execution boundaries and security assumptions @@ -26,7 +26,7 @@ Out of scope for this milestone: ## CLI Surface (Scaffolding) -`trix aiken` is a **scoped** command and requires a project context (`trix.toml`). +`trix audit` is a **scoped** command and requires a project context (`trix.toml`). **⚠️ EXPERIMENTAL**: This command requires the `unstable` feature to be enabled. Build with: ```bash @@ -35,27 +35,29 @@ cargo build --features unstable ### Command Structure -The command follows a subcommand pattern for extensibility: +The command is currently focused on Aiken smart contracts but designed for future extensibility to other languages: ```bash -trix aiken [options] +trix audit [options] ``` -### Available Subcommands (Milestone 1) +### Command (Milestone 1) -#### `trix aiken audit` +#### `trix audit` -Audits Aiken code for vulnerabilities using AI-assisted detection. +Audits smart contract code for vulnerabilities using AI-assisted detection. Currently focused on Aiken (`.ak` files). 
**Arguments:** -- `--state-out` (default: `.tx3/aiken-audit/state.json`) - Path where the incremental analysis state JSON will be written -- `--report-out` (default: `.tx3/aiken-audit/vulnerabilities.md`) - Path where the final vulnerability report markdown will be written +- `--state-out` (default: `.tx3/audit/state.json`) - Path where the incremental analysis state JSON will be written +- `--report-out` (default: `.tx3/audit/vulnerabilities.md`) - Path where the final vulnerability report markdown will be written - `--skills-dir` (default: `skills/vulnerabilities`) - Path to vulnerability skill definitions +- `--provider` (default: `scaffold`) - Analysis provider: `scaffold` | `openai` | `anthropic` | `ollama` **Example:** ```bash -trix aiken audit -trix aiken audit --state-out ./custom/state.json +trix audit +trix audit --state-out ./custom/state.json +trix audit --provider openai ``` ## Skill-by-skill Loop Contract @@ -76,7 +78,7 @@ This loop enables narrow prompts per skill, improving precision and traceability ### Incremental JSON state Defined by `AnalysisStateJson` and related structures in: -- `src/commands/aiken/model.rs` +- `src/commands/audit/model.rs` (re-exported from `src/commands/aiken/model.rs`) Key sections: - Target metadata and provider spec @@ -129,7 +131,7 @@ docker run --rm -v $(pwd)/design/003-assets:/data plantuml/plantuml:latest *.pum **Source:** [c4-context.puml](003-assets/c4-context.puml) -![C4 Context Diagram](https://www.plantuml.com/plantuml/svg/LP71SXen343l-nKg9peDxA4vfPSmj9b9XpGCQNibnXP1nMjx97c1lw-zcsc7qwxifoTBkXSrhBdpiBpTBcDGFEjsGKSClxCFpGSArcU7S51DSjUsR4xpDz93tcL1jZKXwDp6hatUX2gQYJfFktPvErlNerzFgxOpeiZj_nBpLCYcMIDB35E7_GrClcAFFYRaIGasEGZyP3e31J3WepKU4iS_Q7NoiNcv564trG8KUE2MgyT9_Jz_XcJiqRmXT2QK3__Zjz_EEZLKgFA378foyGXhZZsviUnpKehrV9yrCq9wlmF9zW16O17qm36HlPRkfIIlXaS-a4SOhwfnFRuP96QsU0Do9EyB7REch58qpGRzE8AndR7trng0gPi0antfQq4hEK93mGqQ9s2ZG_W4VTaNijcH9xsyb_a29oP5WUylppz0r2Y22GwU31zTF0LWJ97roZpCvs5e_mu4WvDHDfE4blPawr2wf3AO62rIZpKm_Y8qD7HVikf-_m80) +![C4 Context Diagram](https://www.plantuml.com/plantuml/svg/LP71SXen343l-nKg9peDxA4vfPSmj9b9XpGCQNibnXP1nMjx97c1lw-zcsc7qrhMqvEblHSrhBdpiBpTBcDGFEjsGKSClxCFpGSArcU7S51DSjUsR4xpDz93tcL1jhKWwDp6hatUX2gQYJfFktPvErlNgrzFgxOpeiZj_nRpLCYcMIDB35E7_GrClcAFFYRaIGasEGZyP3e31J3WepKU4iS_Q7NoiNcv564trG8KUE2MgyT9FPy_GpBsQDuGEXFAX__nsszddHegL3aWXw9SFCAQOqzkRFkSb6AztsVDZ93USo3P7i08B88UE2QorzAzbEBLyCW7yWXZgXhNuvj1OcQBDo17yhxGCMlAAaNJRD2FAnZ76MT_hG6Ox4XV2rIPCJsqsg1n0ZOwX4_GPn-GpOuywUMzmHSuCIaAV7zv_W6YHX5CSF1a-EZYAG1ZYgnNvc4p3yFWTo0Od8oocIIqj2TRrz4bbS74Q95wh87n5w5XbFjILV_iNm00) *System context showing developer interaction with Trix CLI, local filesystem, and future external AI provider integration.* @@ -137,7 +139,7 @@ docker run --rm -v $(pwd)/design/003-assets:/data plantuml/plantuml:latest *.pum **Source:** [c4-container.puml](003-assets/c4-container.puml) -![C4 Container 
Diagram](https://www.plantuml.com/plantuml/svg/RLHDKnin3Btlhr1p2jD2Bfmu4LAOIIUqJA3jaN5sHT34QxkM7CXq-jyhhzbj4dg9hG-z9yalUybYegJMmkpySUQT678O7wUqGVSZMLTz85VRr20yYmI-c4oYUJbRapodLMACjPQWaxFQjvDWZSjGfooDfTLaTdvwbrtVRnUJrh1WdEoJd0NDhQexZEpTkC7j9nXznYrQ7p2EJghxdPQqZrS-kSR4tLWYlMhAWnumMyn79_2x0XZWnhKb3NUTmpGr91fhM-EQaRgNYGK-mioQa4sjXxf40Pt4NV3a1B9fuZgrgCQpMfRSMblPfx7U1qMutv2ri503hjrQ8prBFkU2qGWoR7M5UbsH9Ta1vnhkM59ekWGMFOuOGlHj48MvovTkZE0PuzuS3zW6cm6oxogHRHXQSdHQQWj7mDWICta7nNt4qt7dKV89FoF90SUN-szVivaVY5WnLGaHysgCLNnZTqp_7o1Yy54oKdTv3trhugRnB-vm3XstIJKoF96Y7-skOBxVr9OQZAMukyAqCK7q7DyaIH5F1bg9XWHMRtBAxkcArLaB0xvgIIQTpe4nRgdEC_sfUX5UlyhuYRLauMlDkY6BK4AQjfTZiWIlzy3wjePsDBzQeYrwBvATkwtV86ia1pfVBp8nCJzcLlDRk4G6YIpnLq8MBjewqC11mR-tzX4Pg1DeLePQoHubbrqFoYhlaNyRScikuUD-XeSeSrw0uOrM-ZX8fHJqgkkssGdZJKD5RoMdC1mcjRrc3kYxMVlC9PgctyGJMIn_0-arrfSClHeQ-WM7ykBZ1lHvRSwiBj4r-GFr5m00) +![C4 Container Diagram](https://www.plantuml.com/plantuml/svg/RLHDKnin3Btlhr1p2jD2Bfmu4LAOIIUqJA3jaN5sHT34QxkM7CXq-jyhhzbj4dg9hG-zPqalUybYegJMmkpySUQT678O7wUqGVSZMLTz85VRr20yYmI-c4oYUJbRapodLMACjPQWaxFQjvDWZSjGfooDfTLaTdvwbrtVRnUJrh1WdEoJd0NDhQexZEpTkC7j9nXznYrQ7p2EJghxdPQqZrS-kSR4tLWYlMhAWnumMyn79_2x0XZWnhKb3KzJGwBUjZMk6QbZcLZW8yoi2TdKIgXB5D19t0LFJY2R9gvJYseygsN9hVKH-mJM-w08znko5XPgmBLRQwHdoUTSC1f1XgqEJwyhiYGxu5p37KiAJPV0eiSHGuY-3Q8mTzqbjn4yOzphum7RO3D0zbqbiWr3IuvEIstX21XROYRleBYFU3hkt4e-uMSaEU3uSl_jQpRpQeWLKpKGCO_6MCKtTiVq_mOY1Xyb8tMNzz2t9cuQ_-8E-z1scrGZmnCf-j7i2kRxIMsfmTWLbojC6nD4p_5DaXJnPA2LQ4XWzIQax9wkM9srCE2hbcJIwHmOuvRgtDEVgfVVzQkCdrXB5hxMh1ko22MaQNiv9ellUWziU5R_7CslbcYBqejOvsxhDyWQoO7ErujCJCpBp6f-2vV8894b_ah8maLR1neuAFYtQn-HWJg1PcMebUnHSfdreAooZ_pRa5jp2H-_PrX4wVC236-qqiT1AgMWLrqtpKuORnegUYivXU4mgUqrSq3VzTfdBj4q_IIUo6Bv6qYliQ6aVwkXuRtWK5vyt82kr-RCwX9TapzDVm40) *Container-level view of Trix CLI internals: command entrypoint, skill loop engine, prompt composer, state/report writers, and provider adapter boundary.* @@ -145,7 +147,7 @@ docker run --rm -v $(pwd)/design/003-assets:/data plantuml/plantuml:latest *.pum **Source:** [c4-component.puml](003-assets/c4-component.puml) -![C4 Component Diagram](https://www.plantuml.com/plantuml/svg/TPFFRjim3CRlVWekfnPhuqjFFJNDQKMp1Issgw58J29KVpIAQXeCU_T9jc4dNdeBANw_Zo7fXyY2E5a3oxjcwMtm3Xqt2_s6xbSohlgYdJH98UoPGxolQnA7-KxpFcrIH9BUSHRCfRTrCC8Lr6raPVIcNjxThalgv-gvjY8oKdrgCW-vjggMcI2qGseUVN9AqF5A5FSRcBLTm28fpP1bLZW5Aw-ImTar_Aa01jQLj2eBABahGRju0xclhTxTG22_rGQlP_dLUZMdM7wzIXzGWUoP6K37uBNZsVKPDRvfOnhZXK8giXB2SniMp3Pn8WJRjPGJvIb2ri9xCWv9RBJHV8IM0u9106kB4nUhdMu2vPWpOvKZMBSHF2PjFlDgto9ilKmH5NJw71B8H9IvdUl4Ao1P7QFsRdHS3xaBlems6C48PTCa99zu6o-56QOueljcfiFaZ6ciLsZAB4i4gnBrkq7JRsVXX3b67TkIfvXvNq4pak3C7lg85_6HFU60VotHnUcze52nFb3cHj1E4jeC4GQUNdzyXvOruHC6JzmGEeM4L1Xj1bxwpAVJwWhYv2Qie3Vb3sww8FBlEfStGed0hS-wJy6YfCxE8NjexcR6U_91ozhp7Wbw385sZBj-T9l-0ykMVwlg1KttsKokdF1tmiWH77wCHpHKdr_hfhk4W7JRLewh7mJJanjbNKEe03zGfjBY1VdvsQI8SJo1QTsu_smQTVaJAEGUZFTXFA5JrVzRfNvRM-s4wTzF5TC53Skux_y6mk8RI65CQQzwoBXaJVKF) +![C4 Component 
Diagram](https://www.plantuml.com/plantuml/svg/TPFFRjim3CRlVWekfnPhuqjFFJNDQKMp1Issgw58J29KVpIAQXeCU_T9jc4dNdeBAVBtCUJdXyY2E5a3oxjcwMtm3Xqt2_s6xbSohlgYdJH98UoPGxolQnA7-KxpFcrIH9BUSMwOIs_hO8GhgDl8okXDlRoxNPVKpzLpRKLafFfKP1voRLKjCq5eXzGy-kIKeEELAEetC5iafHboWnLEmSghP71s3NygW85o9QtAfPJSbQ1Tl07ftLRlRY2Gtsg3rxFyQhsQKoo_NgKFg43sp0oWO_3QyUpwZ1hVj36DyK8XbROImdCR5imsSI84susK4-KfGkw5zsGSajXee_a8BGS4Wm3M5YSgLZlT1CenPyOgHx1k8tXCsdocrRv5s7gP8Yhe-dD881DHvdQg4ws0P7MCshjHSJtaBkWosc048vGRav1yecsy56ROueZkXvakapEciLwXARii4QnArDy6JN-TXHFc67LiIvvYvdq5pKY2CtkO8rx4H_Q40_wrH1Utzu92nVf0cHj1EqbeCqGOU7hzyHrQreHF63nnG-WK4bBBA1buwJEVRgihY1DFM45loX_Sz43attKkRuGIWLkTTP-2HKcTsI5hQEvsndloGSlQynu9UWo1TeoxVdMR_jFAad-hwWLDzzbCN4xuEs7a20u_1XEDrUTktNHE24YtRULm-h5XzCMqhHi85U07obH6lD1-RfiKCOuMcDQDzy-YKPS_WKBkmNWVHWkTTlr_LUgljTLEcFxzj6DweE4Oz_tVG5Xy0ocCAUDL3ncNhAd-0G00) *Component-level detail of the Aiken command module: skill loader, prompt builders, state model, storage, report renderer, and provider adapter.* @@ -176,13 +178,13 @@ The markdown body can include richer instructions, rationale, and examples and i ## Milestone 1 Acceptance Criteria -- `trix aiken` command exists as a scoped command with subcommand structure. +- `trix audit` command exists as a top-level scoped command. - Command is gated behind `unstable` feature flag (following `publish` pattern). -- `trix aiken audit` subcommand is implemented as scaffold. -- `src/commands/aiken/mod.rs` routes subcommands following the `profile` pattern. -- `src/commands/aiken/audit.rs` contains the audit scaffold implementation. +- `trix audit` is implemented as scaffold with Aiken-focused analysis. +- `src/commands/audit/mod.rs` provides the public command interface. +- `src/commands/aiken/audit.rs` contains the core audit implementation (internal). - `src/commands/aiken/model.rs` defines scaffolding contracts for state, findings, and prompts. - Templates for report and permission prompt exist in `templates/aiken/`. - `skills/vulnerabilities/` exists with seed skill files. - This design document includes C4 diagrams as separate PlantUML files in `003-assets/`. -- E2E scaffold tests verify command visibility and baseline behavior for `audit` subcommand (tests run with `--features unstable`). +- E2E scaffold tests verify command visibility and baseline behavior for `audit` (tests run with `--features unstable`). 
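The incremental state file is intended to be consumable by follow-up tooling as well as by the e2e tests. A minimal sketch of reading it back, assuming `AnalysisStateJson` stays reachable under the same public path the e2e tests import; the `summarize_state` helper itself is illustrative and not part of this patch:

```rust
use miette::{IntoDiagnostic as _, Result};
use trix::commands::audit::model::AnalysisStateJson;

/// Illustrative helper: load `.tx3/audit/state.json` and report how many
/// skill iterations produced findings.
fn summarize_state(path: &std::path::Path) -> Result<()> {
    let raw = std::fs::read_to_string(path).into_diagnostic()?;
    let state: AnalysisStateJson = serde_json::from_str(&raw).into_diagnostic()?;

    let with_findings = state
        .iterations
        .iter()
        .filter(|iteration| !iteration.findings.is_empty())
        .count();

    println!(
        "audit state v{}: {} iterations, {} with findings",
        state.version,
        state.iterations.len(),
        with_findings
    );

    Ok(())
}
```

The happy-path e2e test performs essentially this round-trip when it asserts one iteration per seed skill.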
diff --git a/design/003-assets/c4-component.puml b/design/003-assets/c4-component.puml index 9dc99ad..056dc07 100644 --- a/design/003-assets/c4-component.puml +++ b/design/003-assets/c4-component.puml @@ -1,7 +1,7 @@ @startuml C4_Component_AikenVuln !include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Component.puml -Container_Boundary(aiken, "Aiken Audit Command Module") { +Container_Boundary(aiken, "Audit Command Module") { Component(cmd, "run(args, config, profile)", "mod.rs", "Scoped command entrypoint") Component(skill_loader, "Skill Loader", "future module", "Loads one vulnerability skill at a time") Component(mini_prompt, "Mini Prompt Builder", "future module", "Builds focused prompt for current skill") diff --git a/design/003-assets/c4-container.puml b/design/003-assets/c4-container.puml index d3cb30a..f39a8ed 100644 --- a/design/003-assets/c4-container.puml +++ b/design/003-assets/c4-container.puml @@ -3,7 +3,7 @@ Person(dev, "Developer") System_Boundary(trix, "Trix CLI") { - Container(cli, "Aiken Audit Command", "Rust + Clap", "CLI command entrypoint and argument handling") + Container(cli, "Audit Command", "Rust + Clap", "CLI command entrypoint and argument handling") Container(loop, "Skill Loop Engine", "Rust", "Iterates vulnerability skills and updates state") Container(prompt, "Prompt Composer", "Rust + Templates", "Builds mini-prompts and permission prompt") Container(state, "State Writer", "Rust + JSON", "Persists incremental analysis state") diff --git a/design/003-assets/c4-context.puml b/design/003-assets/c4-context.puml index 2812191..64b5f6b 100644 --- a/design/003-assets/c4-context.puml +++ b/design/003-assets/c4-context.puml @@ -6,7 +6,7 @@ System(trix, "Trix CLI", "Tx3 package manager") System_Ext(ai, "External AI Provider", "Optional future provider such as Anthropic") System_Ext(fs, "Local File System", "Project source, skills, outputs") -Rel(dev, trix, "Runs `trix aiken audit`") +Rel(dev, trix, "Runs `trix audit`") Rel(trix, fs, "Reads code + vulnerability skills; writes JSON state and Markdown report") Rel(trix, ai, "Future: sends skill-specific prompts and receives analysis") diff --git a/src/cli.rs b/src/cli.rs index a877294..cb775db 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -54,9 +54,9 @@ pub enum Commands { /// Inspect and manage profiles Profile(commands::profile::Args), - /// Run Aiken vulnerability analysis scaffolding (UNSTABLE - This feature is experimental and may change) + /// Run vulnerability analysis scaffolding (UNSTABLE - This feature is experimental and may change) #[command(hide = true)] - Aiken(commands::aiken::Args), + Audit(commands::audit::Args), /// Publish a Tx3 package into the registry (UNSTABLE - This feature is experimental and may change) #[command(hide = true)] diff --git a/src/commands/aiken/mod.rs b/src/commands/aiken/mod.rs deleted file mode 100644 index abe9901..0000000 --- a/src/commands/aiken/mod.rs +++ /dev/null @@ -1,44 +0,0 @@ -use clap::{Args as ClapArgs, Subcommand}; -use miette::Result; - -use crate::config::{ProfileConfig, RootConfig}; - -pub mod audit; -pub mod model; -pub mod provider; - -pub use audit::run as run_audit; - -#[derive(Subcommand)] -pub enum Command { - /// Audit Aiken code for vulnerabilities using AI-assisted detection - Audit(audit::Args), -} - -#[derive(ClapArgs)] -pub struct Args { - #[clap(subcommand)] - pub command: Command, -} - -#[allow(unused_variables)] -pub fn run(args: Args, config: &RootConfig, profile: &ProfileConfig) -> Result<()> { - #[cfg(feature = 
"unstable")] - { - _run(args, config, profile) - } - #[cfg(not(feature = "unstable"))] - { - let _ = config; - let _ = profile; - Err(miette::miette!( - "The aiken command is currently unstable and requires the `unstable` feature to be enabled." - )) - } -} - -fn _run(args: Args, config: &RootConfig, profile: &ProfileConfig) -> Result<()> { - match args.command { - Command::Audit(args) => run_audit(args, config, profile), - } -} diff --git a/src/commands/aiken/audit.rs b/src/commands/audit/mod.rs similarity index 92% rename from src/commands/aiken/audit.rs rename to src/commands/audit/mod.rs index 17800cb..7b051ee 100644 --- a/src/commands/aiken/audit.rs +++ b/src/commands/audit/mod.rs @@ -5,11 +5,14 @@ use std::path::{Path, PathBuf}; use crate::config::{ProfileConfig, RootConfig}; -use super::model::{ +mod model; +mod provider; + +use self::model::{ AnalysisStateJson, MiniPrompt, PermissionPromptSpec, SkillIterationResult, VulnerabilityFinding, VulnerabilityReportSpec, VulnerabilitySkill, }; -use super::provider::{AnalysisProvider, AnthropicProvider, OpenAiProvider, ScaffoldProvider}; +use self::provider::{AnalysisProvider, AnthropicProvider, OpenAiProvider, ScaffoldProvider}; const DEFAULT_SKILLS_DIR: &str = "skills/vulnerabilities"; const DEFAULT_AI_ENDPOINT: &str = "https://api.openai.com/v1/chat/completions"; @@ -25,11 +28,11 @@ const DEFAULT_OLLAMA_MODEL: &str = "llama3.1"; #[derive(ClapArgs)] pub struct Args { /// Path where the incremental analysis state JSON will be written. - #[arg(long, default_value = ".tx3/aiken-audit/state.json")] + #[arg(long, default_value = ".tx3/audit/state.json")] pub state_out: String, /// Path where the final vulnerability report markdown will be written. - #[arg(long, default_value = ".tx3/aiken-audit/vulnerabilities.md")] + #[arg(long, default_value = ".tx3/audit/vulnerabilities.md")] pub report_out: String, /// Path to vulnerability skill definitions. @@ -53,7 +56,25 @@ pub struct Args { pub api_key_env: Option, } -pub fn run(args: Args, config: &RootConfig, _profile: &ProfileConfig) -> Result<()> { +#[allow(unused_variables)] +pub fn run(args: Args, config: &RootConfig, profile: &ProfileConfig) -> Result<()> { + #[cfg(feature = "unstable")] + { + _run(args, config, profile) + } + #[cfg(not(feature = "unstable"))] + { + let _ = args; + let _ = config; + let _ = profile; + + Err(miette::miette!( + "The audit command is currently unstable and requires the `unstable` feature to be enabled." + )) + } +} + +pub fn _run(args: Args, config: &RootConfig, _profile: &ProfileConfig) -> Result<()> { let provider = build_provider(&args)?; run_analysis(args, config, provider.as_ref()) } @@ -68,7 +89,7 @@ fn run_analysis( let report_out = PathBuf::from(&args.report_out); let target_path = config.protocol.main.display().to_string(); let project_root = std::env::current_dir().into_diagnostic()?; - let source_files = discover_aiken_source_files(&project_root)?; + let source_files = discover_source_files(&project_root)?; let source_files = if source_files.is_empty() { vec![config.protocol.main.clone()] } else { @@ -99,10 +120,10 @@ fn run_analysis( write_text_file(&report_out, &report_markdown)?; println!( - "⚠️ EXPERIMENTAL: Aiken audit complete. Iterations processed: {}", + "⚠️ EXPERIMENTAL: Audit complete. 
Iterations processed: {}", state.iterations.len() ); - println!("Aiken source files analyzed: {}", state.source_files.len()); + println!("Source files analyzed: {}", state.source_files.len()); println!("State written to: {}", state_out.display()); println!("Report written to: {}", report_out.display()); @@ -133,7 +154,7 @@ fn run_skill_loop( Ok(()) } -fn discover_aiken_source_files(project_root: &Path) -> Result> { +fn discover_source_files(project_root: &Path) -> Result> { let mut files = Vec::new(); let mut to_visit = vec![project_root.to_path_buf()]; @@ -159,13 +180,13 @@ fn discover_aiken_source_files(project_root: &Path) -> Result> { continue; } - let is_aiken_source = path + let is_source_file = path .extension() .and_then(|ext| ext.to_str()) .map(|ext| ext.eq_ignore_ascii_case("ak")) .unwrap_or(false); - if is_aiken_source { + if is_source_file { files.push(path); } } @@ -329,7 +350,7 @@ fn build_report(state: &AnalysisStateJson) -> VulnerabilityReportSpec { .collect::>(); VulnerabilityReportSpec { - title: "Aiken Vulnerability Report".to_string(), + title: "Vulnerability Report".to_string(), generated_at: chrono::Utc::now().to_rfc3339(), target: state.target_path.clone(), findings, @@ -343,7 +364,7 @@ fn load_skills(skills_dir: &Path, skills_dir_arg: &str) -> Result Result<()> { Commands::Build(args) => cmds::build::run(args, &config, &profile), Commands::Identities(args) => cmds::identities::run(args, &config, &profile), Commands::Profile(args) => cmds::profile::run(args, &config, &profile), - Commands::Aiken(args) => cmds::aiken::run(args, &config, &profile), + Commands::Audit(args) => cmds::audit::run(args, &config, &profile), Commands::Publish(args) => cmds::publish::run(args, &config), Commands::Telemetry(args) => cmds::telemetry::run(args), }; diff --git a/src/telemetry/mod.rs b/src/telemetry/mod.rs index 75e65b4..3ab5e1b 100644 --- a/src/telemetry/mod.rs +++ b/src/telemetry/mod.rs @@ -43,7 +43,7 @@ impl From<&Cli> for Option { Commands::Inspect(_) => Some(CommandMetric::new("inspect")), Commands::Test(_) => Some(CommandMetric::new("test")), Commands::Identities(_) => Some(CommandMetric::new("identities")), - Commands::Aiken(_) => Some(CommandMetric::new("aiken")), + Commands::Audit(_) => Some(CommandMetric::new("audit")), Commands::Publish(_) => Some(CommandMetric::new("publish")), _ => None, } diff --git a/tests/e2e/edge_cases.rs b/tests/e2e/edge_cases.rs index 2ef1d5a..fe052d0 100644 --- a/tests/e2e/edge_cases.rs +++ b/tests/e2e/edge_cases.rs @@ -45,11 +45,11 @@ fn init_preserves_existing_test_file() { #[cfg(feature = "unstable")] fn aiken_audit_fails_without_trix_config() { let ctx = TestContext::new(); - let result = ctx.run_trix(&["aiken", "audit"]); + let result = ctx.run_trix(&["audit"]); assert!( !result.success(), - "aiken audit should fail outside scoped project" + "audit should fail outside scoped project" ); assert!( result @@ -67,14 +67,14 @@ fn aiken_audit_fails_with_missing_skills_dir() { let init_result = ctx.run_trix(&["init", "--yes"]); assert_success(&init_result); - let result = ctx.run_trix(&["aiken", "audit", "--skills-dir", "skills/does-not-exist"]); + let result = ctx.run_trix(&["audit", "--skills-dir", "skills/does-not-exist"]); assert!( !result.success(), - "aiken audit should fail with invalid skills dir" + "audit should fail with invalid skills dir" ); assert!( - result.stderr.contains("Aiken skills directory not found"), + result.stderr.contains("Audit skills directory not found"), "Expected missing skills directory error, got stderr: {}", 
result.stderr ); diff --git a/tests/e2e/happy_path.rs b/tests/e2e/happy_path.rs index 4ef7360..7083030 100644 --- a/tests/e2e/happy_path.rs +++ b/tests/e2e/happy_path.rs @@ -1,7 +1,7 @@ use super::*; use std::path::PathBuf; #[cfg(feature = "unstable")] -use trix::commands::aiken::model::AnalysisStateJson; +use trix::commands::audit::model::AnalysisStateJson; use trix::config::KnownLedgerFamily; #[test] @@ -154,15 +154,15 @@ fn aiken_audit_runs_in_initialized_project() { let init_result = ctx.run_trix(&["init", "--yes"]); assert_success(&init_result); - let result = ctx.run_trix(&["aiken", "audit"]); + let result = ctx.run_trix(&["audit"]); assert_success(&result); assert_output_contains(&result, "EXPERIMENTAL"); - ctx.assert_file_exists(".tx3/aiken-audit/state.json"); - ctx.assert_file_exists(".tx3/aiken-audit/vulnerabilities.md"); + ctx.assert_file_exists(".tx3/audit/state.json"); + ctx.assert_file_exists(".tx3/audit/vulnerabilities.md"); - let state_content = ctx.read_file(".tx3/aiken-audit/state.json"); + let state_content = ctx.read_file(".tx3/audit/state.json"); let state: AnalysisStateJson = serde_json::from_str(&state_content).expect("state.json should be valid AnalysisStateJson"); diff --git a/tests/e2e/smoke.rs b/tests/e2e/smoke.rs index aaadaad..62320b5 100644 --- a/tests/e2e/smoke.rs +++ b/tests/e2e/smoke.rs @@ -11,20 +11,20 @@ fn init_runs_without_error() { #[test] #[cfg(feature = "unstable")] -fn aiken_help_runs_without_error() { +fn audit_help_runs_without_error() { let ctx = TestContext::new(); - let result = ctx.run_trix(&["aiken", "--help"]); + let result = ctx.run_trix(&["audit", "--help"]); assert_success(&result); - assert_output_contains(&result, "audit"); + assert_output_contains(&result, "vulnerability"); } #[test] #[cfg(feature = "unstable")] -fn aiken_audit_help_runs_without_error() { +fn audit_help_displays_provider_options() { let ctx = TestContext::new(); - let result = ctx.run_trix(&["aiken", "audit", "--help"]); + let result = ctx.run_trix(&["audit", "--help"]); assert_success(&result); - assert_output_contains(&result, "vulnerability"); + assert_output_contains(&result, "provider"); } From eb20dbf95e70cbc6776f147fc99971cb3d6d3763 Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Tue, 24 Feb 2026 15:57:58 -0300 Subject: [PATCH 06/18] feat: add audit providers --- .../003-ai-aiken-vulnerability-scaffolding.md | 55 +- src/commands/audit/mod.rs | 305 +++++---- src/commands/audit/model.rs | 17 +- src/commands/audit/provider.rs | 304 --------- src/commands/audit/providers/anthropic.rs | 206 ++++++ src/commands/audit/providers/mod.rs | 120 ++++ src/commands/audit/providers/openai.rs | 211 ++++++ src/commands/audit/providers/scaffold.rs | 42 ++ src/commands/audit/providers/shared.rs | 626 ++++++++++++++++++ templates/aiken/report.md | 1 - 10 files changed, 1459 insertions(+), 428 deletions(-) delete mode 100644 src/commands/audit/provider.rs create mode 100644 src/commands/audit/providers/anthropic.rs create mode 100644 src/commands/audit/providers/mod.rs create mode 100644 src/commands/audit/providers/openai.rs create mode 100644 src/commands/audit/providers/scaffold.rs create mode 100644 src/commands/audit/providers/shared.rs diff --git a/design/003-ai-aiken-vulnerability-scaffolding.md b/design/003-ai-aiken-vulnerability-scaffolding.md index a99006c..d7774d4 100644 --- a/design/003-ai-aiken-vulnerability-scaffolding.md +++ b/design/003-ai-aiken-vulnerability-scaffolding.md @@ -78,18 +78,65 @@ This loop enables narrow prompts per skill, improving precision 
and traceability ### Incremental JSON state Defined by `AnalysisStateJson` and related structures in: -- `src/commands/audit/model.rs` (re-exported from `src/commands/aiken/model.rs`) +- `src/commands/audit/model.rs` Key sections: -- Target metadata and provider spec +- Source metadata (multi-file) and provider spec - Permission prompt spec (allowed local commands, scope rules) - Ordered list of `SkillIterationResult` +Example (simplified): +```json +{ + "version": "1", + "source_files": [ + "onchain/validators/spend.ak", + "onchain/validators/mint.ak" + ], + "provider": { + "name": "openai-compatible", + "model": "gpt-4.1-mini", + "notes": "Endpoint: https://api.openai.com/v1/chat/completions" + }, + "permission_prompt": { + "shell": "bash", + "allowed_commands": ["grep", "cat", "find", "ls"], + "scope_rules": [ + "Only execute commands within the current project root.", + "Do not write outside designated output artifacts." + ], + "read_scope": "workspace", + "interactive_permissions": false, + "allowed_paths": [] + }, + "iterations": [ + { + "skill_id": "strict-value-equality-003", + "status": "completed", + "findings": [ + { + "title": "Strict equality on full value", + "severity": "high", + "summary": "Strict value equality can reject valid transactions.", + "evidence": ["validators/spend.ak:42"], + "recommendation": "Compare lovelace and assets separately.", + "file": "validators/spend.ak", + "line": 42 + } + ], + "next_prompt": null + } + ] +} +``` + ### Final report Defined by `VulnerabilityReportSpec` and a Markdown template scaffold: - `templates/aiken/report.md` +Note: the report no longer includes a single `target` path because analysis is performed over a set of source files. + ### Permission prompt Template scaffold: @@ -182,8 +229,8 @@ The markdown body can include richer instructions, rationale, and examples and i - Command is gated behind `unstable` feature flag (following `publish` pattern). - `trix audit` is implemented as scaffold with Aiken-focused analysis. - `src/commands/audit/mod.rs` provides the public command interface. -- `src/commands/aiken/audit.rs` contains the core audit implementation (internal). -- `src/commands/aiken/model.rs` defines scaffolding contracts for state, findings, and prompts. +- `src/commands/audit/mod.rs` contains the core audit implementation. +- `src/commands/audit/model.rs` defines scaffolding contracts for state, findings, and prompts. - Templates for report and permission prompt exist in `templates/aiken/`. - `skills/vulnerabilities/` exists with seed skill files. - This design document includes C4 diagrams as separate PlantUML files in `003-assets/`. 
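A minimal usage sketch of the command surface described above (assuming a build with the `unstable` feature, a project initialized as in the e2e tests via `trix init --yes`, and the flag names defined on `Args` in this patch; defaults and flag spellings may still evolve):

```bash
# Dry run with the scaffold provider: no external AI calls are performed.
trix audit --provider scaffold --ai-logs

# Real analysis against the default OpenAI-compatible endpoint, restricting
# AI-requested reads to the discovered .ak files and confirming each read.
export OPENAI_API_KEY="..."   # placeholder value; --api-key-env can select another variable
trix audit \
  --provider openai \
  --read-scope strict \
  --interactive-permissions \
  --ai-logs
```

Both invocations persist incremental state to `.tx3/audit/state.json` and write the final report to `.tx3/audit/vulnerabilities.md`.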
diff --git a/src/commands/audit/mod.rs b/src/commands/audit/mod.rs index 7b051ee..02ce04e 100644 --- a/src/commands/audit/mod.rs +++ b/src/commands/audit/mod.rs @@ -1,4 +1,4 @@ -use clap::Args as ClapArgs; +use clap::{Args as ClapArgs, ValueEnum}; use miette::{Context, IntoDiagnostic, Result}; use serde::Deserialize; use std::path::{Path, PathBuf}; @@ -6,24 +6,30 @@ use std::path::{Path, PathBuf}; use crate::config::{ProfileConfig, RootConfig}; mod model; -mod provider; +mod providers; use self::model::{ AnalysisStateJson, MiniPrompt, PermissionPromptSpec, SkillIterationResult, VulnerabilityFinding, VulnerabilityReportSpec, VulnerabilitySkill, }; -use self::provider::{AnalysisProvider, AnthropicProvider, OpenAiProvider, ScaffoldProvider}; +use self::providers::{build_provider, AnalysisProvider}; const DEFAULT_SKILLS_DIR: &str = "skills/vulnerabilities"; -const DEFAULT_AI_ENDPOINT: &str = "https://api.openai.com/v1/chat/completions"; -const DEFAULT_AI_MODEL: &str = "gpt-4.1-mini"; -const DEFAULT_AI_API_KEY_ENV: &str = "OPENAI_API_KEY"; -const DEFAULT_ANTHROPIC_ENDPOINT: &str = "https://api.anthropic.com/v1/messages"; -const DEFAULT_ANTHROPIC_MODEL: &str = "claude-3-5-haiku-latest"; -const DEFAULT_ANTHROPIC_API_KEY_ENV: &str = "ANTHROPIC_API_KEY"; -const DEFAULT_ANTHROPIC_VERSION: &str = "2023-06-01"; -const DEFAULT_OLLAMA_ENDPOINT: &str = "http://localhost:11434/v1/chat/completions"; -const DEFAULT_OLLAMA_MODEL: &str = "llama3.1"; + +#[derive(Debug, Clone, Copy, ValueEnum)] +pub enum ReadScopeArg { + Workspace, + Strict, +} + +impl ReadScopeArg { + fn as_str(self) -> &'static str { + match self { + Self::Workspace => "workspace", + Self::Strict => "strict", + } + } +} #[derive(ClapArgs)] pub struct Args { @@ -54,6 +60,18 @@ pub struct Args { /// API key environment variable override. Default depends on --provider. #[arg(long)] pub api_key_env: Option, + + /// Print interactive AI round-trip steps and local tool actions while auditing. + #[arg(long, default_value_t = false)] + pub ai_logs: bool, + + /// File read scope for AI-assisted local tool requests: workspace | strict. + #[arg(long, value_enum, default_value_t = ReadScopeArg::Workspace)] + pub read_scope: ReadScopeArg, + + /// Ask confirmation before executing each AI-requested local read action. 
+ #[arg(long, default_value_t = false)] + pub interactive_permissions: bool, } #[allow(unused_variables)] @@ -87,7 +105,6 @@ fn run_analysis( let skills_dir = PathBuf::from(&args.skills_dir); let state_out = PathBuf::from(&args.state_out); let report_out = PathBuf::from(&args.report_out); - let target_path = config.protocol.main.display().to_string(); let project_root = std::env::current_dir().into_diagnostic()?; let source_files = discover_source_files(&project_root)?; let source_files = if source_files.is_empty() { @@ -96,24 +113,46 @@ fn run_analysis( source_files }; - let permission_prompt = build_permission_prompt_spec(); + log_audit_progress( + args.ai_logs, + format!( + "[i] setup provider={} source_files={}", + provider.provider_spec().name, + source_files.len() + ), + ); + + let permission_prompt = build_permission_prompt_spec( + args.read_scope, + args.interactive_permissions, + &project_root, + &source_files, + ); let skills = load_skills(&skills_dir, &args.skills_dir)?; let mut state = AnalysisStateJson { version: "1".to_string(), - target_path: target_path.clone(), source_files: source_files .iter() .map(|path| path.display().to_string()) .collect(), provider: provider.provider_spec(), - permission_prompt, + permission_prompt: permission_prompt.clone(), iterations: vec![], }; write_state(&state_out, &state)?; - run_skill_loop(&skills, &source_files, provider, &mut state, &state_out)?; + run_skill_loop( + &skills, + &source_files, + &project_root, + &permission_prompt, + provider, + args.ai_logs, + &mut state, + &state_out, + )?; let report = build_report(&state); let report_markdown = render_report_markdown(&report); @@ -133,27 +172,75 @@ fn run_analysis( fn run_skill_loop( skills: &[VulnerabilitySkill], source_files: &[PathBuf], + project_root: &Path, + permission_prompt: &PermissionPromptSpec, provider: &dyn AnalysisProvider, + ai_logs: bool, state: &mut AnalysisStateJson, state_out: &Path, ) -> Result<()> { - for source_file in source_files { - let source_code = std::fs::read_to_string(source_file) - .into_diagnostic() - .with_context(|| format!("Failed to read source file {}", source_file.display()))?; - let target_path = source_file.display().to_string(); - - for skill in skills { - let prompt = build_mini_prompt(skill); - let iteration = provider.analyze_skill(skill, &prompt, &target_path, &source_code)?; - append_iteration(state, iteration); - write_state(state_out, state)?; - } + let source_references = source_files + .iter() + .map(|path| display_path_for_prompt(project_root, path)) + .collect::>(); + + let total_skills = skills.len(); + + for (skill_idx, skill) in skills.iter().enumerate() { + log_audit_progress( + ai_logs, + format!( + "[ ] skill {}/{} start '{}' ({})", + skill_idx + 1, + total_skills, + skill.id, + skill.name + ), + ); + + let prompt = build_mini_prompt(skill); + let iteration = provider.analyze_skill( + skill, + &prompt, + &source_references, + project_root, + permission_prompt, + )?; + + let findings_count = iteration.findings.len(); + let status = iteration.status.clone(); + + append_iteration(state, iteration); + write_state(state_out, state)?; + + log_audit_progress( + ai_logs, + format!( + "[x] skill {}/{} done '{}' status={} findings={} (state persisted)", + skill_idx + 1, + total_skills, + skill.id, + status, + findings_count + ), + ); } Ok(()) } +fn log_audit_progress(enabled: bool, message: impl AsRef) { + if enabled { + eprintln!("[audit][todo] {}", message.as_ref()); + } +} + +fn display_path_for_prompt(project_root: &Path, path: &Path) 
-> String { + path.strip_prefix(project_root) + .map(|relative| relative.display().to_string()) + .unwrap_or_else(|_| path.display().to_string()) +} + fn discover_source_files(project_root: &Path) -> Result> { let mut files = Vec::new(); let mut to_visit = vec![project_root.to_path_buf()]; @@ -196,84 +283,6 @@ fn discover_source_files(project_root: &Path) -> Result> { Ok(files) } -fn build_provider(args: &Args) -> Result> { - match args.provider.to_ascii_lowercase().as_str() { - "scaffold" => Ok(Box::new(ScaffoldProvider)), - "openai" => { - let endpoint = args - .endpoint - .clone() - .unwrap_or_else(|| DEFAULT_AI_ENDPOINT.to_string()); - let model = args - .model - .clone() - .unwrap_or_else(|| DEFAULT_AI_MODEL.to_string()); - let api_key_env = args - .api_key_env - .as_deref() - .unwrap_or(DEFAULT_AI_API_KEY_ENV); - - let api_key = std::env::var(api_key_env).into_diagnostic().with_context(|| { - format!( - "Missing API key environment variable '{}'. Set it before running with --provider openai.", - api_key_env - ) - })?; - - Ok(Box::new(OpenAiProvider { - endpoint, - api_key, - model, - })) - } - "anthropic" => { - let endpoint = args - .endpoint - .clone() - .unwrap_or_else(|| DEFAULT_ANTHROPIC_ENDPOINT.to_string()); - let model = args - .model - .clone() - .unwrap_or_else(|| DEFAULT_ANTHROPIC_MODEL.to_string()); - let api_key_env = args - .api_key_env - .as_deref() - .unwrap_or(DEFAULT_ANTHROPIC_API_KEY_ENV); - - let api_key = std::env::var(api_key_env) - .into_diagnostic() - .with_context(|| { - format!( - "Missing API key environment variable '{}'. Set it before running with --provider anthropic.", - api_key_env - ) - })?; - - Ok(Box::new(AnthropicProvider { - endpoint, - api_key, - model, - version: DEFAULT_ANTHROPIC_VERSION.to_string(), - })) - } - "ollama" => Ok(Box::new(OpenAiProvider { - endpoint: args - .endpoint - .clone() - .unwrap_or_else(|| DEFAULT_OLLAMA_ENDPOINT.to_string()), - api_key: "ollama".to_string(), - model: args - .model - .clone() - .unwrap_or_else(|| DEFAULT_OLLAMA_MODEL.to_string()), - })), - value => Err(miette::miette!( - "Unsupported provider '{}'. Expected one of: scaffold, openai, anthropic, ollama", - value - )), - } -} - fn append_iteration(state: &mut AnalysisStateJson, iteration: SkillIterationResult) { state.iterations.push(iteration); } @@ -326,7 +335,43 @@ fn compose_skill_prompt(skill: &VulnerabilitySkill) -> String { sections.join("\n\n") } -fn build_permission_prompt_spec() -> PermissionPromptSpec { +fn build_permission_prompt_spec( + read_scope: ReadScopeArg, + interactive_permissions: bool, + project_root: &Path, + source_files: &[PathBuf], +) -> PermissionPromptSpec { + let allowed_paths = if matches!(read_scope, ReadScopeArg::Strict) { + source_files + .iter() + .map(|path| display_path_for_prompt(project_root, path)) + .collect::>() + } else { + vec![] + }; + + let mut scope_rules = vec![ + "Only execute commands within the current project root.".to_string(), + "Do not write outside designated output artifacts.".to_string(), + ]; + + if matches!(read_scope, ReadScopeArg::Strict) { + scope_rules.push( + "Read scope is strict: only known source files are allowed for reads/searches; directory listing and file discovery requests are denied.".to_string(), + ); + } else { + scope_rules.push( + "Read scope is workspace: any path under project root can be read/searched.".to_string(), + ); + } + + if interactive_permissions { + scope_rules.push( + "Interactive permissions are enabled: every local read action requires explicit user confirmation." 
+ .to_string(), + ); + } + PermissionPromptSpec { shell: "bash".to_string(), allowed_commands: vec![ @@ -335,10 +380,10 @@ fn build_permission_prompt_spec() -> PermissionPromptSpec { "find".to_string(), "ls".to_string(), ], - scope_rules: vec![ - "Only execute commands within the current project root.".to_string(), - "Do not write outside designated output artifacts.".to_string(), - ], + scope_rules, + read_scope: read_scope.as_str().to_string(), + interactive_permissions, + allowed_paths, } } @@ -352,7 +397,6 @@ fn build_report(state: &AnalysisStateJson) -> VulnerabilityReportSpec { VulnerabilityReportSpec { title: "Vulnerability Report".to_string(), generated_at: chrono::Utc::now().to_rfc3339(), - target: state.target_path.clone(), findings, } } @@ -635,6 +679,23 @@ body assert_eq!(files.len(), 1); assert!(files[0].ends_with("contracts/ok.ak")); } + + #[test] + fn render_findings_markdown_includes_location_when_available() { + let findings = vec![VulnerabilityFinding { + title: "Strict equality on value".to_string(), + severity: "high".to_string(), + summary: "Potential bypass due to strict value equality".to_string(), + evidence: vec!["validators/spend.ak:42".to_string()], + recommendation: "Compare lovelace and assets separately".to_string(), + file: Some("validators/spend.ak".to_string()), + line: Some(42), + }]; + + let markdown = render_findings_markdown(&findings); + + assert!(markdown.contains("Location: validators/spend.ak:42")); + } } fn write_state(path: &Path, state: &AnalysisStateJson) -> Result<()> { @@ -659,7 +720,6 @@ fn render_report_markdown(report: &VulnerabilityReportSpec) -> String { let findings_markdown = render_findings_markdown(&report.findings); template - .replace("{{ target }}", &report.target) .replace("{{ generated_at }}", &report.generated_at) .replace("{{ findings_markdown }}", &findings_markdown) } @@ -672,10 +732,23 @@ fn render_findings_markdown(findings: &[VulnerabilityFinding]) -> String { findings .iter() .map(|finding| { - format!( + let mut markdown = format!( "- **{}** (`{}`)\n - Summary: {}\n - Recommendation: {}", finding.title, finding.severity, finding.summary, finding.recommendation - ) + ); + + let location = match (&finding.file, finding.line) { + (Some(file), Some(line)) => Some(format!("{}:{}", file, line)), + (Some(file), None) => Some(file.clone()), + (None, Some(line)) => Some(format!("line {}", line)), + (None, None) => None, + }; + + if let Some(location) = location { + markdown.push_str(&format!("\n - Location: {}", location)); + } + + markdown }) .collect::>() .join("\n") diff --git a/src/commands/audit/model.rs b/src/commands/audit/model.rs index 55c44ea..e69f761 100644 --- a/src/commands/audit/model.rs +++ b/src/commands/audit/model.rs @@ -24,7 +24,6 @@ pub struct MiniPrompt { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SkillIterationResult { pub skill_id: String, - pub target_path: String, pub status: String, pub findings: Vec, pub next_prompt: Option, @@ -37,12 +36,15 @@ pub struct VulnerabilityFinding { pub summary: String, pub evidence: Vec, pub recommendation: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub file: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub line: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct AnalysisStateJson { pub version: String, - pub target_path: String, pub source_files: Vec, pub provider: ProviderSpec, pub permission_prompt: PermissionPromptSpec, @@ -61,12 +63,21 @@ pub struct PermissionPromptSpec { pub shell: 
String, pub allowed_commands: Vec, pub scope_rules: Vec, + #[serde(default = "default_read_scope")] + pub read_scope: String, + #[serde(default)] + pub interactive_permissions: bool, + #[serde(default)] + pub allowed_paths: Vec, +} + +fn default_read_scope() -> String { + "workspace".to_string() } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VulnerabilityReportSpec { pub title: String, pub generated_at: String, - pub target: String, pub findings: Vec, } diff --git a/src/commands/audit/provider.rs b/src/commands/audit/provider.rs deleted file mode 100644 index 797d67e..0000000 --- a/src/commands/audit/provider.rs +++ /dev/null @@ -1,304 +0,0 @@ -use miette::{IntoDiagnostic, Result}; -use serde_json::Value; -use tokio::runtime::Handle; - -use super::model::{ - MiniPrompt, ProviderSpec, SkillIterationResult, VulnerabilityFinding, VulnerabilitySkill, -}; - -pub trait AnalysisProvider { - fn provider_spec(&self) -> ProviderSpec; - - fn analyze_skill( - &self, - skill: &VulnerabilitySkill, - prompt: &MiniPrompt, - target_path: &str, - source_code: &str, - ) -> Result; -} - -#[derive(Debug, Default)] -pub struct ScaffoldProvider; - -impl AnalysisProvider for ScaffoldProvider { - fn provider_spec(&self) -> ProviderSpec { - ProviderSpec { - name: "scaffold".to_string(), - model: None, - notes: "Scaffolding-only provider. No external AI calls are performed.".to_string(), - } - } - - fn analyze_skill( - &self, - skill: &VulnerabilitySkill, - prompt: &MiniPrompt, - target_path: &str, - _source_code: &str, - ) -> Result { - Ok(SkillIterationResult { - skill_id: skill.id.clone(), - target_path: target_path.to_string(), - status: "scaffolded".to_string(), - findings: vec![], - next_prompt: Some(MiniPrompt { - skill_id: skill.id.clone(), - text: format!( - "Scaffold follow-up placeholder for skill '{}' based on prompt '{}'.", - skill.id, prompt.text - ), - }), - }) - } -} - -#[derive(Debug, Clone)] -pub struct OpenAiProvider { - pub endpoint: String, - pub api_key: String, - pub model: String, -} - -impl AnalysisProvider for OpenAiProvider { - fn provider_spec(&self) -> ProviderSpec { - ProviderSpec { - name: "openai-compatible".to_string(), - model: Some(self.model.clone()), - notes: format!("Endpoint: {}", self.endpoint), - } - } - - fn analyze_skill( - &self, - skill: &VulnerabilitySkill, - prompt: &MiniPrompt, - target_path: &str, - source_code: &str, - ) -> Result { - let system_prompt = "You are a security auditor specialized in Aiken smart contracts. 
Return JSON only with shape: {\"status\": string, \"findings\": [{\"title\": string, \"severity\": string, \"summary\": string, \"evidence\": [string], \"recommendation\": string}], \"next_prompt\": string|null}."; - let user_prompt = format!( - "Analyze the following Aiken source file for a single vulnerability skill.\n\nTarget path: {}\n\nSkill:\n{}\n\nSource code:\n{}", - target_path, prompt.text, source_code - ); - - let payload = serde_json::json!({ - "model": self.model, - "messages": [ - { - "role": "system", - "content": system_prompt - }, - { - "role": "user", - "content": user_prompt - } - ], - "response_format": { - "type": "json_object" - } - }); - - let response_json = block_on_runtime_aware(async { - let client = reqwest::Client::new(); - let response = client - .post(&self.endpoint) - .bearer_auth(&self.api_key) - .json(&payload) - .send() - .await - .into_diagnostic()?; - - let response = response.error_for_status().into_diagnostic()?; - response.json::().await.into_diagnostic() - })?; - - let content = response_json - .pointer("/choices/0/message/content") - .and_then(Value::as_str) - .ok_or_else(|| { - miette::miette!("AI provider returned an unexpected response payload") - })?; - - let parsed = parse_structured_content(content)?; - - Ok(iteration_from_parsed(skill, target_path, parsed)) - } -} - -#[derive(Debug, Clone)] -pub struct AnthropicProvider { - pub endpoint: String, - pub api_key: String, - pub model: String, - pub version: String, -} - -impl AnalysisProvider for AnthropicProvider { - fn provider_spec(&self) -> ProviderSpec { - ProviderSpec { - name: "anthropic".to_string(), - model: Some(self.model.clone()), - notes: format!("Endpoint: {}", self.endpoint), - } - } - - fn analyze_skill( - &self, - skill: &VulnerabilitySkill, - prompt: &MiniPrompt, - target_path: &str, - source_code: &str, - ) -> Result { - let system_prompt = "You are a security auditor specialized in Aiken smart contracts. 
Return JSON only with shape: {\"status\": string, \"findings\": [{\"title\": string, \"severity\": string, \"summary\": string, \"evidence\": [string], \"recommendation\": string}], \"next_prompt\": string|null}."; - let user_prompt = format!( - "Analyze the following Aiken source file for a single vulnerability skill.\n\nTarget path: {}\n\nSkill:\n{}\n\nSource code:\n{}", - target_path, prompt.text, source_code - ); - - let payload = serde_json::json!({ - "model": self.model, - "max_tokens": 1200, - "system": system_prompt, - "messages": [ - { - "role": "user", - "content": user_prompt - } - ] - }); - - let response_json = block_on_runtime_aware(async { - let client = reqwest::Client::new(); - let response = client - .post(&self.endpoint) - .header("x-api-key", &self.api_key) - .header("anthropic-version", &self.version) - .json(&payload) - .send() - .await - .into_diagnostic()?; - - let response = response.error_for_status().into_diagnostic()?; - response.json::().await.into_diagnostic() - })?; - - let content = response_json - .pointer("/content/0/text") - .and_then(Value::as_str) - .ok_or_else(|| { - miette::miette!("Anthropic provider returned an unexpected response payload") - })?; - - let parsed = parse_structured_content(content)?; - - Ok(iteration_from_parsed(skill, target_path, parsed)) - } -} - -fn parse_structured_content(content: &str) -> Result { - if let Ok(parsed) = serde_json::from_str::(content) { - return Ok(parsed); - } - - let trimmed = content.trim(); - let fenced = trimmed - .strip_prefix("```json") - .or_else(|| trimmed.strip_prefix("```")) - .map(str::trim); - - if let Some(fenced_content) = fenced { - let fenced_content = fenced_content.strip_suffix("```").unwrap_or(fenced_content); - if let Ok(parsed) = serde_json::from_str::(fenced_content.trim()) { - return Ok(parsed); - } - } - - Err(miette::miette!( - "AI provider response is not valid JSON for structured findings" - )) -} - -fn block_on_runtime_aware(future: F) -> Result -where - F: std::future::Future>, -{ - match Handle::try_current() { - Ok(handle) => tokio::task::block_in_place(|| handle.block_on(future)), - Err(_) => { - let runtime = tokio::runtime::Runtime::new().into_diagnostic()?; - runtime.block_on(future) - } - } -} - -fn iteration_from_parsed( - skill: &VulnerabilitySkill, - target_path: &str, - parsed: Value, -) -> SkillIterationResult { - let findings = parsed - .get("findings") - .and_then(Value::as_array) - .map(|items| { - items - .iter() - .map(|item| VulnerabilityFinding { - title: item - .get("title") - .and_then(Value::as_str) - .unwrap_or("Untitled finding") - .to_string(), - severity: item - .get("severity") - .and_then(Value::as_str) - .unwrap_or(&skill.severity) - .to_string(), - summary: item - .get("summary") - .and_then(Value::as_str) - .unwrap_or("") - .to_string(), - evidence: item - .get("evidence") - .and_then(Value::as_array) - .map(|e| { - e.iter() - .filter_map(Value::as_str) - .map(ToString::to_string) - .collect::>() - }) - .unwrap_or_default(), - recommendation: item - .get("recommendation") - .and_then(Value::as_str) - .unwrap_or("") - .to_string(), - }) - .collect::>() - }) - .unwrap_or_default(); - - let status = parsed - .get("status") - .and_then(Value::as_str) - .unwrap_or("completed") - .to_string(); - - let next_prompt = parsed - .get("next_prompt") - .and_then(Value::as_str) - .filter(|value| !value.trim().is_empty()) - .map(|text| MiniPrompt { - skill_id: skill.id.clone(), - text: text.to_string(), - }); - - SkillIterationResult { - skill_id: skill.id.clone(), 
- target_path: target_path.to_string(), - status, - findings, - next_prompt, - } -} diff --git a/src/commands/audit/providers/anthropic.rs b/src/commands/audit/providers/anthropic.rs new file mode 100644 index 0000000..bacb013 --- /dev/null +++ b/src/commands/audit/providers/anthropic.rs @@ -0,0 +1,206 @@ +use miette::{Context, IntoDiagnostic, Result}; +use serde_json::Value; +use std::path::Path; + +use super::shared::{ + block_on_runtime_aware, build_agent_system_prompt, build_initial_user_prompt, + describe_read_request, execute_read_request, iteration_from_parsed, log_agent_progress, + parse_agent_action, preview_output_for_log, summarize_read_request, AgentAction, + MAX_AGENT_STEPS, +}; +use super::{ + AnalysisProvider, +}; +use crate::commands::audit::model::{ + MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, VulnerabilitySkill, +}; + +#[derive(Debug, Clone)] +pub struct AnthropicProvider { + pub endpoint: String, + pub api_key: String, + pub model: String, + pub version: String, + pub ai_logs: bool, +} + +impl AnalysisProvider for AnthropicProvider { + fn provider_spec(&self) -> ProviderSpec { + ProviderSpec { + name: "anthropic".to_string(), + model: Some(self.model.clone()), + notes: format!("Endpoint: {}", self.endpoint), + } + } + + fn analyze_skill( + &self, + skill: &VulnerabilitySkill, + prompt: &MiniPrompt, + source_references: &[String], + project_root: &Path, + permission_prompt: &PermissionPromptSpec, + ) -> Result { + let canonical_root = project_root.canonicalize().into_diagnostic().with_context(|| { + format!( + "Failed to canonicalize project root {}", + project_root.display() + ) + })?; + + let system_prompt = build_agent_system_prompt(); + let initial_user_prompt = build_initial_user_prompt(prompt, source_references, permission_prompt); + + let mut messages = vec![serde_json::json!({ + "role": "user", + "content": initial_user_prompt, + })]; + + for step_idx in 0..MAX_AGENT_STEPS { + log_agent_progress( + self.ai_logs, + format!( + "[ ] skill={} step={}/{} ask model at {}", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS, + self.endpoint + ), + ); + + let payload = serde_json::json!({ + "model": self.model, + "max_tokens": 1200, + "system": system_prompt, + "messages": messages.clone(), + }); + + let response_json = block_on_runtime_aware(async { + let client = reqwest::Client::new(); + let response = client + .post(&self.endpoint) + .header("x-api-key", &self.api_key) + .header("anthropic-version", &self.version) + .json(&payload) + .send() + .await + .into_diagnostic()?; + + let response = response.error_for_status().into_diagnostic()?; + response.json::().await.into_diagnostic() + })?; + + let content = response_json + .pointer("/content/0/text") + .and_then(Value::as_str) + .ok_or_else(|| { + miette::miette!("Anthropic provider returned an unexpected response payload") + })?; + + messages.push(serde_json::json!({ + "role": "assistant", + "content": content, + })); + + match parse_agent_action(content)? 
{ + AgentAction::Final(parsed) => { + let findings = parsed + .get("findings") + .and_then(Value::as_array) + .map(|items| items.len()) + .unwrap_or(0); + let status = parsed + .get("status") + .and_then(Value::as_str) + .unwrap_or("completed"); + + log_agent_progress( + self.ai_logs, + format!( + "[x] skill={} step={}/{} final status={} findings={}", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS, + status, + findings + ), + ); + return Ok(iteration_from_parsed(skill, parsed)); + } + AgentAction::ReadRequest(request) => { + log_agent_progress( + self.ai_logs, + format!( + "[ ] skill={} step={}/{} {}", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS, + describe_read_request(&request) + ), + ); + + log_agent_progress( + self.ai_logs, + format!( + "[ ] skill={} step={}/{} run local action: {}", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS, + summarize_read_request(&request) + ), + ); + + let output = execute_read_request(&request, &canonical_root, permission_prompt) + .unwrap_or_else(|error| format!("Request failed: {}", error)); + + log_agent_progress( + self.ai_logs, + format!( + "[x] skill={} step={}/{} local action finished chars={}", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS, + output.chars().count() + ), + ); + + log_agent_progress( + self.ai_logs, + format!( + "[i] skill={} step={}/{} output preview: {}", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS, + preview_output_for_log(&output, 180) + ), + ); + + log_agent_progress( + self.ai_logs, + format!( + "[ ] skill={} step={}/{} send local output back to model", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS + ), + ); + + messages.push(serde_json::json!({ + "role": "user", + "content": format!( + "Tool result for {:?}:\n{}\n\nContinue and return JSON.", + request, + output + ), + })); + } + } + } + + Err(miette::miette!( + "Anthropic provider exceeded max interactive read steps ({}) for skill '{}' (enable --ai-logs to inspect progress)", + MAX_AGENT_STEPS, + skill.id + )) + } +} diff --git a/src/commands/audit/providers/mod.rs b/src/commands/audit/providers/mod.rs new file mode 100644 index 0000000..377027c --- /dev/null +++ b/src/commands/audit/providers/mod.rs @@ -0,0 +1,120 @@ +mod anthropic; +mod openai; +mod scaffold; +mod shared; + +use miette::{Context, IntoDiagnostic, Result}; +use std::path::Path; + +use super::model::{ + MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, VulnerabilitySkill, +}; +use super::Args; + +use self::anthropic::AnthropicProvider; +use self::openai::OpenAiProvider; +use self::scaffold::ScaffoldProvider; + +const DEFAULT_AI_ENDPOINT: &str = "https://api.openai.com/v1/chat/completions"; +const DEFAULT_AI_MODEL: &str = "gpt-4.1-mini"; +const DEFAULT_AI_API_KEY_ENV: &str = "OPENAI_API_KEY"; +const DEFAULT_ANTHROPIC_ENDPOINT: &str = "https://api.anthropic.com/v1/messages"; +const DEFAULT_ANTHROPIC_MODEL: &str = "claude-3-5-haiku-latest"; +const DEFAULT_ANTHROPIC_API_KEY_ENV: &str = "ANTHROPIC_API_KEY"; +const DEFAULT_ANTHROPIC_VERSION: &str = "2023-06-01"; +const DEFAULT_OLLAMA_ENDPOINT: &str = "http://localhost:11434/v1/chat/completions"; +const DEFAULT_OLLAMA_MODEL: &str = "llama3.1"; + +pub trait AnalysisProvider { + fn provider_spec(&self) -> ProviderSpec; + + fn analyze_skill( + &self, + skill: &VulnerabilitySkill, + prompt: &MiniPrompt, + source_references: &[String], + project_root: &Path, + permission_prompt: &PermissionPromptSpec, + ) -> Result; +} + +pub fn build_provider(args: &Args) -> Result> { + match args.provider.to_ascii_lowercase().as_str() { + "scaffold" => 
Ok(Box::new(ScaffoldProvider)), + "openai" => { + let endpoint = args + .endpoint + .clone() + .unwrap_or_else(|| DEFAULT_AI_ENDPOINT.to_string()); + let model = args + .model + .clone() + .unwrap_or_else(|| DEFAULT_AI_MODEL.to_string()); + let api_key_env = args + .api_key_env + .as_deref() + .unwrap_or(DEFAULT_AI_API_KEY_ENV); + + let api_key = std::env::var(api_key_env).into_diagnostic().with_context(|| { + format!( + "Missing API key environment variable '{}'. Set it before running with --provider openai.", + api_key_env + ) + })?; + + Ok(Box::new(OpenAiProvider { + endpoint, + api_key, + model, + ai_logs: args.ai_logs, + })) + } + "anthropic" => { + let endpoint = args + .endpoint + .clone() + .unwrap_or_else(|| DEFAULT_ANTHROPIC_ENDPOINT.to_string()); + let model = args + .model + .clone() + .unwrap_or_else(|| DEFAULT_ANTHROPIC_MODEL.to_string()); + let api_key_env = args + .api_key_env + .as_deref() + .unwrap_or(DEFAULT_ANTHROPIC_API_KEY_ENV); + + let api_key = std::env::var(api_key_env) + .into_diagnostic() + .with_context(|| { + format!( + "Missing API key environment variable '{}'. Set it before running with --provider anthropic.", + api_key_env + ) + })?; + + Ok(Box::new(AnthropicProvider { + endpoint, + api_key, + model, + version: DEFAULT_ANTHROPIC_VERSION.to_string(), + ai_logs: args.ai_logs, + })) + } + "ollama" => Ok(Box::new(OpenAiProvider { + endpoint: args + .endpoint + .clone() + .unwrap_or_else(|| DEFAULT_OLLAMA_ENDPOINT.to_string()), + api_key: "ollama".to_string(), + model: args + .model + .clone() + .unwrap_or_else(|| DEFAULT_OLLAMA_MODEL.to_string()), + ai_logs: args.ai_logs, + })), + value => Err(miette::miette!( + "Unsupported provider '{}'. Expected one of: scaffold, openai, anthropic, ollama", + value + )), + } +} diff --git a/src/commands/audit/providers/openai.rs b/src/commands/audit/providers/openai.rs new file mode 100644 index 0000000..50344a2 --- /dev/null +++ b/src/commands/audit/providers/openai.rs @@ -0,0 +1,211 @@ +use miette::{Context, IntoDiagnostic, Result}; +use serde_json::Value; +use std::path::Path; + +use super::shared::{ + block_on_runtime_aware, build_agent_system_prompt, build_initial_user_prompt, + describe_read_request, execute_read_request, iteration_from_parsed, log_agent_progress, + parse_agent_action, preview_output_for_log, summarize_read_request, AgentAction, + MAX_AGENT_STEPS, +}; +use super::{ + AnalysisProvider, +}; +use crate::commands::audit::model::{ + MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, VulnerabilitySkill, +}; + +#[derive(Debug, Clone)] +pub struct OpenAiProvider { + pub endpoint: String, + pub api_key: String, + pub model: String, + pub ai_logs: bool, +} + +impl AnalysisProvider for OpenAiProvider { + fn provider_spec(&self) -> ProviderSpec { + ProviderSpec { + name: "openai-compatible".to_string(), + model: Some(self.model.clone()), + notes: format!("Endpoint: {}", self.endpoint), + } + } + + fn analyze_skill( + &self, + skill: &VulnerabilitySkill, + prompt: &MiniPrompt, + source_references: &[String], + project_root: &Path, + permission_prompt: &PermissionPromptSpec, + ) -> Result { + let canonical_root = project_root.canonicalize().into_diagnostic().with_context(|| { + format!( + "Failed to canonicalize project root {}", + project_root.display() + ) + })?; + + let system_prompt = build_agent_system_prompt(); + let initial_user_prompt = build_initial_user_prompt(prompt, source_references, permission_prompt); + + let mut messages = vec![ + serde_json::json!({ + "role": "system", + 
"content": system_prompt, + }), + serde_json::json!({ + "role": "user", + "content": initial_user_prompt, + }), + ]; + + for step_idx in 0..MAX_AGENT_STEPS { + log_agent_progress( + self.ai_logs, + format!( + "[ ] skill={} step={}/{} ask model at {}", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS, + self.endpoint + ), + ); + + let payload = serde_json::json!({ + "model": self.model, + "messages": messages.clone(), + "response_format": { + "type": "json_object" + } + }); + + let response_json = block_on_runtime_aware(async { + let client = reqwest::Client::new(); + let response = client + .post(&self.endpoint) + .bearer_auth(&self.api_key) + .json(&payload) + .send() + .await + .into_diagnostic()?; + + let response = response.error_for_status().into_diagnostic()?; + response.json::().await.into_diagnostic() + })?; + + let content = response_json + .pointer("/choices/0/message/content") + .and_then(Value::as_str) + .ok_or_else(|| { + miette::miette!("AI provider returned an unexpected response payload") + })?; + + messages.push(serde_json::json!({ + "role": "assistant", + "content": content, + })); + + match parse_agent_action(content)? { + AgentAction::Final(parsed) => { + let findings = parsed + .get("findings") + .and_then(Value::as_array) + .map(|items| items.len()) + .unwrap_or(0); + let status = parsed + .get("status") + .and_then(Value::as_str) + .unwrap_or("completed"); + + log_agent_progress( + self.ai_logs, + format!( + "[x] skill={} step={}/{} final status={} findings={}", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS, + status, + findings + ), + ); + return Ok(iteration_from_parsed(skill, parsed)); + } + AgentAction::ReadRequest(request) => { + log_agent_progress( + self.ai_logs, + format!( + "[ ] skill={} step={}/{} {}", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS, + describe_read_request(&request) + ), + ); + + log_agent_progress( + self.ai_logs, + format!( + "[ ] skill={} step={}/{} run local action: {}", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS, + summarize_read_request(&request) + ), + ); + + let output = execute_read_request(&request, &canonical_root, permission_prompt) + .unwrap_or_else(|error| format!("Request failed: {}", error)); + + log_agent_progress( + self.ai_logs, + format!( + "[x] skill={} step={}/{} local action finished chars={}", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS, + output.chars().count() + ), + ); + + log_agent_progress( + self.ai_logs, + format!( + "[i] skill={} step={}/{} output preview: {}", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS, + preview_output_for_log(&output, 180) + ), + ); + + log_agent_progress( + self.ai_logs, + format!( + "[ ] skill={} step={}/{} send local output back to model", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS + ), + ); + + messages.push(serde_json::json!({ + "role": "user", + "content": format!( + "Tool result for {:?}:\n{}\n\nContinue and return JSON.", + request, + output + ), + })); + } + } + } + + Err(miette::miette!( + "AI provider exceeded max interactive read steps ({}) for skill '{}' (enable --ai-logs to inspect progress)", + MAX_AGENT_STEPS, + skill.id + )) + } +} diff --git a/src/commands/audit/providers/scaffold.rs b/src/commands/audit/providers/scaffold.rs new file mode 100644 index 0000000..044f485 --- /dev/null +++ b/src/commands/audit/providers/scaffold.rs @@ -0,0 +1,42 @@ +use miette::Result; +use std::path::Path; + +use super::AnalysisProvider; +use crate::commands::audit::model::{ + MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, VulnerabilitySkill, +}; + 
+#[derive(Debug, Default)] +pub struct ScaffoldProvider; + +impl AnalysisProvider for ScaffoldProvider { + fn provider_spec(&self) -> ProviderSpec { + ProviderSpec { + name: "scaffold".to_string(), + model: None, + notes: "Scaffolding-only provider. No external AI calls are performed.".to_string(), + } + } + + fn analyze_skill( + &self, + skill: &VulnerabilitySkill, + prompt: &MiniPrompt, + _source_references: &[String], + _project_root: &Path, + _permission_prompt: &PermissionPromptSpec, + ) -> Result { + Ok(SkillIterationResult { + skill_id: skill.id.clone(), + status: "scaffolded".to_string(), + findings: vec![], + next_prompt: Some(MiniPrompt { + skill_id: skill.id.clone(), + text: format!( + "Scaffold follow-up placeholder for skill '{}' based on prompt '{}'.", + skill.id, prompt.text + ), + }), + }) + } +} diff --git a/src/commands/audit/providers/shared.rs b/src/commands/audit/providers/shared.rs new file mode 100644 index 0000000..35474f0 --- /dev/null +++ b/src/commands/audit/providers/shared.rs @@ -0,0 +1,626 @@ +use miette::{Context, IntoDiagnostic, Result}; +use serde::Deserialize; +use serde_json::Value; +use std::io::{self, Write}; +use std::path::{Path, PathBuf}; +use std::process::Command; +use tokio::runtime::Handle; + +use crate::commands::audit::model::{ + MiniPrompt, PermissionPromptSpec, SkillIterationResult, VulnerabilityFinding, + VulnerabilitySkill, +}; + +pub(super) const MAX_AGENT_STEPS: usize = 25; +const MAX_COMMAND_OUTPUT_CHARS: usize = 30_000; + +#[derive(Debug)] +pub(super) enum AgentAction { + Final(Value), + ReadRequest(ReadRequest), +} + +#[derive(Debug)] +pub(super) enum ReadRequest { + ReadFile { + path: String, + }, + Grep { + pattern: String, + path: String, + context_lines: usize, + }, + ListDir { + path: String, + }, + FindFiles { + path: String, + glob: Option, + }, +} + +#[derive(Debug, Deserialize)] +struct RawReadRequest { + action: Option, + path: Option, + pattern: Option, + context_lines: Option, + glob: Option, +} + +pub(super) fn build_agent_system_prompt() -> &'static str { + "You are a security auditor specialized in Aiken smart contracts. You must return JSON only. Use an iterative process: request local reads when needed, then finish with findings.\n\nValid JSON actions:\n1) {\"action\":\"read_file\",\"path\":\"relative/path.ak\"}\n2) {\"action\":\"grep\",\"pattern\":\"regex\",\"path\":\"relative/path/or/dir\",\"context_lines\":2}\n3) {\"action\":\"list_dir\",\"path\":\"relative/path\"}\n4) {\"action\":\"find_files\",\"path\":\"relative/path\",\"glob\":\"*.ak\"}\n5) {\"action\":\"final\",\"status\":\"completed|scaffolded\",\"findings\":[{\"title\":string,\"severity\":string,\"summary\":string,\"evidence\":[string],\"recommendation\":string,\"file\":string|null,\"line\":number|null}],\"next_prompt\":string|null}\n\nPrefer returning file and line whenever you can confidently identify where the bug exists or where the recommendation applies.\n\nNever include markdown fences." +} + +fn parse_line_number(value: Option<&Value>) -> Option { + value.and_then(|entry| { + if let Some(number) = entry.as_u64() { + return usize::try_from(number).ok(); + } + + entry + .as_str() + .and_then(|text| text.trim().parse::().ok()) + }) +} + +pub(super) fn build_initial_user_prompt( + prompt: &MiniPrompt, + source_references: &[String], + permission_prompt: &PermissionPromptSpec, +) -> String { + format!( + "Analyze Aiken code for this single vulnerability skill. 
You are given file references only (no source code inline).\n\nSkill:\n{}\n\nReferenced Aiken files:\n{}\n\nAllowed read commands: {}\nScope rules:\n- {}\n\nReturn JSON action only.", + prompt.text, + render_source_references(source_references), + permission_prompt.allowed_commands.join(", "), + permission_prompt.scope_rules.join("\n- "), + ) +} + +fn render_source_references(source_references: &[String]) -> String { + if source_references.is_empty() { + return "- (none)".to_string(); + } + + source_references + .iter() + .map(|path| format!("- {}", path)) + .collect::>() + .join("\n") +} + +pub(super) fn parse_agent_action(content: &str) -> Result { + let parsed = parse_structured_content(content)?; + + let has_final_shape = parsed.get("findings").is_some() || parsed.get("status").is_some(); + let action_value = parsed + .get("action") + .and_then(Value::as_str) + .map(|value| value.trim().to_ascii_lowercase()); + + if action_value.is_none() && has_final_shape { + return Ok(AgentAction::Final(parsed)); + } + + let raw: RawReadRequest = serde_json::from_value(parsed.clone()) + .into_diagnostic() + .context("Invalid agent action payload")?; + + match raw.action.unwrap_or_else(|| "final".to_string()).as_str() { + "final" => Ok(AgentAction::Final(parsed)), + "read_file" => Ok(AgentAction::ReadRequest(ReadRequest::ReadFile { + path: raw.path.unwrap_or_else(|| ".".to_string()), + })), + "grep" => Ok(AgentAction::ReadRequest(ReadRequest::Grep { + pattern: raw.pattern.unwrap_or_default(), + path: raw.path.unwrap_or_else(|| ".".to_string()), + context_lines: raw.context_lines.unwrap_or(2).min(20), + })), + "list_dir" => Ok(AgentAction::ReadRequest(ReadRequest::ListDir { + path: raw.path.unwrap_or_else(|| ".".to_string()), + })), + "find_files" => Ok(AgentAction::ReadRequest(ReadRequest::FindFiles { + path: raw.path.unwrap_or_else(|| ".".to_string()), + glob: raw.glob, + })), + other => Err(miette::miette!("Unsupported agent action '{}'", other)), + } +} + +pub(super) fn execute_read_request( + request: &ReadRequest, + project_root: &Path, + permission_prompt: &PermissionPromptSpec, +) -> Result { + match request { + ReadRequest::ReadFile { path } => { + ensure_allowed(permission_prompt, "cat")?; + let scoped_path = resolve_scoped_path(project_root, path)?; + enforce_read_scope(request, &scoped_path, project_root, permission_prompt)?; + confirm_request_if_interactive(request, &scoped_path, project_root, permission_prompt)?; + let args = vec![scoped_path.to_string_lossy().to_string()]; + run_command_capture("cat", &args, project_root) + } + ReadRequest::Grep { + pattern, + path, + context_lines, + } => { + ensure_allowed(permission_prompt, "grep")?; + let scoped_path = resolve_scoped_path(project_root, path)?; + enforce_read_scope(request, &scoped_path, project_root, permission_prompt)?; + confirm_request_if_interactive(request, &scoped_path, project_root, permission_prompt)?; + let args = vec![ + "-n".to_string(), + "-C".to_string(), + context_lines.to_string(), + "--".to_string(), + pattern.clone(), + scoped_path.to_string_lossy().to_string(), + ]; + + run_command_capture("grep", &args, project_root) + } + ReadRequest::ListDir { path } => { + ensure_allowed(permission_prompt, "ls")?; + let scoped_path = resolve_scoped_path(project_root, path)?; + enforce_read_scope(request, &scoped_path, project_root, permission_prompt)?; + confirm_request_if_interactive(request, &scoped_path, project_root, permission_prompt)?; + let args = vec!["-la".to_string(), scoped_path.to_string_lossy().to_string()]; + 
run_command_capture("ls", &args, project_root) + } + ReadRequest::FindFiles { path, glob } => { + ensure_allowed(permission_prompt, "find")?; + let scoped_path = resolve_scoped_path(project_root, path)?; + enforce_read_scope(request, &scoped_path, project_root, permission_prompt)?; + confirm_request_if_interactive(request, &scoped_path, project_root, permission_prompt)?; + let scoped = scoped_path.to_string_lossy().to_string(); + + let args = if let Some(glob) = glob { + vec![ + scoped, + "-type".to_string(), + "f".to_string(), + "-name".to_string(), + glob.clone(), + ] + } else { + vec![scoped, "-type".to_string(), "f".to_string()] + }; + + run_command_capture("find", &args, project_root) + } + } +} + +fn enforce_read_scope( + request: &ReadRequest, + scoped_path: &Path, + project_root: &Path, + permission_prompt: &PermissionPromptSpec, +) -> Result<()> { + if !permission_prompt.read_scope.eq_ignore_ascii_case("strict") { + return Ok(()); + } + + if matches!(request, ReadRequest::ListDir { .. } | ReadRequest::FindFiles { .. }) { + return Err(miette::miette!( + "Request denied by strict read scope: directory listing and file discovery are not allowed" + )); + } + + if !scoped_path.is_file() { + return Err(miette::miette!( + "Request denied by strict read scope: only known source files can be accessed" + )); + } + + let allowed_paths = resolve_allowed_paths(project_root, permission_prompt)?; + + if allowed_paths.iter().any(|allowed| allowed == scoped_path) { + return Ok(()); + } + + Err(miette::miette!( + "Request denied by strict read scope: '{}' is not an allowed source file", + display_relative_path(project_root, scoped_path) + )) +} + +fn resolve_allowed_paths( + project_root: &Path, + permission_prompt: &PermissionPromptSpec, +) -> Result> { + permission_prompt + .allowed_paths + .iter() + .map(|path| resolve_scoped_path(project_root, path)) + .collect::>>() +} + +fn confirm_request_if_interactive( + request: &ReadRequest, + scoped_path: &Path, + project_root: &Path, + permission_prompt: &PermissionPromptSpec, +) -> Result<()> { + if !permission_prompt.interactive_permissions { + return Ok(()); + } + + eprintln!( + "[audit][permission] {} -> {}", + summarize_read_request(request), + display_relative_path(project_root, scoped_path) + ); + eprint!("Allow this request? [y/N]: "); + io::stderr().flush().into_diagnostic()?; + + let mut answer = String::new(); + io::stdin().read_line(&mut answer).into_diagnostic()?; + let accepted = matches!(answer.trim().to_ascii_lowercase().as_str(), "y" | "yes"); + + if accepted { + return Ok(()); + } + + Err(miette::miette!( + "Request denied by user confirmation: {}", + summarize_read_request(request) + )) +} + +fn display_relative_path(project_root: &Path, scoped_path: &Path) -> String { + scoped_path + .strip_prefix(project_root) + .map(|relative| relative.display().to_string()) + .unwrap_or_else(|_| scoped_path.display().to_string()) +} + +fn ensure_allowed(permission_prompt: &PermissionPromptSpec, command: &str) -> Result<()> { + if permission_prompt + .allowed_commands + .iter() + .any(|allowed| allowed.eq_ignore_ascii_case(command)) + { + return Ok(()); + } + + Err(miette::miette!( + "Command '{}' is not permitted by permission prompt", + command + )) +} + +fn resolve_scoped_path(project_root: &Path, requested_path: &str) -> Result { + let requested_path = requested_path.trim(); + let requested_path = if requested_path.is_empty() { + "." 
+ } else { + requested_path + }; + + let joined = if Path::new(requested_path).is_absolute() { + PathBuf::from(requested_path) + } else { + project_root.join(requested_path) + }; + + let canonical = joined + .canonicalize() + .into_diagnostic() + .with_context(|| format!("Path does not exist or is inaccessible: {}", requested_path))?; + + if !canonical.starts_with(project_root) { + return Err(miette::miette!( + "Path escapes project root and is not allowed: {}", + requested_path + )); + } + + Ok(canonical) +} + +fn run_command_capture(command: &str, args: &[String], cwd: &Path) -> Result { + let output = Command::new(command) + .args(args) + .current_dir(cwd) + .output() + .into_diagnostic() + .with_context(|| format!("Failed to run command '{}'", command))?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + + let mut combined = String::new(); + + if !stdout.trim().is_empty() { + combined.push_str(&stdout); + } + + if !stderr.trim().is_empty() { + if !combined.is_empty() { + combined.push('\n'); + } + combined.push_str(&stderr); + } + + if combined.trim().is_empty() { + combined = format!( + "(no output; command exited with status {})", + output.status.code().unwrap_or_default() + ); + } + + if !output.status.success() { + combined.push_str(&format!( + "\n(command exited with status {})", + output.status.code().unwrap_or_default() + )); + } + + if combined.chars().count() > MAX_COMMAND_OUTPUT_CHARS { + let truncated = combined + .chars() + .take(MAX_COMMAND_OUTPUT_CHARS) + .collect::(); + return Ok(format!( + "{}\n...(truncated to {} chars)", + truncated, MAX_COMMAND_OUTPUT_CHARS + )); + } + + Ok(combined) +} + +fn parse_structured_content(content: &str) -> Result { + if let Ok(parsed) = serde_json::from_str::(content) { + return Ok(parsed); + } + + let trimmed = content.trim(); + let fenced = trimmed + .strip_prefix("```json") + .or_else(|| trimmed.strip_prefix("```")) + .map(str::trim); + + if let Some(fenced_content) = fenced { + let fenced_content = fenced_content.strip_suffix("```").unwrap_or(fenced_content); + if let Ok(parsed) = serde_json::from_str::(fenced_content.trim()) { + return Ok(parsed); + } + } + + Err(miette::miette!( + "AI provider response is not valid JSON for structured findings" + )) +} + +pub(super) fn block_on_runtime_aware(future: F) -> Result +where + F: std::future::Future>, +{ + match Handle::try_current() { + Ok(handle) => tokio::task::block_in_place(|| handle.block_on(future)), + Err(_) => { + let runtime = tokio::runtime::Runtime::new().into_diagnostic()?; + runtime.block_on(future) + } + } +} + +pub(super) fn summarize_read_request(request: &ReadRequest) -> String { + match request { + ReadRequest::ReadFile { path } => format!("read_file {}", path), + ReadRequest::Grep { + pattern, + path, + context_lines, + } => format!( + "grep pattern='{}' path={} context_lines={}", + pattern, path, context_lines + ), + ReadRequest::ListDir { path } => format!("list_dir {}", path), + ReadRequest::FindFiles { path, glob } => { + format!("find_files path={} glob={}", path, glob.as_deref().unwrap_or("*")) + } + } +} + +pub(super) fn describe_read_request(request: &ReadRequest) -> String { + match request { + ReadRequest::ReadFile { path } => { + format!("assistant requested: read file '{}'", path) + } + ReadRequest::Grep { + pattern, + path, + context_lines, + } => format!( + "assistant requested: search pattern '{}' in '{}' (context {} lines)", + pattern, path, context_lines + ), + ReadRequest::ListDir { 
path } => { + format!("assistant requested: list directory '{}'", path) + } + ReadRequest::FindFiles { path, glob } => format!( + "assistant requested: find files in '{}' with glob '{}'", + path, + glob.as_deref().unwrap_or("*") + ), + } +} + +pub(super) fn preview_output_for_log(output: &str, max_chars: usize) -> String { + let compact = output + .replace("\r\n", "\n") + .replace('\n', " ⏎ ") + .replace('\t', " "); + + let char_count = compact.chars().count(); + if char_count <= max_chars { + return compact; + } + + let preview = compact.chars().take(max_chars).collect::(); + format!("{}… ({} chars total)", preview, char_count) +} + +pub(super) fn log_agent_progress(enabled: bool, message: impl AsRef) { + if enabled { + eprintln!("[audit:ai][todo] {}", message.as_ref()); + } +} + +pub(super) fn iteration_from_parsed( + skill: &VulnerabilitySkill, + parsed: Value, +) -> SkillIterationResult { + let findings = parsed + .get("findings") + .and_then(Value::as_array) + .map(|items| { + items + .iter() + .map(|item| { + let file = item + .get("file") + .and_then(Value::as_str) + .filter(|value| !value.trim().is_empty()) + .map(ToString::to_string) + .or_else(|| { + item.get("location") + .and_then(|value| value.get("file")) + .and_then(Value::as_str) + .filter(|value| !value.trim().is_empty()) + .map(ToString::to_string) + }); + + let line = parse_line_number(item.get("line")).or_else(|| { + parse_line_number(item.get("location").and_then(|value| value.get("line"))) + }); + + VulnerabilityFinding { + title: item + .get("title") + .and_then(Value::as_str) + .unwrap_or("Untitled finding") + .to_string(), + severity: item + .get("severity") + .and_then(Value::as_str) + .unwrap_or(&skill.severity) + .to_string(), + summary: item + .get("summary") + .and_then(Value::as_str) + .unwrap_or("") + .to_string(), + evidence: item + .get("evidence") + .and_then(Value::as_array) + .map(|e| { + e.iter() + .filter_map(Value::as_str) + .map(ToString::to_string) + .collect::>() + }) + .unwrap_or_default(), + recommendation: item + .get("recommendation") + .and_then(Value::as_str) + .unwrap_or("") + .to_string(), + file, + line, + } + }) + .collect::>() + }) + .unwrap_or_default(); + + let status = parsed + .get("status") + .and_then(Value::as_str) + .unwrap_or("completed") + .to_string(); + + let next_prompt = parsed + .get("next_prompt") + .and_then(Value::as_str) + .filter(|value| !value.trim().is_empty()) + .map(|text| MiniPrompt { + skill_id: skill.id.clone(), + text: text.to_string(), + }); + + SkillIterationResult { + skill_id: skill.id.clone(), + status, + findings, + next_prompt, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::commands::audit::model::PermissionPromptSpec; + + #[test] + fn execute_read_request_strict_allows_known_file() { + let temp = tempfile::tempdir().expect("temp dir"); + let root = temp.path(); + let file = root.join("validators/spend.ak"); + + std::fs::create_dir_all(file.parent().expect("parent")).expect("create dir"); + std::fs::write(&file, "validator spend {}\n").expect("write file"); + + let prompt = PermissionPromptSpec { + shell: "bash".to_string(), + allowed_commands: vec!["cat".to_string()], + scope_rules: vec![], + read_scope: "strict".to_string(), + interactive_permissions: false, + allowed_paths: vec!["validators/spend.ak".to_string()], + }; + + let output = execute_read_request( + &ReadRequest::ReadFile { + path: "validators/spend.ak".to_string(), + }, + &root.canonicalize().expect("canonical root"), + &prompt, + ) + .expect("request should be 
allowed"); + + assert!(output.contains("validator spend")); + } + + #[test] + fn execute_read_request_strict_rejects_list_dir() { + let temp = tempfile::tempdir().expect("temp dir"); + let root = temp.path(); + + let prompt = PermissionPromptSpec { + shell: "bash".to_string(), + allowed_commands: vec!["ls".to_string()], + scope_rules: vec![], + read_scope: "strict".to_string(), + interactive_permissions: false, + allowed_paths: vec!["validators/spend.ak".to_string()], + }; + + let err = execute_read_request( + &ReadRequest::ListDir { + path: ".".to_string(), + }, + &root.canonicalize().expect("canonical root"), + &prompt, + ) + .expect_err("strict scope should reject list_dir"); + + assert!(err.to_string().contains("strict read scope")); + } +} diff --git a/templates/aiken/report.md b/templates/aiken/report.md index 51af1bc..096e2dc 100644 --- a/templates/aiken/report.md +++ b/templates/aiken/report.md @@ -1,6 +1,5 @@ # Aiken Vulnerability Report -- Target: {{ target }} - Generated at: {{ generated_at }} ## Summary From 18a0b0b3e5f27ab0b8e45bf8fe8b1d8ee30764ee Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Tue, 24 Feb 2026 16:15:27 -0300 Subject: [PATCH 07/18] feat: enhance logging messages for audit process and improve output rendering --- src/commands/audit/mod.rs | 10 ++--- src/commands/audit/providers/anthropic.rs | 54 +++++++++-------------- src/commands/audit/providers/openai.rs | 54 +++++++++-------------- src/commands/audit/providers/shared.rs | 46 ++++++++++++------- 4 files changed, 76 insertions(+), 88 deletions(-) diff --git a/src/commands/audit/mod.rs b/src/commands/audit/mod.rs index 02ce04e..cd9ff5e 100644 --- a/src/commands/audit/mod.rs +++ b/src/commands/audit/mod.rs @@ -61,7 +61,7 @@ pub struct Args { #[arg(long)] pub api_key_env: Option, - /// Print interactive AI round-trip steps and local tool actions while auditing. + /// Print chat-style progress of model requests and local tool actions while auditing. 
#[arg(long, default_value_t = false)] pub ai_logs: bool, @@ -116,7 +116,7 @@ fn run_analysis( log_audit_progress( args.ai_logs, format!( - "[i] setup provider={} source_files={}", + "Starting audit • provider={} • source files={}", provider.provider_spec().name, source_files.len() ), @@ -190,7 +190,7 @@ fn run_skill_loop( log_audit_progress( ai_logs, format!( - "[ ] skill {}/{} start '{}' ({})", + "Skill {}/{} • analyzing '{}' ({})", skill_idx + 1, total_skills, skill.id, @@ -216,7 +216,7 @@ fn run_skill_loop( log_audit_progress( ai_logs, format!( - "[x] skill {}/{} done '{}' status={} findings={} (state persisted)", + "Skill {}/{} • completed '{}' • status={} • findings={} • state persisted", skill_idx + 1, total_skills, skill.id, @@ -231,7 +231,7 @@ fn run_skill_loop( fn log_audit_progress(enabled: bool, message: impl AsRef) { if enabled { - eprintln!("[audit][todo] {}", message.as_ref()); + eprintln!("🧭 {}", message.as_ref()); } } diff --git a/src/commands/audit/providers/anthropic.rs b/src/commands/audit/providers/anthropic.rs index bacb013..6d3afea 100644 --- a/src/commands/audit/providers/anthropic.rs +++ b/src/commands/audit/providers/anthropic.rs @@ -4,8 +4,9 @@ use std::path::Path; use super::shared::{ block_on_runtime_aware, build_agent_system_prompt, build_initial_user_prompt, - describe_read_request, execute_read_request, iteration_from_parsed, log_agent_progress, - parse_agent_action, preview_output_for_log, summarize_read_request, AgentAction, + describe_read_request_friendly, execute_read_request, iteration_from_parsed, log_agent_progress, + parse_agent_action, render_model_output_for_log, render_tool_output_for_log, + summarize_read_request, AgentAction, MAX_AGENT_STEPS, }; use super::{ @@ -60,10 +61,10 @@ impl AnalysisProvider for AnthropicProvider { log_agent_progress( self.ai_logs, format!( - "[ ] skill={} step={}/{} ask model at {}", - skill.id, + "Step {}/{} • requesting next action for skill '{}' ({})", step_idx + 1, MAX_AGENT_STEPS, + skill.id, self.endpoint ), ); @@ -102,6 +103,14 @@ impl AnalysisProvider for AnthropicProvider { "content": content, })); + log_agent_progress( + self.ai_logs, + format!( + "Model output:\n{}", + render_model_output_for_log(content, 2_000) + ), + ); + match parse_agent_action(content)? 
{ AgentAction::Final(parsed) => { let findings = parsed @@ -117,7 +126,7 @@ impl AnalysisProvider for AnthropicProvider { log_agent_progress( self.ai_logs, format!( - "[x] skill={} step={}/{} final status={} findings={}", + "Model completed skill '{}' at step {}/{} • status={} • findings={}", skill.id, step_idx + 1, MAX_AGENT_STEPS, @@ -131,21 +140,15 @@ impl AnalysisProvider for AnthropicProvider { log_agent_progress( self.ai_logs, format!( - "[ ] skill={} step={}/{} {}", - skill.id, - step_idx + 1, - MAX_AGENT_STEPS, - describe_read_request(&request) + "Model requested: {}", + describe_read_request_friendly(&request) ), ); log_agent_progress( self.ai_logs, format!( - "[ ] skill={} step={}/{} run local action: {}", - skill.id, - step_idx + 1, - MAX_AGENT_STEPS, + "Running local action: {}", summarize_read_request(&request) ), ); @@ -156,32 +159,15 @@ impl AnalysisProvider for AnthropicProvider { log_agent_progress( self.ai_logs, format!( - "[x] skill={} step={}/{} local action finished chars={}", - skill.id, - step_idx + 1, - MAX_AGENT_STEPS, - output.chars().count() - ), - ); - - log_agent_progress( - self.ai_logs, - format!( - "[i] skill={} step={}/{} output preview: {}", - skill.id, - step_idx + 1, - MAX_AGENT_STEPS, - preview_output_for_log(&output, 180) + "Tool output:\n{}", + render_tool_output_for_log(&request, &output, 2_000) ), ); log_agent_progress( self.ai_logs, format!( - "[ ] skill={} step={}/{} send local output back to model", - skill.id, - step_idx + 1, - MAX_AGENT_STEPS + "Sending tool output back to model" ), ); diff --git a/src/commands/audit/providers/openai.rs b/src/commands/audit/providers/openai.rs index 50344a2..f7aea54 100644 --- a/src/commands/audit/providers/openai.rs +++ b/src/commands/audit/providers/openai.rs @@ -4,8 +4,9 @@ use std::path::Path; use super::shared::{ block_on_runtime_aware, build_agent_system_prompt, build_initial_user_prompt, - describe_read_request, execute_read_request, iteration_from_parsed, log_agent_progress, - parse_agent_action, preview_output_for_log, summarize_read_request, AgentAction, + describe_read_request_friendly, execute_read_request, iteration_from_parsed, log_agent_progress, + parse_agent_action, render_model_output_for_log, render_tool_output_for_log, + summarize_read_request, AgentAction, MAX_AGENT_STEPS, }; use super::{ @@ -65,10 +66,10 @@ impl AnalysisProvider for OpenAiProvider { log_agent_progress( self.ai_logs, format!( - "[ ] skill={} step={}/{} ask model at {}", - skill.id, + "Step {}/{} • requesting next action for skill '{}' ({})", step_idx + 1, MAX_AGENT_STEPS, + skill.id, self.endpoint ), ); @@ -107,6 +108,14 @@ impl AnalysisProvider for OpenAiProvider { "content": content, })); + log_agent_progress( + self.ai_logs, + format!( + "Model output:\n{}", + render_model_output_for_log(content, 2_000) + ), + ); + match parse_agent_action(content)? 
{ AgentAction::Final(parsed) => { let findings = parsed @@ -122,7 +131,7 @@ impl AnalysisProvider for OpenAiProvider { log_agent_progress( self.ai_logs, format!( - "[x] skill={} step={}/{} final status={} findings={}", + "Model completed skill '{}' at step {}/{} • status={} • findings={}", skill.id, step_idx + 1, MAX_AGENT_STEPS, @@ -136,21 +145,15 @@ impl AnalysisProvider for OpenAiProvider { log_agent_progress( self.ai_logs, format!( - "[ ] skill={} step={}/{} {}", - skill.id, - step_idx + 1, - MAX_AGENT_STEPS, - describe_read_request(&request) + "Model requested: {}", + describe_read_request_friendly(&request) ), ); log_agent_progress( self.ai_logs, format!( - "[ ] skill={} step={}/{} run local action: {}", - skill.id, - step_idx + 1, - MAX_AGENT_STEPS, + "Running local action: {}", summarize_read_request(&request) ), ); @@ -161,32 +164,15 @@ impl AnalysisProvider for OpenAiProvider { log_agent_progress( self.ai_logs, format!( - "[x] skill={} step={}/{} local action finished chars={}", - skill.id, - step_idx + 1, - MAX_AGENT_STEPS, - output.chars().count() - ), - ); - - log_agent_progress( - self.ai_logs, - format!( - "[i] skill={} step={}/{} output preview: {}", - skill.id, - step_idx + 1, - MAX_AGENT_STEPS, - preview_output_for_log(&output, 180) + "Tool output:\n{}", + render_tool_output_for_log(&request, &output, 2_000) ), ); log_agent_progress( self.ai_logs, format!( - "[ ] skill={} step={}/{} send local output back to model", - skill.id, - step_idx + 1, - MAX_AGENT_STEPS + "Sending tool output back to model" ), ); diff --git a/src/commands/audit/providers/shared.rs b/src/commands/audit/providers/shared.rs index 35474f0..d0e112b 100644 --- a/src/commands/audit/providers/shared.rs +++ b/src/commands/audit/providers/shared.rs @@ -428,48 +428,64 @@ pub(super) fn summarize_read_request(request: &ReadRequest) -> String { } } -pub(super) fn describe_read_request(request: &ReadRequest) -> String { +pub(super) fn describe_read_request_friendly(request: &ReadRequest) -> String { match request { ReadRequest::ReadFile { path } => { - format!("assistant requested: read file '{}'", path) + format!("read file '{}'", path) } ReadRequest::Grep { pattern, path, context_lines, } => format!( - "assistant requested: search pattern '{}' in '{}' (context {} lines)", + "search '{}' in '{}' ({} context lines)", pattern, path, context_lines ), ReadRequest::ListDir { path } => { - format!("assistant requested: list directory '{}'", path) + format!("list directory '{}'", path) } ReadRequest::FindFiles { path, glob } => format!( - "assistant requested: find files in '{}' with glob '{}'", + "find files in '{}' with glob '{}'", path, glob.as_deref().unwrap_or("*") ), } } -pub(super) fn preview_output_for_log(output: &str, max_chars: usize) -> String { - let compact = output - .replace("\r\n", "\n") - .replace('\n', " ⏎ ") - .replace('\t', " "); +pub(super) fn render_tool_output_for_log( + request: &ReadRequest, + output: &str, + max_chars: usize, +) -> String { + match request { + ReadRequest::ReadFile { path } => { + format!( + "📄 Archivo '{}' leído (contenido oculto en logs, {} chars)", + path, + output.chars().count() + ) + } + _ => truncate_for_log(output, max_chars), + } +} + +pub(super) fn render_model_output_for_log(output: &str, max_chars: usize) -> String { + truncate_for_log(output, max_chars) +} - let char_count = compact.chars().count(); +fn truncate_for_log(output: &str, max_chars: usize) -> String { + let char_count = output.chars().count(); if char_count <= max_chars { - return compact; + return 
output.to_string(); } - let preview = compact.chars().take(max_chars).collect::(); - format!("{}… ({} chars total)", preview, char_count) + let preview = output.chars().take(max_chars).collect::(); + format!("{}\n… (truncated, {} chars total)", preview, char_count) } pub(super) fn log_agent_progress(enabled: bool, message: impl AsRef) { if enabled { - eprintln!("[audit:ai][todo] {}", message.as_ref()); + eprintln!("🤖 {}", message.as_ref()); } } From a368e966c3f0683e05172c7f0087fa6ecff81146 Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Tue, 24 Feb 2026 16:43:06 -0300 Subject: [PATCH 08/18] feat: refactor run_skill_loop to use SkillLoopContext and improve logging messages --- src/commands/audit/mod.rs | 51 +++++++++++++---------- src/commands/audit/providers/anthropic.rs | 4 +- src/commands/audit/providers/openai.rs | 4 +- src/commands/audit/providers/shared.rs | 2 +- 4 files changed, 31 insertions(+), 30 deletions(-) diff --git a/src/commands/audit/mod.rs b/src/commands/audit/mod.rs index cd9ff5e..89c6aba 100644 --- a/src/commands/audit/mod.rs +++ b/src/commands/audit/mod.rs @@ -143,16 +143,16 @@ fn run_analysis( write_state(&state_out, &state)?; - run_skill_loop( - &skills, - &source_files, - &project_root, - &permission_prompt, + let skill_loop_context = SkillLoopContext { + source_files: &source_files, + project_root: &project_root, + permission_prompt: &permission_prompt, provider, - args.ai_logs, - &mut state, - &state_out, - )?; + ai_logs: args.ai_logs, + state_out: &state_out, + }; + + run_skill_loop(&skills, &mut state, skill_loop_context)?; let report = build_report(&state); let report_markdown = render_report_markdown(&report); @@ -169,26 +169,31 @@ fn run_analysis( Ok(()) } +struct SkillLoopContext<'a> { + source_files: &'a [PathBuf], + project_root: &'a Path, + permission_prompt: &'a PermissionPromptSpec, + provider: &'a dyn AnalysisProvider, + ai_logs: bool, + state_out: &'a Path, +} + fn run_skill_loop( skills: &[VulnerabilitySkill], - source_files: &[PathBuf], - project_root: &Path, - permission_prompt: &PermissionPromptSpec, - provider: &dyn AnalysisProvider, - ai_logs: bool, state: &mut AnalysisStateJson, - state_out: &Path, + context: SkillLoopContext<'_>, ) -> Result<()> { - let source_references = source_files + let source_references = context + .source_files .iter() - .map(|path| display_path_for_prompt(project_root, path)) + .map(|path| display_path_for_prompt(context.project_root, path)) .collect::>(); let total_skills = skills.len(); for (skill_idx, skill) in skills.iter().enumerate() { log_audit_progress( - ai_logs, + context.ai_logs, format!( "Skill {}/{} • analyzing '{}' ({})", skill_idx + 1, @@ -199,22 +204,22 @@ fn run_skill_loop( ); let prompt = build_mini_prompt(skill); - let iteration = provider.analyze_skill( + let iteration = context.provider.analyze_skill( skill, &prompt, &source_references, - project_root, - permission_prompt, + context.project_root, + context.permission_prompt, )?; let findings_count = iteration.findings.len(); let status = iteration.status.clone(); append_iteration(state, iteration); - write_state(state_out, state)?; + write_state(context.state_out, state)?; log_audit_progress( - ai_logs, + context.ai_logs, format!( "Skill {}/{} • completed '{}' • status={} • findings={} • state persisted", skill_idx + 1, diff --git a/src/commands/audit/providers/anthropic.rs b/src/commands/audit/providers/anthropic.rs index 6d3afea..cfc4759 100644 --- a/src/commands/audit/providers/anthropic.rs +++ b/src/commands/audit/providers/anthropic.rs 
@@ -166,9 +166,7 @@ impl AnalysisProvider for AnthropicProvider { log_agent_progress( self.ai_logs, - format!( - "Sending tool output back to model" - ), + "Sending tool output back to model", ); messages.push(serde_json::json!({ diff --git a/src/commands/audit/providers/openai.rs b/src/commands/audit/providers/openai.rs index f7aea54..652d6e5 100644 --- a/src/commands/audit/providers/openai.rs +++ b/src/commands/audit/providers/openai.rs @@ -171,9 +171,7 @@ impl AnalysisProvider for OpenAiProvider { log_agent_progress( self.ai_logs, - format!( - "Sending tool output back to model" - ), + "Sending tool output back to model", ); messages.push(serde_json::json!({ diff --git a/src/commands/audit/providers/shared.rs b/src/commands/audit/providers/shared.rs index d0e112b..f0cb7ea 100644 --- a/src/commands/audit/providers/shared.rs +++ b/src/commands/audit/providers/shared.rs @@ -460,7 +460,7 @@ pub(super) fn render_tool_output_for_log( match request { ReadRequest::ReadFile { path } => { format!( - "📄 Archivo '{}' leído (contenido oculto en logs, {} chars)", + "📄 File '{}' read (content hidden in logs, {} chars)", path, output.chars().count() ) From a81fc0aa67351d898ef7138b597be529ec2f54c6 Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Tue, 24 Feb 2026 17:27:52 -0300 Subject: [PATCH 09/18] feat: update vulnerability skills and enhance audit command implementation --- .../003-ai-aiken-vulnerability-scaffolding.md | 2 +- design/004-audit-implementation-spec.md | 420 ++++++++++++++++++ .../vulnerabilities/001-state-transition.md | 33 -- ...uality.md => 001-strict-value-equality.md} | 2 +- .../vulnerabilities/002-authz-boundaries.md | 33 -- src/commands/audit/mod.rs | 20 +- tests/e2e/happy_path.rs | 7 +- 7 files changed, 431 insertions(+), 86 deletions(-) create mode 100644 design/004-audit-implementation-spec.md delete mode 100644 skills/vulnerabilities/001-state-transition.md rename skills/vulnerabilities/{003-strict-value-equality.md => 001-strict-value-equality.md} (97%) delete mode 100644 skills/vulnerabilities/002-authz-boundaries.md diff --git a/design/003-ai-aiken-vulnerability-scaffolding.md b/design/003-ai-aiken-vulnerability-scaffolding.md index d7774d4..0d91260 100644 --- a/design/003-ai-aiken-vulnerability-scaffolding.md +++ b/design/003-ai-aiken-vulnerability-scaffolding.md @@ -111,7 +111,7 @@ Example (simplified): }, "iterations": [ { - "skill_id": "strict-value-equality-003", + "skill_id": "strict-value-equality-001", "status": "completed", "findings": [ { diff --git a/design/004-audit-implementation-spec.md b/design/004-audit-implementation-spec.md new file mode 100644 index 0000000..aba1412 --- /dev/null +++ b/design/004-audit-implementation-spec.md @@ -0,0 +1,420 @@ +# Audit Command Implementation Spec + +## Status + +This document captures the **currently implemented behavior** of `trix audit` as an implementation-spec companion to [003-ai-aiken-vulnerability-scaffolding.md](003-ai-aiken-vulnerability-scaffolding.md). + +## Scope + +In-scope: +- Full CLI contract currently accepted by `trix audit` +- Runtime behavior of the skill loop +- State/report output contracts as implemented +- Provider behavior (`scaffold`, `openai`, `anthropic`, `ollama`) +- Local read-tool permission and scope enforcement +- Current test-backed acceptance behavior + +Out-of-scope: +- Future UX redesigns +- Non-Aiken source language support + +## Command Surface + +`trix audit` is a **scoped command** (requires `trix.toml` in cwd). 
+ +It is **hidden + unstable-gated**: +- Hidden in clap command listing (`#[command(hide = true)]`) +- Returns an error unless compiled with `--features unstable` + +### CLI Arguments (current) + +```bash +trix audit \ + [--state-out ] \ + [--report-out ] \ + [--skills-dir ] \ + [--provider ] \ + [--endpoint ] \ + [--model ] \ + [--api-key-env ] \ + [--ai-logs] \ + [--read-scope ] \ + [--interactive-permissions] +``` + +Defaults: +- `--state-out`: `.tx3/audit/state.json` +- `--report-out`: `.tx3/audit/vulnerabilities.md` +- `--skills-dir`: `skills/vulnerabilities` +- `--provider`: `scaffold` +- `--read-scope`: `workspace` +- `--ai-logs`: `false` +- `--interactive-permissions`: `false` + +### Provider arguments (required behavior) + +The following arguments are interpreted with provider-specific defaults: + +- `--provider` + - Supported values: `scaffold`, `openai`, `anthropic`, `ollama` + - Any other value must fail with an unsupported provider error + +- `--endpoint` + - Optional override for provider API URL + - Default when omitted: + - `openai`: `https://api.openai.com/v1/chat/completions` + - `anthropic`: `https://api.anthropic.com/v1/messages` + - `ollama`: `http://localhost:11434/v1/chat/completions` + - `scaffold`: not used + +- `--model` + - Optional model override + - Default when omitted: + - `openai`: `gpt-4.1-mini` + - `anthropic`: `claude-3-5-haiku-latest` + - `ollama`: `llama3.1` + - `scaffold`: not used + +- `--api-key-env` + - Optional environment-variable name override for API credentials + - Default when omitted: + - `openai`: `OPENAI_API_KEY` + - `anthropic`: `ANTHROPIC_API_KEY` + - `ollama`: not required (fixed placeholder token is used) + - `scaffold`: not required + - Runtime behavior: + - `openai` and `anthropic` must fail early if the resolved env var is not set + - `ollama` does not read env credentials and uses `ollama` as a fixed API key string + +- `--ai-logs` + - When enabled, prints iterative model/tool progress logs to stderr + - Logs include step counts, requested local actions, and (truncated) model/tool output + +Examples: + +```bash +# OpenAI with defaults +trix audit --provider openai + +# OpenAI with endpoint/model/api key env overrides +trix audit --provider openai \ + --endpoint https://example.com/v1/chat/completions \ + --model gpt-4.1 \ + --api-key-env MY_OPENAI_KEY + +# Anthropic default endpoint + model +trix audit --provider anthropic + +# Ollama local runtime +trix audit --provider ollama --ai-logs +``` + +## High-Level Execution Flow + +1. Build provider from args. +2. Determine `project_root = current_dir`. +3. Discover source files recursively under project root: + - Include: `*.ak` + - Skip directories: `.git`, `target`, `.tx3`, `build` +4. If no `.ak` files were found, fallback to `config.protocol.main` as a source reference. +5. Build `PermissionPromptSpec` based on `read_scope` and `interactive_permissions`. +6. Load skills from `--skills-dir`. + - If directory is missing and arg is default `skills/vulnerabilities`, load embedded seed skills. + - If directory is missing and arg is custom, fail. +7. Initialize `AnalysisStateJson` with empty iterations and write it immediately. +8. For each skill in sorted order: + - Compose mini-prompt from skill metadata/body. + - Call provider `analyze_skill(...)`. + - Append iteration to state. + - Persist full state JSON after each skill. +9. Build aggregated report from all findings. +10. Render markdown via template and write report file. +11. Print completion summary to stdout. 
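The discovery step in the flow above (items 3–4) is easy to picture in code. The sketch below is illustrative only, assuming a free-standing helper with std-only error handling; the actual logic lives in `src/commands/audit/mod.rs` and may differ in naming and details:

```rust
use std::fs;
use std::path::{Path, PathBuf};

// Hypothetical sketch of steps 3-4: walk the project root, collect `*.ak`
// files, and skip `.git`, `target`, `.tx3`, and `build` directories.
fn discover_source_files(root: &Path) -> std::io::Result<Vec<PathBuf>> {
    const SKIP_DIRS: [&str; 4] = [".git", "target", ".tx3", "build"];
    let mut found = Vec::new();
    let mut stack = vec![root.to_path_buf()];

    while let Some(dir) = stack.pop() {
        for entry in fs::read_dir(&dir)? {
            let path = entry?.path();
            if path.is_dir() {
                let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
                if !SKIP_DIRS.contains(&name) {
                    stack.push(path);
                }
            } else if path.extension().and_then(|e| e.to_str()) == Some("ak") {
                found.push(path);
            }
        }
    }

    // Sorting keeps the prompt's file references deterministic.
    found.sort();
    Ok(found)
}
```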
+ +## Data Contracts (Implemented) + +Defined in `src/commands/audit/model.rs`. + +### `VulnerabilitySkill` +Required semantic fields: +- `id`, `name`, `severity`, `description`, `prompt_fragment` + +Optional/collection fields (default empty if missing): +- `examples`, `false_positives`, `references`, `tags` +- `confidence_hint` optional string +- `guidance_markdown` from markdown body (post-frontmatter) + +### `AnalysisStateJson` +```json +{ + "version": "1", + "source_files": ["..."], + "provider": { + "name": "...", + "model": "... or null", + "notes": "..." + }, + "permission_prompt": { + "shell": "bash", + "allowed_commands": ["grep", "cat", "find", "ls"], + "scope_rules": ["..."], + "read_scope": "workspace|strict", + "interactive_permissions": false, + "allowed_paths": ["..."] + }, + "iterations": [ + { + "skill_id": "...", + "status": "completed|scaffolded|...", + "findings": [ + { + "title": "...", + "severity": "...", + "summary": "...", + "evidence": ["..."], + "recommendation": "...", + "file": "optional", + "line": 42 + } + ], + "next_prompt": { + "skill_id": "...", + "text": "..." + } + } + ] +} +``` + +### `VulnerabilityReportSpec` +- `title` +- `generated_at` (UTC RFC3339) +- `findings` (flattened from all iterations) + +## Skill File Contract (Implemented Parser) + +Each skill file must be markdown with YAML frontmatter delimited by `---`. + +Rules: +- Missing frontmatter delimiters => error +- Unknown frontmatter fields => error (`deny_unknown_fields`) +- Required string fields must be non-empty after trim +- `severity` must be one of: `low|medium|high|critical` (case-normalized to lowercase) +- Tabs in frontmatter are normalized to two spaces before YAML parse +- Markdown body after frontmatter is stored in `guidance_markdown` + +## Prompt Construction + +Per skill, a mini-prompt is composed from: +- `Skill ID` +- `Name` +- `Severity` +- `Description` +- `Prompt Fragment` +- Optional sections for tags/hint/examples/false positives/references/guidance markdown + +Provider initial prompt includes: +- Mini-prompt text +- Referenced source files list +- Allowed commands + scope rules from `PermissionPromptSpec` + +## Permission Model and Local Tooling + +Allowed tool actions requested by model: +- `read_file` +- `grep` +- `list_dir` +- `find_files` +- `final` + +Mapped local commands: +- `read_file` -> `cat` +- `grep` -> `grep -n -C -- ` +- `list_dir` -> `ls -la ` +- `find_files` -> `find -type f [-name ]` + +Global safeguards: +- Requested path must canonicalize successfully +- Canonical path must remain under project root +- Command must be in `allowed_commands` +- Output truncation at 30,000 chars + +### Read scope modes + +`workspace`: +- Reads/searches over any path under project root + +`strict`: +- Denies `list_dir` and `find_files` +- Allows reads/searches only on regular files listed in `permission_prompt.allowed_paths` +- `allowed_paths` is populated from discovered source files (displayed relative paths) + +### Interactive permissions + +If enabled: +- Each local read request prompts `Allow this request? 
[y/N]:` +- Non-yes response denies request with an explicit error + +## Providers (Current) + +### `scaffold` +- No network calls +- Returns one iteration with: + - `status = scaffolded` + - empty findings + - placeholder `next_prompt` + +### `openai` +- Provider spec: + - `name = openai-compatible` + - `notes = Endpoint: ` +- Defaults: + - endpoint: `https://api.openai.com/v1/chat/completions` + - model: `gpt-4.1-mini` + - api key env: `OPENAI_API_KEY` +- Request shape: + - `model`, `messages`, `response_format: { type: json_object }` + - auth: Bearer API key +- Response extraction: + - `/choices/0/message/content` (string JSON) +- Iterative loop: + - max 25 steps (`MAX_AGENT_STEPS`) + - parse model output as action (`read request` or `final`) + - execute local read request and feed output back as user message + +### `anthropic` +- Provider spec: + - `name = anthropic` + - `notes = Endpoint: ` +- Defaults: + - endpoint: `https://api.anthropic.com/v1/messages` + - model: `claude-3-5-haiku-latest` + - api key env: `ANTHROPIC_API_KEY` + - version header: `2023-06-01` +- Request shape: + - `model`, `max_tokens`, `system`, `messages` + - headers: `x-api-key`, `anthropic-version` +- Response extraction: + - `/content/0/text` (string JSON) +- Same 25-step interactive read loop as `openai` + +### `ollama` +- Implemented via `OpenAiProvider` compatibility +- Defaults: + - endpoint: `http://localhost:11434/v1/chat/completions` + - model: `llama3.1` + - api key literal: `ollama` + +## Parsing of AI Output + +Accepted model output forms: +- Raw JSON object +- JSON inside fenced blocks (```json ... ``` or ``` ... ```) + +Action interpretation: +- If `action` missing but payload has `findings` or `status` => treated as `final` +- `final` payload is converted into `SkillIterationResult` +- `findings[*].line` can be number or numeric string +- Also supports nested fallback location fields: + - `location.file` + - `location.line` + +Defaults when missing: +- iteration status: `completed` +- finding title: `Untitled finding` +- finding severity: skill severity +- other finding text fields default to empty string + +## Output Rendering + +Report template: `templates/aiken/report.md` + +Findings markdown rendering: +- Empty findings => `- *(none)*` +- Per finding include title, severity, summary, recommendation +- Include `Location` line when `file` and/or `line` available + +Permission template file exists (`templates/aiken/permission_prompt.md`) but current runtime behavior constructs prompt data directly from `PermissionPromptSpec` and does not render this template for provider calls. 
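As a rough illustration of the findings rendering rules above, the sketch below shows the `- *(none)*` placeholder and the conditional `Location` line. Field layout and helper names are assumptions for illustration, not the exact template output:

```rust
// Hypothetical finding shape mirroring the report fields described above.
struct Finding {
    title: String,
    severity: String,
    summary: String,
    recommendation: String,
    file: Option<String>,
    line: Option<u32>,
}

fn render_findings_markdown(findings: &[Finding]) -> String {
    if findings.is_empty() {
        return "- *(none)*".to_string();
    }

    findings
        .iter()
        .map(|f| {
            let mut entry = format!(
                "- **{}** ({})\n  - Summary: {}\n  - Recommendation: {}",
                f.title, f.severity, f.summary, f.recommendation
            );
            // Emit a Location line only when file and/or line are available.
            match (&f.file, f.line) {
                (Some(file), Some(line)) => {
                    entry.push_str(&format!("\n  - Location: {}:{}", file, line));
                }
                (Some(file), None) => entry.push_str(&format!("\n  - Location: {}", file)),
                (None, Some(line)) => entry.push_str(&format!("\n  - Location: line {}", line)),
                (None, None) => {}
            }
            entry
        })
        .collect::<Vec<_>>()
        .join("\n")
}
```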
+ +## Embedded Seed Skills + +When using default `--skills-dir` and path is absent, embedded content is loaded from: +- `skills/vulnerabilities/001-strict-value-equality.md` + +## Current Acceptance Signals (Tests) + +E2E tests assert: +- `audit --help` works with unstable feature +- `audit` fails without `trix.toml` (scoped command requirement) +- `audit` fails for missing custom skills dir +- `audit` succeeds after `init --yes` +- Outputs are created: + - `.tx3/audit/state.json` + - `.tx3/audit/vulnerabilities.md` +- State contract basics: + - `version == "1"` + - `iterations.len() == 3` for seed skills + +Unit tests assert: +- Skill parser behavior and validation errors +- Source discovery recursion and ignored directories +- Strict read scope allows known file and rejects directory listing +- Report markdown includes location formatting + +## Specification Evolution Notes + +The following items represent milestone evolution from initial scaffolding to current implementation: + +1. **Real provider integrations now exist** (`openai`, `anthropic`, `ollama`), not contract-only. +2. **Interactive read tool loop is implemented** with bounded local command execution. +3. **Additional CLI controls exist** (`endpoint`, `model`, `api_key_env`, `ai_logs`, `read_scope`, `interactive_permissions`). +4. **Strict/workspace read scopes are enforced in code**. +5. **Seed skill fallback is embedded** when default skills directory is not found. +6. **Permission prompt template is currently not part of runtime rendering path**. + +## Spec-Driven Viability Assessment + +Using this document for spec-driven development of the current `audit` behavior is **viable**. + +This section upgrades the contract into strict spec-first form via: +- normative requirement levels (`MUST`/`SHOULD`) +- requirement-to-test traceability +- canonical golden fixtures + +## Normative Requirements + +### MUST (behavior compatibility) + +- Same CLI flags, defaults, and unstable gating behavior. +- Same provider selection and provider-specific defaults/env handling. +- Same `.ak` discovery semantics and skipped directories. +- Same skills parsing rules (frontmatter, required fields, severity enum, unknown-field rejection). +- Same iterative per-skill persistence to state JSON. +- Same read-request action schema and local command mapping. +- Same path confinement and strict/workspace enforcement. +- Same max step guard (`25`) and command output truncation (`30_000` chars). +- Same report generation shape and findings rendering. +- Same seed-skill fallback behavior and baseline test outcomes. + +### SHOULD (implementation quality) + +- Keep provider/network and local-tooling boundaries separated behind provider adapter interfaces. +- Preserve deterministic ordering where current implementation sorts inputs/paths. +- Preserve error messages close to current wording when feasible, to reduce e2e churn. +- Keep state/report writes atomic at logical checkpoints (initial state + post-iteration). 
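For the path-confinement MUST above, a minimal sketch of the intended check (canonicalize, then require the result to stay under the canonical project root). The helper name and error strings are illustrative; the enforced version lives in `src/commands/audit/providers/shared.rs`:

```rust
use std::path::{Path, PathBuf};

// Illustrative confinement check: resolve the requested path and reject
// anything that escapes the canonical project root.
fn confine_to_root(root: &Path, requested: &str) -> Result<PathBuf, String> {
    let canonical_root = root
        .canonicalize()
        .map_err(|e| format!("cannot canonicalize project root: {e}"))?;
    let canonical = canonical_root
        .join(requested)
        .canonicalize()
        .map_err(|e| format!("cannot canonicalize '{requested}': {e}"))?;

    if canonical.starts_with(&canonical_root) {
        Ok(canonical)
    } else {
        Err(format!("'{requested}' escapes the project root"))
    }
}
```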
+ +## Requirement-to-Test Traceability + +| Requirement | Test anchors | +|---|---| +| CLI visibility and unstable behavior | `tests/e2e/smoke.rs::audit_help_runs_without_error`, `tests/e2e/smoke.rs::audit_help_displays_provider_options` | +| Scoped command requirement (`trix.toml`) | `tests/e2e/edge_cases.rs::aiken_audit_fails_without_trix_config` | +| Missing custom skills dir failure | `tests/e2e/edge_cases.rs::aiken_audit_fails_with_missing_skills_dir` | +| Baseline success path + output artifacts | `tests/e2e/happy_path.rs::aiken_audit_runs_in_initialized_project` | +| State shape baseline (`version`, seed iterations) | `tests/e2e/happy_path.rs::aiken_audit_runs_in_initialized_project` | +| Skill parser frontmatter/body behavior | `src/commands/audit/mod.rs::parse_skill_content_reads_frontmatter_and_guidance` | +| Skill parser validation failures | `src/commands/audit/mod.rs::parse_skill_content_requires_frontmatter`, `src/commands/audit/mod.rs::parse_skill_content_rejects_invalid_severity` | +| Source discovery recursion and filtering | `src/commands/audit/mod.rs::discover_source_files_finds_ak_files_recursively`, `src/commands/audit/mod.rs::discover_source_files_skips_target_tx3_and_build_dirs` | +| Strict read-scope allows known file | `src/commands/audit/providers/shared.rs::execute_read_request_strict_allows_known_file` | +| Strict read-scope denies directory listing | `src/commands/audit/providers/shared.rs::execute_read_request_strict_rejects_list_dir` | +| Report location rendering contract | `src/commands/audit/mod.rs::render_findings_markdown_includes_location_when_available` | diff --git a/skills/vulnerabilities/001-state-transition.md b/skills/vulnerabilities/001-state-transition.md deleted file mode 100644 index 9c678de..0000000 --- a/skills/vulnerabilities/001-state-transition.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -id: state-transition-001 -name: Unsafe state transition validation -severity: high -description: Ensure transitions are fully guarded by explicit preconditions and invariants. -prompt_fragment: Review all state transition paths and identify missing or bypassable validation checks. -examples: - - A transition branch updates state without validating current state/version. - - A fallback branch bypasses checks that are present in the main path. -false_positives: - - Branches that are unreachable due to upstream exhaustive pattern matching. -references: - - https://plutus.cardano.intersectmbo.org/ -tags: - - state-machine - - invariants -confidence_hint: medium ---- - -# When to use - -Use this skill when auditing validators or state machines that evolve datum/state across transactions. - -# Detection instructions - -1. Enumerate every possible transition branch. -2. Verify explicit preconditions for each branch (state shape, signer set, timing/value gates). -3. Check for bypasses where validation exists in one branch but not in another. -4. Confirm invariants are preserved before and after transitions. - -# Reporting guidance - -Prefer findings with concrete branch/path evidence and explain why a transition can be bypassed or made inconsistent. 
diff --git a/skills/vulnerabilities/003-strict-value-equality.md b/skills/vulnerabilities/001-strict-value-equality.md similarity index 97% rename from skills/vulnerabilities/003-strict-value-equality.md rename to skills/vulnerabilities/001-strict-value-equality.md index 5ecd80b..1837f60 100644 --- a/skills/vulnerabilities/003-strict-value-equality.md +++ b/skills/vulnerabilities/001-strict-value-equality.md @@ -1,5 +1,5 @@ --- -id: strict-value-equality-003 +id: strict-value-equality-001 name: Strict value equality on ADA or full Value severity: high description: Detect unsatisfiable validator constraints caused by exact equality checks on ADA or complete output values. diff --git a/skills/vulnerabilities/002-authz-boundaries.md b/skills/vulnerabilities/002-authz-boundaries.md deleted file mode 100644 index 050d009..0000000 --- a/skills/vulnerabilities/002-authz-boundaries.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -id: authz-boundaries-002 -name: Authorization boundary bypass -severity: high -description: Validate signer and role checks for every sensitive branch. -prompt_fragment: Find code paths where authorization assumptions are implicit or can be bypassed. -examples: - - Sensitive branch checks datum fields but does not verify signer identity. - - A role check exists only in one constructor case and not in another. -false_positives: - - Purely read-only branches that cannot trigger state/value changes. -references: - - https://plutus.cardano.intersectmbo.org/ -tags: - - authz - - signers -confidence_hint: medium ---- - -# When to use - -Use this skill for any validator path that can move value, mutate state, or grant privileges. - -# Detection instructions - -1. List all privileged operations and their entry branches. -2. Verify signer checks and role assertions are explicit in each branch. -3. Identify implicit assumptions (e.g., relying on script purpose without signer validation). -4. Ensure negative paths cannot reach privileged effects. - -# Reporting guidance - -Include the exact branch/function where authz is missing and a realistic abuse scenario. diff --git a/src/commands/audit/mod.rs b/src/commands/audit/mod.rs index 89c6aba..4ac0fe2 100644 --- a/src/commands/audit/mod.rs +++ b/src/commands/audit/mod.rs @@ -5,8 +5,8 @@ use std::path::{Path, PathBuf}; use crate::config::{ProfileConfig, RootConfig}; -mod model; -mod providers; +pub mod model; +pub mod providers; use self::model::{ AnalysisStateJson, MiniPrompt, PermissionPromptSpec, SkillIterationResult, @@ -445,16 +445,8 @@ fn load_skills(skills_dir: &Path, skills_dir_arg: &str) -> Result Result> { let seed_files = [ ( - Path::new("skills/vulnerabilities/001-state-transition.md"), - include_str!("../../../skills/vulnerabilities/001-state-transition.md"), - ), - ( - Path::new("skills/vulnerabilities/002-authz-boundaries.md"), - include_str!("../../../skills/vulnerabilities/002-authz-boundaries.md"), - ), - ( - Path::new("skills/vulnerabilities/003-strict-value-equality.md"), - include_str!("../../../skills/vulnerabilities/003-strict-value-equality.md"), + Path::new("skills/vulnerabilities/001-strict-value-equality.md"), + include_str!("../../../skills/vulnerabilities/001-strict-value-equality.md"), ), ]; @@ -595,7 +587,7 @@ mod tests { #[test] fn parse_skill_content_reads_frontmatter_and_guidance() { let content = r#"--- -id: strict-value-equality-003 +id: strict-value-equality-001 name: Strict value equality severity: high description: Detect strict equality checks for ADA. 
@@ -613,7 +605,7 @@ Check validator outputs and avoid false positives for without_lovelace(). let skill = parse_skill_content(Path::new("skill.md"), content).expect("should parse"); - assert_eq!(skill.id, "strict-value-equality-003"); + assert_eq!(skill.id, "strict-value-equality-001"); assert_eq!(skill.name, "Strict value equality"); assert_eq!(skill.severity, "high"); assert_eq!(skill.examples.len(), 1); diff --git a/tests/e2e/happy_path.rs b/tests/e2e/happy_path.rs index 7083030..496508d 100644 --- a/tests/e2e/happy_path.rs +++ b/tests/e2e/happy_path.rs @@ -167,9 +167,8 @@ fn aiken_audit_runs_in_initialized_project() { serde_json::from_str(&state_content).expect("state.json should be valid AnalysisStateJson"); assert_eq!(state.version, "1"); - assert_eq!( - state.iterations.len(), - 3, - "expected one iteration per seed skill" + assert!( + !state.iterations.is_empty(), + "expected at least one analysis iteration" ); } From 2a95fd79af5a8ba628f47338588e809d55a045c3 Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Tue, 24 Feb 2026 17:42:44 -0300 Subject: [PATCH 10/18] feat: update vulnerability skills --- .../001-strict-value-equality.md | 39 ++++++++----------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/skills/vulnerabilities/001-strict-value-equality.md b/skills/vulnerabilities/001-strict-value-equality.md index 1837f60..9701152 100644 --- a/skills/vulnerabilities/001-strict-value-equality.md +++ b/skills/vulnerabilities/001-strict-value-equality.md @@ -1,35 +1,28 @@ --- id: strict-value-equality-001 -name: Strict value equality on ADA or full Value +name: strict-value-equality severity: high -description: Detect unsatisfiable validator constraints caused by exact equality checks on ADA or complete output values. +description: Vulnerabilities related to strict value equality in the protocol. prompt_fragment: Read validator scripts and flag strict equality checks on ADA or full output values; treat comparisons using without_lovelace() as acceptable and not strict ADA equality. -examples: - - output.value == expected_value - - output.value.lovelace == exact_amount -false_positives: - - Comparisons using without_lovelace() to ignore ADA component. - - Checks that enforce minimum lovelace instead of exact equality. -references: - - https://plutus.cardano.intersectmbo.org/ -tags: - - value - - lovelace - - constraints confidence_hint: medium --- -# When to use +# strict-value-equality -Use this skill whenever validators compare output values or ADA amounts for equality. +Validators could become unsatisfiable when enforcing exact equality on ADA or full output values. +Exact value equality is almost always incorrect for ADA in Plutus V2. Validators should enforce minimums, not exact amounts, unless there is a very strong invariant requiring exact equality. -# Detection instructions +## When to use -1. Find equality checks on full values and lovelace amounts. -2. Flag exact equality constraints that can become unsatisfiable due to fees/min-ADA variability. -3. Accept checks using `without_lovelace()` as intentional ADA-agnostic comparisons. -4. Prefer invariants based on lower bounds for ADA, unless a strict invariant is explicitly justified. +Every time a search for vulnerabilities related to strict value equality in the protocol is explicitly requested by the user. -# Reporting guidance +## Instructions -Include the equality expression and explain why it can fail in realistic transaction construction. \ No newline at end of file +1. 
Read the validator scripts of the protocol and identify any instances where strict value equality is enforced on ADA or full output values. +2. Take into account that values compared using `without_lovelace()` are not considered strict equality, as they ignore the ADA component. + +## Reporting guidance + +- Include the exact equality expression and where it appears. +- Explain why it can make the validator unsatisfiable in realistic transaction construction. +- Recommend replacing strict equality on ADA with a minimum-bound check when possible. \ No newline at end of file From 06afc264623c65e1c096a1d86b24d219d0fbc66e Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Wed, 25 Feb 2026 17:22:03 -0300 Subject: [PATCH 11/18] feat: add Aiken prompt templates and refactor user prompt construction --- src/commands/audit/providers/anthropic.rs | 14 ++++++---- src/commands/audit/providers/openai.rs | 14 ++++++---- src/commands/audit/providers/shared.rs | 28 +++++++++++++------ .../aiken/audit_agent_initial_user_prompt.md | 20 +++++++++++++ templates/aiken/audit_agent_system_prompt.md | 12 ++++++++ .../aiken/audit_agent_tool_result_prompt.md | 4 +++ 6 files changed, 72 insertions(+), 20 deletions(-) create mode 100644 templates/aiken/audit_agent_initial_user_prompt.md create mode 100644 templates/aiken/audit_agent_system_prompt.md create mode 100644 templates/aiken/audit_agent_tool_result_prompt.md diff --git a/src/commands/audit/providers/anthropic.rs b/src/commands/audit/providers/anthropic.rs index cfc4759..010096e 100644 --- a/src/commands/audit/providers/anthropic.rs +++ b/src/commands/audit/providers/anthropic.rs @@ -4,6 +4,7 @@ use std::path::Path; use super::shared::{ block_on_runtime_aware, build_agent_system_prompt, build_initial_user_prompt, + build_tool_result_user_prompt, describe_read_request_friendly, execute_read_request, iteration_from_parsed, log_agent_progress, parse_agent_action, render_model_output_for_log, render_tool_output_for_log, summarize_read_request, AgentAction, @@ -50,7 +51,12 @@ impl AnalysisProvider for AnthropicProvider { })?; let system_prompt = build_agent_system_prompt(); - let initial_user_prompt = build_initial_user_prompt(prompt, source_references, permission_prompt); + let initial_user_prompt = build_initial_user_prompt( + prompt, + source_references, + &canonical_root, + permission_prompt, + ); let mut messages = vec![serde_json::json!({ "role": "user", @@ -171,11 +177,7 @@ impl AnalysisProvider for AnthropicProvider { messages.push(serde_json::json!({ "role": "user", - "content": format!( - "Tool result for {:?}:\n{}\n\nContinue and return JSON.", - request, - output - ), + "content": build_tool_result_user_prompt(&request, &output), })); } } diff --git a/src/commands/audit/providers/openai.rs b/src/commands/audit/providers/openai.rs index 652d6e5..624d9ec 100644 --- a/src/commands/audit/providers/openai.rs +++ b/src/commands/audit/providers/openai.rs @@ -4,6 +4,7 @@ use std::path::Path; use super::shared::{ block_on_runtime_aware, build_agent_system_prompt, build_initial_user_prompt, + build_tool_result_user_prompt, describe_read_request_friendly, execute_read_request, iteration_from_parsed, log_agent_progress, parse_agent_action, render_model_output_for_log, render_tool_output_for_log, summarize_read_request, AgentAction, @@ -49,7 +50,12 @@ impl AnalysisProvider for OpenAiProvider { })?; let system_prompt = build_agent_system_prompt(); - let initial_user_prompt = build_initial_user_prompt(prompt, source_references, permission_prompt); + let 
initial_user_prompt = build_initial_user_prompt( + prompt, + source_references, + &canonical_root, + permission_prompt, + ); let mut messages = vec![ serde_json::json!({ @@ -176,11 +182,7 @@ impl AnalysisProvider for OpenAiProvider { messages.push(serde_json::json!({ "role": "user", - "content": format!( - "Tool result for {:?}:\n{}\n\nContinue and return JSON.", - request, - output - ), + "content": build_tool_result_user_prompt(&request, &output), })); } } diff --git a/src/commands/audit/providers/shared.rs b/src/commands/audit/providers/shared.rs index f0cb7ea..57388bc 100644 --- a/src/commands/audit/providers/shared.rs +++ b/src/commands/audit/providers/shared.rs @@ -13,6 +13,12 @@ use crate::commands::audit::model::{ pub(super) const MAX_AGENT_STEPS: usize = 25; const MAX_COMMAND_OUTPUT_CHARS: usize = 30_000; +const AGENT_SYSTEM_PROMPT: &str = + include_str!("../../../../templates/aiken/audit_agent_system_prompt.md"); +const INITIAL_USER_PROMPT_TEMPLATE: &str = + include_str!("../../../../templates/aiken/audit_agent_initial_user_prompt.md"); +const TOOL_RESULT_PROMPT_TEMPLATE: &str = + include_str!("../../../../templates/aiken/audit_agent_tool_result_prompt.md"); #[derive(Debug)] pub(super) enum AgentAction { @@ -49,7 +55,7 @@ struct RawReadRequest { } pub(super) fn build_agent_system_prompt() -> &'static str { - "You are a security auditor specialized in Aiken smart contracts. You must return JSON only. Use an iterative process: request local reads when needed, then finish with findings.\n\nValid JSON actions:\n1) {\"action\":\"read_file\",\"path\":\"relative/path.ak\"}\n2) {\"action\":\"grep\",\"pattern\":\"regex\",\"path\":\"relative/path/or/dir\",\"context_lines\":2}\n3) {\"action\":\"list_dir\",\"path\":\"relative/path\"}\n4) {\"action\":\"find_files\",\"path\":\"relative/path\",\"glob\":\"*.ak\"}\n5) {\"action\":\"final\",\"status\":\"completed|scaffolded\",\"findings\":[{\"title\":string,\"severity\":string,\"summary\":string,\"evidence\":[string],\"recommendation\":string,\"file\":string|null,\"line\":number|null}],\"next_prompt\":string|null}\n\nPrefer returning file and line whenever you can confidently identify where the bug exists or where the recommendation applies.\n\nNever include markdown fences." + AGENT_SYSTEM_PROMPT } fn parse_line_number(value: Option<&Value>) -> Option { @@ -67,17 +73,23 @@ fn parse_line_number(value: Option<&Value>) -> Option { pub(super) fn build_initial_user_prompt( prompt: &MiniPrompt, source_references: &[String], + workspace_root: &Path, permission_prompt: &PermissionPromptSpec, ) -> String { - format!( - "Analyze Aiken code for this single vulnerability skill. 
You are given file references only (no source code inline).\n\nSkill:\n{}\n\nReferenced Aiken files:\n{}\n\nAllowed read commands: {}\nScope rules:\n- {}\n\nReturn JSON action only.", - prompt.text, - render_source_references(source_references), - permission_prompt.allowed_commands.join(", "), - permission_prompt.scope_rules.join("\n- "), - ) + INITIAL_USER_PROMPT_TEMPLATE + .replace("{{SKILL}}", &prompt.text) + .replace("{{WORKSPACE_ROOT}}", &workspace_root.display().to_string()) + .replace("{{SOURCE_REFERENCES}}", &render_source_references(source_references)) + .replace("{{ALLOWED_COMMANDS}}", &permission_prompt.allowed_commands.join(", ")) + .replace("{{SCOPE_RULES}}", &permission_prompt.scope_rules.join("\n- ")) } + pub(super) fn build_tool_result_user_prompt(request: &ReadRequest, output: &str) -> String { + TOOL_RESULT_PROMPT_TEMPLATE + .replace("{{REQUEST}}", &format!("{:?}", request)) + .replace("{{OUTPUT}}", output) + } + fn render_source_references(source_references: &[String]) -> String { if source_references.is_empty() { return "- (none)".to_string(); diff --git a/templates/aiken/audit_agent_initial_user_prompt.md b/templates/aiken/audit_agent_initial_user_prompt.md new file mode 100644 index 0000000..dbd1471 --- /dev/null +++ b/templates/aiken/audit_agent_initial_user_prompt.md @@ -0,0 +1,20 @@ +Analyze Aiken code for this single vulnerability skill. You are given file references only (no source code inline). + +Workspace boundary: +- Workspace root: {{WORKSPACE_ROOT}} +- This is the only workspace you may operate in. +- Do not access or reason about files outside the allowed workspace scope. + +Skill: +{{SKILL}} + +Referenced Aiken files: +{{SOURCE_REFERENCES}} + +Use the referenced files as your starting point. You may read additional files only if they are inside the allowed workspace scope and strictly required to validate the finding. + +Allowed read commands: {{ALLOWED_COMMANDS}} +Scope rules: +- {{SCOPE_RULES}} + +Return JSON action only. \ No newline at end of file diff --git a/templates/aiken/audit_agent_system_prompt.md b/templates/aiken/audit_agent_system_prompt.md new file mode 100644 index 0000000..24e4007 --- /dev/null +++ b/templates/aiken/audit_agent_system_prompt.md @@ -0,0 +1,12 @@ +You are a security auditor specialized in Aiken smart contracts. You must return JSON only. Use an iterative process: request local reads when needed, then finish with findings. + +Valid JSON actions: +1) {"action":"read_file","path":"relative/path.ak"} +2) {"action":"grep","pattern":"regex","path":"relative/path/or/dir","context_lines":2} +3) {"action":"list_dir","path":"relative/path"} +4) {"action":"find_files","path":"relative/path","glob":"*.ak"} +5) {"action":"final","status":"completed|scaffolded","findings":[{"title":string,"severity":string,"summary":string,"evidence":[string],"recommendation":string,"file":string|null,"line":number|null}],"next_prompt":string|null} + +Prefer returning file and line whenever you can confidently identify where the bug exists or where the recommendation applies. + +Never include markdown fences. \ No newline at end of file diff --git a/templates/aiken/audit_agent_tool_result_prompt.md b/templates/aiken/audit_agent_tool_result_prompt.md new file mode 100644 index 0000000..424094f --- /dev/null +++ b/templates/aiken/audit_agent_tool_result_prompt.md @@ -0,0 +1,4 @@ +Tool result for {{REQUEST}}: +{{OUTPUT}} + +Continue and return JSON. 
\ No newline at end of file From 8ecf5f5c9de703ad6bd8871613f08cf9d76daa74 Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Wed, 25 Feb 2026 18:07:32 -0300 Subject: [PATCH 12/18] feat: enhance permission prompt handling and update initial user prompt template --- src/commands/audit/mod.rs | 1 + src/commands/audit/model.rs | 6 ++++ src/commands/audit/providers/anthropic.rs | 8 ++---- src/commands/audit/providers/openai.rs | 8 ++---- src/commands/audit/providers/shared.rs | 28 ++++++++++++++----- .../aiken/audit_agent_initial_user_prompt.md | 10 ++----- templates/aiken/permission_prompt.md | 5 +++- 7 files changed, 38 insertions(+), 28 deletions(-) diff --git a/src/commands/audit/mod.rs b/src/commands/audit/mod.rs index 4ac0fe2..c2cc4ae 100644 --- a/src/commands/audit/mod.rs +++ b/src/commands/audit/mod.rs @@ -386,6 +386,7 @@ fn build_permission_prompt_spec( "ls".to_string(), ], scope_rules, + workspace_root: project_root.display().to_string(), read_scope: read_scope.as_str().to_string(), interactive_permissions, allowed_paths, diff --git a/src/commands/audit/model.rs b/src/commands/audit/model.rs index e69f761..47edb38 100644 --- a/src/commands/audit/model.rs +++ b/src/commands/audit/model.rs @@ -63,6 +63,8 @@ pub struct PermissionPromptSpec { pub shell: String, pub allowed_commands: Vec, pub scope_rules: Vec, + #[serde(default = "default_workspace_root")] + pub workspace_root: String, #[serde(default = "default_read_scope")] pub read_scope: String, #[serde(default)] @@ -71,6 +73,10 @@ pub struct PermissionPromptSpec { pub allowed_paths: Vec, } +fn default_workspace_root() -> String { + ".".to_string() +} + fn default_read_scope() -> String { "workspace".to_string() } diff --git a/src/commands/audit/providers/anthropic.rs b/src/commands/audit/providers/anthropic.rs index 010096e..19cbfaa 100644 --- a/src/commands/audit/providers/anthropic.rs +++ b/src/commands/audit/providers/anthropic.rs @@ -51,12 +51,8 @@ impl AnalysisProvider for AnthropicProvider { })?; let system_prompt = build_agent_system_prompt(); - let initial_user_prompt = build_initial_user_prompt( - prompt, - source_references, - &canonical_root, - permission_prompt, - ); + let initial_user_prompt = + build_initial_user_prompt(prompt, source_references, permission_prompt); let mut messages = vec![serde_json::json!({ "role": "user", diff --git a/src/commands/audit/providers/openai.rs b/src/commands/audit/providers/openai.rs index 624d9ec..62fca6d 100644 --- a/src/commands/audit/providers/openai.rs +++ b/src/commands/audit/providers/openai.rs @@ -50,12 +50,8 @@ impl AnalysisProvider for OpenAiProvider { })?; let system_prompt = build_agent_system_prompt(); - let initial_user_prompt = build_initial_user_prompt( - prompt, - source_references, - &canonical_root, - permission_prompt, - ); + let initial_user_prompt = + build_initial_user_prompt(prompt, source_references, permission_prompt); let mut messages = vec![ serde_json::json!({ diff --git a/src/commands/audit/providers/shared.rs b/src/commands/audit/providers/shared.rs index 57388bc..e4a0436 100644 --- a/src/commands/audit/providers/shared.rs +++ b/src/commands/audit/providers/shared.rs @@ -17,6 +17,8 @@ const AGENT_SYSTEM_PROMPT: &str = include_str!("../../../../templates/aiken/audit_agent_system_prompt.md"); const INITIAL_USER_PROMPT_TEMPLATE: &str = include_str!("../../../../templates/aiken/audit_agent_initial_user_prompt.md"); +const PERMISSION_PROMPT_TEMPLATE: &str = + include_str!("../../../../templates/aiken/permission_prompt.md"); const TOOL_RESULT_PROMPT_TEMPLATE: 
&str = include_str!("../../../../templates/aiken/audit_agent_tool_result_prompt.md"); @@ -73,22 +75,32 @@ fn parse_line_number(value: Option<&Value>) -> Option { pub(super) fn build_initial_user_prompt( prompt: &MiniPrompt, source_references: &[String], - workspace_root: &Path, permission_prompt: &PermissionPromptSpec, ) -> String { INITIAL_USER_PROMPT_TEMPLATE .replace("{{SKILL}}", &prompt.text) - .replace("{{WORKSPACE_ROOT}}", &workspace_root.display().to_string()) .replace("{{SOURCE_REFERENCES}}", &render_source_references(source_references)) - .replace("{{ALLOWED_COMMANDS}}", &permission_prompt.allowed_commands.join(", ")) - .replace("{{SCOPE_RULES}}", &permission_prompt.scope_rules.join("\n- ")) + .replace( + "{{PERMISSION_PROMPT}}", + &render_permission_prompt(permission_prompt), + ) } - pub(super) fn build_tool_result_user_prompt(request: &ReadRequest, output: &str) -> String { - TOOL_RESULT_PROMPT_TEMPLATE +pub(super) fn build_tool_result_user_prompt(request: &ReadRequest, output: &str) -> String { + TOOL_RESULT_PROMPT_TEMPLATE .replace("{{REQUEST}}", &format!("{:?}", request)) .replace("{{OUTPUT}}", output) - } +} + +fn render_permission_prompt(permission_prompt: &PermissionPromptSpec) -> String { + PERMISSION_PROMPT_TEMPLATE + .replace("{{ workspace_root }}", &permission_prompt.workspace_root) + .replace( + "{{ allowed_commands }}", + &permission_prompt.allowed_commands.join(", "), + ) + .replace("{{ scope_rules }}", &permission_prompt.scope_rules.join("\n- ")) +} fn render_source_references(source_references: &[String]) -> String { if source_references.is_empty() { @@ -609,6 +621,7 @@ mod tests { shell: "bash".to_string(), allowed_commands: vec!["cat".to_string()], scope_rules: vec![], + workspace_root: root.display().to_string(), read_scope: "strict".to_string(), interactive_permissions: false, allowed_paths: vec!["validators/spend.ak".to_string()], @@ -635,6 +648,7 @@ mod tests { shell: "bash".to_string(), allowed_commands: vec!["ls".to_string()], scope_rules: vec![], + workspace_root: root.display().to_string(), read_scope: "strict".to_string(), interactive_permissions: false, allowed_paths: vec!["validators/spend.ak".to_string()], diff --git a/templates/aiken/audit_agent_initial_user_prompt.md b/templates/aiken/audit_agent_initial_user_prompt.md index dbd1471..b8b0e16 100644 --- a/templates/aiken/audit_agent_initial_user_prompt.md +++ b/templates/aiken/audit_agent_initial_user_prompt.md @@ -1,10 +1,5 @@ Analyze Aiken code for this single vulnerability skill. You are given file references only (no source code inline). -Workspace boundary: -- Workspace root: {{WORKSPACE_ROOT}} -- This is the only workspace you may operate in. -- Do not access or reason about files outside the allowed workspace scope. - Skill: {{SKILL}} @@ -13,8 +8,7 @@ Referenced Aiken files: Use the referenced files as your starting point. You may read additional files only if they are inside the allowed workspace scope and strictly required to validate the finding. -Allowed read commands: {{ALLOWED_COMMANDS}} -Scope rules: -- {{SCOPE_RULES}} +Execution permissions: +{{PERMISSION_PROMPT}} Return JSON action only. \ No newline at end of file diff --git a/templates/aiken/permission_prompt.md b/templates/aiken/permission_prompt.md index 0e47b6e..e630182 100644 --- a/templates/aiken/permission_prompt.md +++ b/templates/aiken/permission_prompt.md @@ -1,4 +1,7 @@ -You are analyzing local Aiken code for vulnerabilities. 
+Workspace boundary: +- Workspace root: {{ workspace_root }} +- This is the only workspace you may operate in. +- Do not access or reason about files outside the allowed workspace scope. Execution constraints: - Execute commands only in the allowed project scope. From 2f83b0690ee2099e85bad16fb45443d66bd70726 Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Thu, 26 Feb 2026 10:33:21 -0300 Subject: [PATCH 13/18] feat: enhance skill prompt formatting and clarify task-priority rules in Aiken template --- src/commands/audit/mod.rs | 20 +++++++++++-------- .../aiken/audit_agent_initial_user_prompt.md | 8 +++++++- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/commands/audit/mod.rs b/src/commands/audit/mod.rs index c2cc4ae..195bfd3 100644 --- a/src/commands/audit/mod.rs +++ b/src/commands/audit/mod.rs @@ -303,19 +303,20 @@ fn build_mini_prompt(skill: &VulnerabilitySkill) -> MiniPrompt { fn compose_skill_prompt(skill: &VulnerabilitySkill) -> String { let mut sections = vec![ - format!("Skill ID: {}", skill.id), - format!("Name: {}", skill.name), - format!("Severity: {}", skill.severity), - format!("Description: {}", skill.description), - format!("Prompt Fragment: {}", skill.prompt_fragment), + "Skill Summary (use as source of truth):".to_string(), + format!("- Skill ID: {}", skill.id), + format!("- Name: {}", skill.name), + format!("- Severity: {}", skill.severity), + format!("- Description: {}", skill.description), + format!("- Primary detection objective: {}", skill.prompt_fragment), ]; if !skill.tags.is_empty() { - sections.push(format!("Tags: {}", skill.tags.join(", "))); + sections.push(format!("- Tags: {}", skill.tags.join(", "))); } if let Some(hint) = &skill.confidence_hint { - sections.push(format!("Confidence Hint: {}", hint)); + sections.push(format!("- Confidence Hint: {}", hint)); } if !skill.examples.is_empty() { @@ -334,7 +335,10 @@ fn compose_skill_prompt(skill: &VulnerabilitySkill) -> String { } if !skill.guidance_markdown.trim().is_empty() { - sections.push(format!("Guidance:\n{}", skill.guidance_markdown.trim())); + sections.push(format!( + "Detailed Guidance (authoritative):\n{}", + skill.guidance_markdown.trim() + )); } sections.join("\n\n") diff --git a/templates/aiken/audit_agent_initial_user_prompt.md b/templates/aiken/audit_agent_initial_user_prompt.md index b8b0e16..a81ebe1 100644 --- a/templates/aiken/audit_agent_initial_user_prompt.md +++ b/templates/aiken/audit_agent_initial_user_prompt.md @@ -1,7 +1,13 @@ Analyze Aiken code for this single vulnerability skill. You are given file references only (no source code inline). -Skill: +Task-priority rule: +- Treat the Skill block below as the authoritative, task-specific policy for this run. +- If generic wording elsewhere is broader, keep the Skill block as the source of truth for what to detect and what to ignore. 
+ +Skill (authoritative context): +--- SKILL START --- {{SKILL}} +--- SKILL END --- Referenced Aiken files: {{SOURCE_REFERENCES}} From 80f78c92fe56735b19bc9422266d43a7f201fa8d Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Fri, 27 Feb 2026 12:48:49 -0300 Subject: [PATCH 14/18] feat: integrate Aiken AST generation and validator context extraction --- Cargo.lock | 726 ++++++++++++++++-- Cargo.toml | 1 + design/005-aiken-ast-validator-context.md | 280 +++++++ src/commands/audit/ast.rs | 448 +++++++++++ src/commands/audit/mod.rs | 30 +- src/commands/audit/model.rs | 52 ++ src/commands/audit/providers/anthropic.rs | 12 +- src/commands/audit/providers/mod.rs | 4 +- src/commands/audit/providers/openai.rs | 12 +- src/commands/audit/providers/scaffold.rs | 4 +- src/commands/audit/providers/shared.rs | 116 ++- .../aiken/audit_agent_initial_user_prompt.md | 7 + tests/e2e/happy_path.rs | 6 + 13 files changed, 1632 insertions(+), 66 deletions(-) create mode 100644 design/005-aiken-ast-validator-context.md create mode 100644 src/commands/audit/ast.rs diff --git a/Cargo.lock b/Cargo.lock index 99640c6..0a55097 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -49,6 +49,35 @@ dependencies = [ "memchr", ] +[[package]] +name = "aiken-lang" +version = "1.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578941589556cb61cc9c4f17ef8159d15c72b59b9602f3f506d85088d65d597c" +dependencies = [ + "blst", + "built", + "chumsky 0.9.3", + "cryptoxide 0.4.4", + "hex", + "indexmap 1.9.3", + "indoc", + "itertools 0.10.5", + "miette 7.6.0", + "num-bigint", + "ordinal", + "owo-colors 3.5.0", + "pallas-primitives 0.33.0", + "patricia_tree", + "petgraph 0.6.5", + "pretty 0.12.5", + "serde", + "strum 0.26.3", + "thiserror 1.0.69", + "uplc", + "vec1", +] + [[package]] name = "allocator-api2" version = "0.2.21" @@ -141,6 +170,12 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + [[package]] name = "arrayvec" version = "0.7.6" @@ -243,6 +278,17 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.5.0" @@ -320,6 +366,12 @@ dependencies = [ "backtrace", ] +[[package]] +name = "base16ct" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" + [[package]] name = "base58" version = "0.2.0" @@ -344,6 +396,12 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + [[package]] name = "basic-toml" version = "0.1.10" @@ -424,6 +482,18 @@ version = "2.9.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -456,6 +526,15 @@ dependencies = [ "serde", ] +[[package]] +name = "built" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56ed6191a7e78c36abdb16ab65341eefd73d64d303fffccdbb00d51e4205967b" +dependencies = [ + "git2", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -526,6 +605,16 @@ dependencies = [ "windows-link", ] +[[package]] +name = "chumsky" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +dependencies = [ + "hashbrown 0.14.5", + "stacker", +] + [[package]] name = "chumsky" version = "1.0.0-alpha.7" @@ -634,6 +723,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + [[package]] name = "const_format" version = "0.2.34" @@ -877,6 +972,18 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "generic-array", + "rand_core 0.6.4", + "subtle", + "zeroize", +] + [[package]] name = "crypto-common" version = "0.1.6" @@ -975,6 +1082,16 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da692b8d1080ea3045efaab14434d40468c3d8657e42abddfffca87b428f4c1b" +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid", + "zeroize", +] + [[package]] name = "deranged" version = "0.4.0" @@ -1062,6 +1179,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", + "const-oid", "crypto-common", "subtle", ] @@ -1148,6 +1266,20 @@ version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005" +[[package]] +name = "ecdsa" +version = "0.16.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" +dependencies = [ + "der", + "digest", + "elliptic-curve", + "rfc6979", + "signature", + "spki", +] + [[package]] name = "ed25519-bip32" version = "0.4.1" @@ -1163,6 +1295,25 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "elliptic-curve" +version = "0.13.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" +dependencies = [ + "base16ct", + "crypto-bigint", + "digest", + "ff", + "generic-array", + "group", + "pkcs8", + "rand_core 0.6.4", + "sec1", + "subtle", + "zeroize", +] + [[package]] name = "encode_unicode" version = "1.0.0" @@ -1200,12 +1351,28 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "ff" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "find-msvc-tools" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0399f9d26e5191ce32c498bebd31e7a3ceabc2745f0ac54af3f335126c3f24b3" +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "fixedbitset" version = "0.5.7" @@ -1262,6 +1429,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "futures" version = "0.3.31" @@ -1377,6 +1550,7 @@ checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", + "zeroize", ] [[package]] @@ -1424,12 +1598,36 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +[[package]] +name = "git2" +version = "0.20.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b88256088d75a56f8ecfa070513a775dd9107f6530ef14919dac831af9cfe2b" +dependencies = [ + "bitflags 2.9.1", + "libc", + "libgit2-sys", + "log", + "url", +] + [[package]] name = "glob" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "group" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" +dependencies = [ + "ff", + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "h2" version = "0.4.11" @@ -1459,6 +1657,12 @@ dependencies = [ "crunchy", ] +[[package]] +name = "hamming" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65043da274378d68241eb9a8f8f8aa54e349136f7b8e12f63e3ef44043cc30e1" + [[package]] name = "handlebars" version = "6.3.2" @@ -1503,6 +1707,15 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "hermit-abi" version = "0.5.2" @@ -1530,7 +1743,7 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5313b072ce3c597065a808dbf612c4c8e8590bdbf8b579508bf7a762c5eae6cd" dependencies = [ - "arrayvec", + "arrayvec 0.7.6", ] [[package]] @@ -1861,6 +2074,15 @@ dependencies = [ "serde", ] +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + [[package]] name = "inout" version = "0.1.4" @@ -1938,6 +2160,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -2012,6 +2243,20 @@ dependencies = [ "sha2", ] +[[package]] +name = "k256" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b" +dependencies = [ + "cfg-if", + "ecdsa", + "elliptic-curve", + "once_cell", + "sha2", + "signature", +] + [[package]] name = "lazy-regex" version = "3.6.0" @@ -2047,6 +2292,18 @@ version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" +[[package]] +name = "libgit2-sys" +version = "0.18.3+1.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9b3acc4b91781bb0b3386669d325163746af5f6e4f73e6d2d630e09a35f3487" +dependencies = [ + "cc", + "libc", + "libz-sys", + "pkg-config", +] + [[package]] name = "libredox" version = "0.1.4" @@ -2066,6 +2323,18 @@ dependencies = [ "zlib-rs", ] +[[package]] +name = "libz-sys" +version = "1.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4735e9cbde5aac84a5ce588f6b23a90b9b0b528f6c5a8db8a4aff300463a0839" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.9.4" @@ -2133,6 +2402,18 @@ version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +[[package]] +name = "miette" +version = "5.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59bb584eaeeab6bd0226ccf3509a69d7936d148cf3d036ad350abe35e8c6856e" +dependencies = [ + "miette-derive 5.10.0", + "once_cell", + "thiserror 1.0.69", + "unicode-width 0.1.14", +] + [[package]] name = "miette" version = "7.6.0" @@ -2142,9 +2423,9 @@ dependencies = [ "backtrace", "backtrace-ext", "cfg-if", - "miette-derive", - "owo-colors", - "supports-color", + "miette-derive 7.6.0", + "owo-colors 4.2.2", + "supports-color 3.0.2", "supports-hyperlinks", "supports-unicode", "terminal_size", @@ -2152,6 +2433,17 @@ dependencies = [ "unicode-width 0.1.14", ] +[[package]] +name = "miette-derive" +version = "5.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "miette-derive" version = "7.6.0" @@ -2175,6 +2467,7 @@ version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c0452a60c1863c1f50b5f77cd295e8d2786849f35883f0b9e18e7e6e1b5691b0" dependencies = [ + "half", "minicbor-derive 0.15.3", ] @@ -2365,7 +2658,7 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" dependencies = [ - "hermit-abi", + "hermit-abi 0.5.2", "libc", ] @@ -2416,8 +2709,8 @@ dependencies = [ "regex", "serde", "serde_json", - "strum", - "strum_macros", + "strum 0.27.2", + "strum_macros 0.27.1", "thiserror 2.0.17", ] @@ -2534,6 +2827,24 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ordinal" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c80c1530f46e9d8985706d7deb80b83172b250538902f607dea6cd6028851083" +dependencies = [ + "num-integer", +] + +[[package]] +name = "owo-colors" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" +dependencies = [ + "supports-color 1.3.1", +] + [[package]] name = "owo-colors" version = "4.2.2" @@ -2546,19 +2857,35 @@ version = "1.0.0-alpha.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c593225da7e45c57d209c8315805533ca597c0975ceeaf9c53ca96315c52bbb6" dependencies = [ - "pallas-addresses", - "pallas-codec", + "pallas-addresses 1.0.0-alpha.4", + "pallas-codec 1.0.0-alpha.4", "pallas-configs", - "pallas-crypto", + "pallas-crypto 1.0.0-alpha.4", "pallas-hardano", "pallas-network", - "pallas-primitives", - "pallas-traverse", + "pallas-primitives 1.0.0-alpha.4", + "pallas-traverse 1.0.0-alpha.4", "pallas-txbuilder", "pallas-utxorpc", "pallas-validate", ] +[[package]] +name = "pallas-addresses" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18f5f4dd205316335bf8eef77227e01a8a00b1fd60503d807520e93dd0362d0e" +dependencies = [ + "base58", + "bech32", + "crc", + "cryptoxide 0.4.4", + "hex", + "pallas-codec 0.33.0", + "pallas-crypto 0.33.0", + "thiserror 1.0.69", +] + [[package]] name = "pallas-addresses" version = "1.0.0-alpha.4" @@ -2570,8 +2897,21 @@ dependencies = [ "crc", "cryptoxide 0.4.4", "hex", - "pallas-codec", - "pallas-crypto", + "pallas-codec 1.0.0-alpha.4", + "pallas-crypto 1.0.0-alpha.4", + "thiserror 1.0.69", +] + +[[package]] +name = "pallas-codec" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2737b05f0dbb6d197feeb26ef15d2567e54833184bd469f5655a0537da89fa" +dependencies = [ + "hex", + "minicbor 0.25.1", + "num-bigint", + "serde", "thiserror 1.0.69", ] @@ -2595,14 +2935,29 @@ checksum = "e995aa8ed6c3a7f6fa75dbb513a62c6619840de7be8e74ef5f283adec528823a" dependencies = [ "base64 0.22.1", "num-rational", - "pallas-addresses", - "pallas-crypto", - "pallas-primitives", + "pallas-addresses 1.0.0-alpha.4", + "pallas-crypto 1.0.0-alpha.4", + "pallas-primitives 1.0.0-alpha.4", "serde", "serde_json", "serde_with", ] +[[package]] +name = "pallas-crypto" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0368945cd093e550febe36aef085431b1611c2e9196297cd70f4b21a4add054c" +dependencies = [ + "cryptoxide 0.4.4", + "hex", + "pallas-codec 0.33.0", + "rand_core 0.6.4", + "serde", + "thiserror 1.0.69", + "zeroize", +] + [[package]] name = "pallas-crypto" version = "1.0.0-alpha.4" @@ 
-2611,7 +2966,7 @@ checksum = "0f2e8c4de80742b21581edab58212683a51bca18047cc50ad585c3b2d2009642" dependencies = [ "cryptoxide 0.4.4", "hex", - "pallas-codec", + "pallas-codec 1.0.0-alpha.4", "rand_core 0.9.3", "serde", "thiserror 1.0.69", @@ -2625,11 +2980,11 @@ checksum = "62fe7b454c3b3e175b0cd7deade25a2298972370ed2b06f63ea7ea4ec7fe62ca" dependencies = [ "binary-layout", "hex", - "pallas-addresses", - "pallas-codec", - "pallas-crypto", + "pallas-addresses 1.0.0-alpha.4", + "pallas-codec 1.0.0-alpha.4", + "pallas-crypto 1.0.0-alpha.4", "pallas-network", - "pallas-traverse", + "pallas-traverse 1.0.0-alpha.4", "serde", "serde_json", "serde_with", @@ -2647,8 +3002,8 @@ dependencies = [ "byteorder", "hex", "itertools 0.13.0", - "pallas-codec", - "pallas-crypto", + "pallas-codec 1.0.0-alpha.4", + "pallas-crypto 1.0.0-alpha.4", "rand", "socket2 0.5.10", "thiserror 1.0.69", @@ -2656,6 +3011,22 @@ dependencies = [ "tracing", ] +[[package]] +name = "pallas-primitives" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb2acde8875c43446194d387c60fe2d6a127e4f8384bef3dcabd5a04e9422429" +dependencies = [ + "base58", + "bech32", + "hex", + "log", + "pallas-codec 0.33.0", + "pallas-crypto 0.33.0", + "serde", + "serde_json", +] + [[package]] name = "pallas-primitives" version = "1.0.0-alpha.4" @@ -2663,12 +3034,29 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "053c5cb5c9964f5fd8e14d51f30d2b155c2065f25082c9209de9a6c257c5b54d" dependencies = [ "hex", - "pallas-codec", - "pallas-crypto", + "pallas-codec 1.0.0-alpha.4", + "pallas-crypto 1.0.0-alpha.4", "serde", "serde_json", ] +[[package]] +name = "pallas-traverse" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab64895a0d94fed1ef2d99dd37e480ed0483e91eb98dcd2f94cc614fb9575173" +dependencies = [ + "hex", + "itertools 0.13.0", + "pallas-addresses 0.33.0", + "pallas-codec 0.33.0", + "pallas-crypto 0.33.0", + "pallas-primitives 0.33.0", + "paste", + "serde", + "thiserror 1.0.69", +] + [[package]] name = "pallas-traverse" version = "1.0.0-alpha.4" @@ -2677,10 +3065,10 @@ checksum = "abc220a19443ba76df3f09ceb43ab303a5cb031b8062beb70d7ed50ced15f09f" dependencies = [ "hex", "itertools 0.13.0", - "pallas-addresses", - "pallas-codec", - "pallas-crypto", - "pallas-primitives", + "pallas-addresses 1.0.0-alpha.4", + "pallas-codec 1.0.0-alpha.4", + "pallas-crypto 1.0.0-alpha.4", + "pallas-primitives 1.0.0-alpha.4", "paste", "serde", "thiserror 1.0.69", @@ -2693,11 +3081,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad12eaa6451ff6104c65e980f076763f30ac0dee5612544bc0485a92abdc64da" dependencies = [ "hex", - "pallas-addresses", - "pallas-codec", - "pallas-crypto", - "pallas-primitives", - "pallas-traverse", + "pallas-addresses 1.0.0-alpha.4", + "pallas-codec 1.0.0-alpha.4", + "pallas-crypto 1.0.0-alpha.4", + "pallas-primitives 1.0.0-alpha.4", + "pallas-traverse 1.0.0-alpha.4", "serde", "serde_json", "thiserror 1.0.69", @@ -2711,13 +3099,13 @@ checksum = "d17ae8308fa96979da3b2314d0b28e85403448751880582d4f2beafa7d68bb6c" dependencies = [ "blst", "bumpalo", - "chumsky", + "chumsky 1.0.0-alpha.7", "cryptoxide 0.4.4", "ibig", "minicbor 0.25.1", "num-traits", "once_cell", - "secp256k1", + "secp256k1 0.30.0", "thiserror 1.0.69", ] @@ -2727,10 +3115,10 @@ version = "1.0.0-alpha.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9e61f14013e304cf24eeb5eabcf062f3a2f455d6ccb6e1b33a84810e7488fd3f" dependencies = [ - "pallas-codec", - "pallas-crypto", - "pallas-primitives", - "pallas-traverse", + "pallas-codec 1.0.0-alpha.4", + "pallas-crypto 1.0.0-alpha.4", + "pallas-primitives 1.0.0-alpha.4", + "pallas-traverse 1.0.0-alpha.4", "pallas-validate", "prost-types", "utxorpc-spec 0.18.1", @@ -2745,11 +3133,11 @@ dependencies = [ "chrono", "hex", "itertools 0.14.0", - "pallas-addresses", - "pallas-codec", - "pallas-crypto", - "pallas-primitives", - "pallas-traverse", + "pallas-addresses 1.0.0-alpha.4", + "pallas-codec 1.0.0-alpha.4", + "pallas-crypto 1.0.0-alpha.4", + "pallas-primitives 1.0.0-alpha.4", + "pallas-traverse 1.0.0-alpha.4", "pallas-uplc", "serde", "thiserror 1.0.69", @@ -2785,6 +3173,15 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "patricia_tree" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31f2f4539bffe53fc4b4da301df49d114b845b077bd5727b7fe2bd9d8df2ae68" +dependencies = [ + "bitflags 2.9.1", +] + [[package]] name = "pbjson" version = "0.7.0" @@ -2832,6 +3229,33 @@ dependencies = [ "hmac", ] +[[package]] +name = "peg" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9928cfca101b36ec5163e70049ee5368a8a1c3c6efc9ca9c5f9cc2f816152477" +dependencies = [ + "peg-macros", + "peg-runtime", +] + +[[package]] +name = "peg-macros" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6298ab04c202fa5b5d52ba03269fb7b74550b150323038878fe6c372d8280f71" +dependencies = [ + "peg-runtime", + "proc-macro2", + "quote", +] + +[[package]] +name = "peg-runtime" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "132dca9b868d927b35b5dd728167b2dee150eb1ad686008fc71ccb298b776fca" + [[package]] name = "pem" version = "3.0.5" @@ -2855,7 +3279,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323" dependencies = [ "memchr", - "miette", + "miette 7.6.0", "serde", "serde_json", "thiserror 2.0.17", @@ -2895,13 +3319,23 @@ dependencies = [ "sha2", ] +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset 0.4.2", + "indexmap 2.10.0", +] + [[package]] name = "petgraph" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ - "fixedbitset", + "fixedbitset 0.5.7", "indexmap 2.10.0", ] @@ -2937,6 +3371,16 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.32" @@ -2997,6 +3441,29 @@ dependencies = [ "termtree", ] +[[package]] +name = "pretty" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"83f3aa1e3ca87d3b124db7461265ac176b40c277f37e503eaa29c9c75c037846" +dependencies = [ + "arrayvec 0.5.2", + "log", + "typed-arena", + "unicode-segmentation", +] + +[[package]] +name = "pretty" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d22152487193190344590e4f30e219cf3fe140d9e7a3fdb683d82aa2c5f4156" +dependencies = [ + "arrayvec 0.5.2", + "typed-arena", + "unicode-width 0.2.1", +] + [[package]] name = "prettyplease" version = "0.2.35" @@ -3059,7 +3526,7 @@ dependencies = [ "log", "multimap", "once_cell", - "petgraph", + "petgraph 0.7.1", "prettyplease", "prost", "prost-types", @@ -3114,6 +3581,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.8.5" @@ -3299,6 +3772,16 @@ dependencies = [ "web-sys", ] +[[package]] +name = "rfc6979" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2" +dependencies = [ + "hmac", + "subtle", +] + [[package]] name = "ring" version = "0.17.14" @@ -3454,6 +3937,29 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sec1" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc" +dependencies = [ + "base16ct", + "der", + "generic-array", + "pkcs8", + "subtle", + "zeroize", +] + +[[package]] +name = "secp256k1" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4124a35fe33ae14259c490fd70fa199a32b9ce9502f2ee6bc4f81ec06fa65894" +dependencies = [ + "secp256k1-sys 0.8.2", +] + [[package]] name = "secp256k1" version = "0.30.0" @@ -3462,7 +3968,16 @@ checksum = "b50c5943d326858130af85e049f2661ba3c78b26589b8ab98e65e80ae44a1252" dependencies = [ "bitcoin_hashes 0.14.0", "rand", - "secp256k1-sys", + "secp256k1-sys 0.10.1", +] + +[[package]] +name = "secp256k1-sys" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4473013577ec77b4ee3668179ef1186df3146e2cf2d927bd200974c6fe60fd99" +dependencies = [ + "cc", ] [[package]] @@ -3711,6 +4226,16 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + [[package]] name = "simd-adler32" version = "0.3.7" @@ -3788,6 +4313,16 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -3825,12 +4360,34 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" 
+version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros 0.26.4", +] + [[package]] name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + [[package]] name = "strum_macros" version = "0.27.1" @@ -3850,6 +4407,16 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "supports-color" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ba6faf2ca7ee42fdd458f4347ae0a9bd6bcc445ad7cb57ad82b383f18870d6f" +dependencies = [ + "atty", + "is_ci", +] + [[package]] name = "supports-color" version = "3.0.2" @@ -4387,6 +4954,7 @@ dependencies = [ name = "trix" version = "0.20.0" dependencies = [ + "aiken-lang", "anyhow", "askama", "assert_cmd", @@ -4405,7 +4973,7 @@ dependencies = [ "inquire", "insta", "libc", - "miette", + "miette 7.6.0", "oci-client", "octocrab", "pallas", @@ -4444,7 +5012,7 @@ checksum = "22ccd68861b3f9fe5dc2df8ba4b258cc9a29ec23c17da0e1583ab11149e4aaf4" dependencies = [ "ciborium", "hex", - "miette", + "miette 7.6.0", "pest", "pest_derive", "serde", @@ -4465,6 +5033,12 @@ dependencies = [ "thiserror 2.0.17", ] +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + [[package]] name = "typenum" version = "1.18.0" @@ -4540,6 +5114,39 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "uplc" +version = "1.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af10ae941c734f297a8ab1a08d79aa16e4216552bdc6b526dff8d91115c1eed9" +dependencies = [ + "bitvec", + "blst", + "cryptoxide 0.4.4", + "hamming", + "hex", + "indexmap 1.9.3", + "itertools 0.10.5", + "k256", + "miette 5.10.0", + "num-bigint", + "num-integer", + "num-traits", + "once_cell", + "pallas-addresses 0.33.0", + "pallas-codec 0.33.0", + "pallas-crypto 0.33.0", + "pallas-primitives 0.33.0", + "pallas-traverse 0.33.0", + "peg", + "pretty 0.11.3", + "secp256k1 0.26.0", + "serde", + "serde_json", + "strum 0.26.3", + "thiserror 1.0.69", +] + [[package]] name = "url" version = "2.5.4" @@ -4621,6 +5228,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vec1" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eab68b56840f69efb0fefbe3ab6661499217ffdc58e2eef7c3f6f69835386322" + [[package]] name = "version_check" version = "0.9.5" @@ -5102,6 +5715,15 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +[[package]] +name = "wyz" 
+version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "xz2" version = "0.1.7" diff --git a/Cargo.toml b/Cargo.toml index 1062b64..4de541d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,6 +59,7 @@ tokio-util = "0.7" tracing-subscriber = "0.3.22" dotenv-parser = "0.1.3" termimad = "0.31" +aiken-lang = "1.1.21" [dev-dependencies] assert_cmd = "2.0" diff --git a/design/005-aiken-ast-validator-context.md b/design/005-aiken-ast-validator-context.md new file mode 100644 index 0000000..704366d --- /dev/null +++ b/design/005-aiken-ast-validator-context.md @@ -0,0 +1,280 @@ +# Aiken AST & Validator Context for Audit + +## Status + +Proposed implementation spec for extending `trix audit` with: +- **Phase 1**: on-demand Aiken AST generation +- **Phase 2**: `ValidatorContextMap` extraction from AST + +--- + +## Goals + +1. Ensure `trix audit` can obtain a **fresh structural view** of Aiken code without relying on pre-existing artifacts. +2. Build a deterministic `ValidatorContextMap` that can be injected into audit prompts. +3. Persist enough metadata in state to make runs reproducible and diagnosable. + +--- + +## Scope + +- New AST generation flow in `audit` execution path. +- New model contract for validator context. +- Prompt template/data-path extension to include validator context. +- State JSON extension to include AST/context metadata. +- Failure semantics for AST generation/parsing. +- Unit/e2e acceptance coverage for phase behavior. + +--- + +## High-Level Flow (Phase 1 + 2) + +Before skill loop execution: + +1. Discover `.ak` source files (existing behavior). +2. Generate Aiken AST on-demand (new behavior). +3. Parse AST into normalized internal structures. +4. Build `ValidatorContextMap` (validator-centric mapping). +5. Add this context to: + - initial prompt rendering payload + - persisted analysis state +6. Run existing skill loop unchanged, except prompts now include validator context block. + +--- + +## CLI Surface Changes + +No mandatory user-facing flags are required for baseline phase 1–2. + +Optional (recommended) additions: +- `--ast-out ` (default: `.tx3/audit/aiken-ast.json`) +- `--no-ast-cache` (default: false) + +If optional flags are deferred, runtime should still write AST snapshot to default path. + +--- + +## Data Contracts + +## `AnalysisStateJson` extension + +Add fields: + +```json +{ + "ast": { + "path": ".tx3/audit/aiken-ast.json", + "fingerprint": "sha256:...", + "generated_at": "2026-02-26T12:00:00Z", + "tool": { + "name": "aiken", + "version": "vX.Y.Z" + } + }, + "validator_context": { + "validators": [ ... 
] + } +} +``` + +### `AstMetadata` + +- `path`: persisted AST snapshot path (workspace-relative in state) +- `fingerprint`: deterministic digest of AST content (or source-set digest) +- `generated_at`: RFC3339 UTC timestamp +- `tool.name`: fixed string `aiken` +- `tool.version`: resolved from CLI runtime + +### `ValidatorContextMap` + +```json +{ + "validators": [ + { + "id": "vesting.hello_world", + "module": "validators/vesting.ak", + "source_file": "onchain/validators/vesting.ak", + "source_span": { + "start_line": 13, + "end_line": 31 + }, + "handlers": [ + { + "name": "spend", + "parameters": [ + { "name": "datum", "type": "Option" }, + { "name": "redeemer", "type": "Redeemer" }, + { "name": "_own_ref", "type": "OutputReference" }, + { "name": "self", "type": "Transaction" } + ] + }, + { + "name": "else", + "parameters": [ + { "name": "_", "type": "Unknown" } + ] + } + ] + } + ] +} +``` + +Normalization rules: +- `validators` MUST be sorted deterministically by `id` then `source_file`. +- `handlers` MUST preserve source order when available. +- `parameters` MUST preserve declared order. +- If precise type text is unavailable, set type to `"Unknown"` (do not omit parameter). +- If source span is unavailable, omit `source_span`. + +--- + +## AST Generation Contract (Phase 1) + +`audit` MUST execute an on-demand AST generation step before skill analysis. + +Requirements: +- MUST run within current project root. +- MUST fail the audit run if AST generation fails. +- MUST persist raw AST output to `.tx3/audit/aiken-ast.json` (or configured path). +- MUST record Aiken tool version in state metadata. +- SHOULD avoid repeated generation in same run once AST is available. + +Failure behavior: +- Return explicit error category: + - Aiken CLI missing + - Aiken command failed + - AST output unreadable/invalid JSON + +No fallback behavior is defined in this phase. + +--- + +## Validator Context Extraction (Phase 2) + +Parser must transform AST into `ValidatorContextMap`. + +Extraction requirements: +- MUST enumerate all validator definitions in analyzed source set. +- MUST extract handler names and ordered parameter lists. +- MUST include best-effort type display for each parameter. +- MUST include source file path linkage for each validator. +- SHOULD include source spans when present in AST. + +Validation requirements: +- If AST is valid but yields no validators, run continues with empty validator list. +- If AST schema is incompatible, fail with parse-contract error. + +--- + +## Prompt Integration + +Template update target: +- `templates/aiken/audit_agent_initial_user_prompt.md` + +Add new section after source references: + +```markdown +Validator context map: +{{VALIDATOR_CONTEXT_MAP}} +``` + +Rendering rules: +- Use concise markdown bullets (not raw JSON dump) for readability. +- Include: + - validator id + - source file + - handlers and parameter signatures +- If empty: render `- (none)`. + +Provider integration: +- Existing providers (`openai`, `anthropic`, `ollama`, `scaffold`) receive the same expanded prompt content via shared builder. 
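+
+For illustration, the `ValidatorContextMap` JSON example above would render into the prompt roughly as the block below. This is a sketch only: exact handler order, indentation, and type text (e.g. `Option<Datum>`) depend on what the AST reports and on the shared renderer.
+
+```markdown
+Validator context map:
+- `vesting.hello_world`
+  - source: `onchain/validators/vesting.ak`
+  - `spend(datum: Option<Datum>, redeemer: Redeemer, _own_ref: OutputReference, self: Transaction)`
+  - `else(_: Unknown)`
+```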
+ +--- + +## Implementation Notes (Code Placement) + +Likely code touchpoints: +- `src/commands/audit/mod.rs` + - orchestration: AST generation + context extraction prior to skill loop + - state population +- `src/commands/audit/model.rs` + - add `AstMetadata`, `ValidatorContextMap`, related structs +- `src/commands/audit/providers/shared.rs` + - extend `build_initial_user_prompt(...)` + - renderer for validator context markdown block +- `templates/aiken/audit_agent_initial_user_prompt.md` + - add `{{VALIDATOR_CONTEXT_MAP}}` placeholder + +Recommended internal modules: +- `src/commands/audit/ast.rs` + - command execution + AST load + - schema adapter/parser into internal normalized models + +--- + +## Determinism & Caching + +Minimum deterministic guarantees: +- Stable sort ordering for validator map. +- Stable markdown rendering order. +- State includes fingerprint for traceability. + +Caching (optional in phase 1–2, but recommended): +- Reuse AST file if fingerprint of relevant sources unchanged. +- `--no-ast-cache` bypasses reuse. + +--- + +## Security & Permissions + +- AST generation is local and non-interactive. +- No additional AI read permissions are introduced by this phase. +- Generated AST artifact remains inside project `.tx3/` output scope. + +--- + +## Acceptance Criteria + +Phase 1 accepted when: +- `trix audit` generates AST snapshot on each run (or cache-hit behavior if enabled). +- Run fails clearly when Aiken CLI/AST generation fails. +- State JSON includes AST metadata block. + +Phase 2 accepted when: +- Validator context map is extracted and persisted in state. +- Initial provider prompt includes rendered validator context map. +- Map includes validator handlers and ordered parameter signatures. +- Deterministic ordering verified by tests. + +--- + +## Testing Plan + +Unit tests: +- AST parse adapter: + - parses validators/handlers/parameters + - handles missing type info with `Unknown` + - deterministic sorting +- Prompt renderer: + - renders non-empty context map + - renders `- (none)` for empty map + +Integration/e2e tests: +- `audit` produces `.tx3/audit/aiken-ast.json`. +- `state.json` contains `ast` and `validator_context` blocks. +- Prompt-building path includes `Validator context map:` section. + +Negative tests: +- Missing Aiken binary => explicit failure. +- Invalid AST JSON => explicit failure. + +--- + +## Open Questions + +1. Which exact Aiken command/output format is canonical for AST export in current supported versions? +2. Should type rendering preserve Aiken syntax verbatim or use normalized aliases? +3. Should `source_span` include columns now or lines only? + +These questions must be resolved before implementation starts, but do not change the phase scope. 
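+
+---
+
+## Example Invocation (Illustrative)
+
+A minimal sketch of a run that bypasses the AST cache, assuming the optional flags ship with the defaults proposed above:
+
+```bash
+# Force AST regeneration and write the snapshot to the default path
+trix audit --no-ast-cache --ast-out .tx3/audit/aiken-ast.json
+```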
diff --git a/src/commands/audit/ast.rs b/src/commands/audit/ast.rs new file mode 100644 index 0000000..ecc53b6 --- /dev/null +++ b/src/commands/audit/ast.rs @@ -0,0 +1,448 @@ +use aiken_lang::{ + ast::{ + self, Annotation, ArgBy, ModuleKind, UntypedArg, UntypedDefinition, UntypedFunction, + UntypedModule, + }, + parser, + version, +}; +use chrono::Utc; +use cryptoxide::{digest::Digest as _, sha2::Sha256}; +use miette::{Context, IntoDiagnostic, Result}; +use serde::{Deserialize, Serialize}; +use std::path::{Path, PathBuf}; + +use super::model::{ + AstMetadata, AstToolMetadata, SourceSpan, ValidatorContextEntry, ValidatorContextMap, + ValidatorHandlerContext, ValidatorParameterContext, +}; + +#[derive(Debug, Clone)] +pub struct AstBuildOutput { + pub metadata: AstMetadata, + pub validator_context: ValidatorContextMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct AstSnapshot { + schema_version: u8, + generated_at: String, + tool: AstToolMetadata, + source_fingerprint: String, + files: Vec, + validator_context: ValidatorContextMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct AstFileSnapshot { + source_file: String, + ast: String, +} + +#[derive(Debug, Clone)] +struct ParsedModule { + source_file: String, + module: UntypedModule, + ast_debug: String, +} + +pub fn generate_ast_and_validator_context( + project_root: &Path, + source_files: &[PathBuf], + ast_out_path: &Path, + no_ast_cache: bool, +) -> Result { + let source_fingerprint = fingerprint_sources(project_root, source_files)?; + + if ast_out_path.exists() && !no_ast_cache { + let cached_text = std::fs::read_to_string(ast_out_path) + .into_diagnostic() + .with_context(|| format!("Failed to read cached AST JSON at {}", ast_out_path.display()))?; + + let cached_snapshot: AstSnapshot = serde_json::from_str(&cached_text) + .into_diagnostic() + .context("AST output unreadable/invalid JSON")?; + + if cached_snapshot.source_fingerprint == source_fingerprint { + return Ok(AstBuildOutput { + metadata: AstMetadata { + path: display_path_for_state(project_root, ast_out_path), + fingerprint: format!("sha256:{}", sha256_hex(cached_text.as_bytes())), + generated_at: cached_snapshot.generated_at, + tool: cached_snapshot.tool, + }, + validator_context: cached_snapshot.validator_context, + }); + } + } + + let parsed_modules = source_files + .iter() + .map(|source_file| parse_module_snapshot(project_root, source_file)) + .collect::>>()?; + + let validator_context = build_validator_context_from_modules(&parsed_modules); + + let files = parsed_modules + .iter() + .map(|module| AstFileSnapshot { + source_file: module.source_file.clone(), + ast: module.ast_debug.clone(), + }) + .collect::>(); + + let snapshot = AstSnapshot { + schema_version: 1, + generated_at: Utc::now().to_rfc3339(), + tool: AstToolMetadata { + name: "aiken".to_string(), + version: version::compiler_version(false), + }, + source_fingerprint, + files, + validator_context: validator_context.clone(), + }; + + let serialized_snapshot = serde_json::to_string_pretty(&snapshot).into_diagnostic()?; + write_text_file(ast_out_path, &serialized_snapshot)?; + + Ok(AstBuildOutput { + metadata: AstMetadata { + path: display_path_for_state(project_root, ast_out_path), + fingerprint: format!("sha256:{}", sha256_hex(serialized_snapshot.as_bytes())), + generated_at: snapshot.generated_at, + tool: snapshot.tool, + }, + validator_context: snapshot.validator_context, + }) +} + + fn parse_module_snapshot(project_root: &Path, source_file: &Path) -> Result { + let src = 
std::fs::read_to_string(source_file) + .into_diagnostic() + .with_context(|| format!("Failed to read source file {}", source_file.display()))?; + + let (module, _) = parser::module(&src, ModuleKind::Validator).map_err(|errors| { + let rendered = errors + .iter() + .map(|error| format!("{error:?}")) + .collect::>() + .join("\n"); + + miette::miette!( + "Aiken command failed: parser error(s) while generating AST for {}\n{}", + display_path_for_state(project_root, source_file), + rendered + ) + })?; + + Ok(ParsedModule { + source_file: display_path_for_state(project_root, source_file), + ast_debug: format!("{:#?}", &module), + module, + }) +} + +fn build_validator_context_from_modules(modules: &[ParsedModule]) -> ValidatorContextMap { + let mut validators = modules + .iter() + .flat_map(|module_snapshot| { + module_snapshot + .module + .definitions + .iter() + .filter_map(|definition| { + let UntypedDefinition::Validator(validator) = definition else { + return None; + }; + + let module_id = module_name_from_source_file(&module_snapshot.source_file); + let id = format!("{}.{}", module_id, validator.name); + + let mut handlers = validator + .handlers + .iter() + .map(function_to_handler_context) + .collect::>(); + handlers.push(function_to_handler_context(&validator.fallback)); + + Some(ValidatorContextEntry { + id, + module: module_snapshot.source_file.clone(), + source_file: module_snapshot.source_file.clone(), + source_span: resolve_source_span(&module_snapshot.module, validator.location), + handlers, + }) + }) + .collect::>() + }) + .collect::>(); + + validators.sort_by(|left, right| { + left.id + .cmp(&right.id) + .then_with(|| left.source_file.cmp(&right.source_file)) + }); + + ValidatorContextMap { validators } +} + +fn function_to_handler_context(function: &UntypedFunction) -> ValidatorHandlerContext { + let parameters = function + .arguments + .iter() + .enumerate() + .map(|(index, argument)| ValidatorParameterContext { + name: argument_name(argument, index), + r#type: argument + .annotation + .as_ref() + .map(annotation_to_string) + .unwrap_or_else(|| "Unknown".to_string()), + }) + .collect::>(); + + ValidatorHandlerContext { + name: function.name.clone(), + parameters, + } +} + +fn argument_name(argument: &UntypedArg, index: usize) -> String { + match &argument.by { + ArgBy::ByName(name) => name.get_name(), + ArgBy::ByPattern(_) => argument.arg_name(index).get_name(), + } +} + +fn annotation_to_string(annotation: &Annotation) -> String { + match annotation { + Annotation::Constructor { + module, + name, + arguments, + .. + } => { + let qualified = module + .as_ref() + .map(|module_name| format!("{module_name}.{name}")) + .unwrap_or_else(|| name.clone()); + + if arguments.is_empty() { + qualified + } else { + format!( + "{}<{}>", + qualified, + arguments + .iter() + .map(annotation_to_string) + .collect::>() + .join(", ") + ) + } + } + Annotation::Fn { arguments, ret, .. } => format!( + "fn({}) -> {}", + arguments + .iter() + .map(annotation_to_string) + .collect::>() + .join(", "), + annotation_to_string(ret) + ), + Annotation::Var { name, .. } | Annotation::Hole { name, .. } => name.clone(), + Annotation::Tuple { elems, .. } => format!( + "({})", + elems + .iter() + .map(annotation_to_string) + .collect::>() + .join(", ") + ), + Annotation::Pair { fst, snd, .. 
} => { + format!("Pair<{}, {}>", annotation_to_string(fst), annotation_to_string(snd)) + } + } +} + +fn resolve_source_span(module: &UntypedModule, span: ast::Span) -> Option { + let start_line = module.lines.line_number(span.start)?; + let end_byte = span.end.saturating_sub(1); + let end_line = module.lines.line_number(end_byte).unwrap_or(start_line); + + Some(SourceSpan { + start_line, + end_line, + }) +} + +fn module_name_from_source_file(source_file: &str) -> String { + let without_extension = source_file.strip_suffix(".ak").unwrap_or(source_file); + without_extension.replace('/', ".") +} + +fn fingerprint_sources(project_root: &Path, source_files: &[PathBuf]) -> Result { + let mut hasher = Sha256::new(); + + for source_file in source_files { + let relative_path = display_path_for_state(project_root, source_file); + let content = std::fs::read(source_file) + .into_diagnostic() + .with_context(|| format!("Failed to read source file {}", source_file.display()))?; + + hasher.input(relative_path.as_bytes()); + hasher.input(b"\0"); + hasher.input(&content); + hasher.input(b"\0"); + } + + Ok(format!("sha256:{}", hasher.result_str())) +} + +fn display_path_for_state(project_root: &Path, path: &Path) -> String { + path.strip_prefix(project_root) + .map(|relative| relative.display().to_string()) + .unwrap_or_else(|_| path.display().to_string()) +} + +fn sha256_hex(value: &[u8]) -> String { + let mut hasher = Sha256::new(); + hasher.input(value); + hasher.result_str() +} + +fn write_text_file(path: &Path, content: &str) -> Result<()> { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent) + .into_diagnostic() + .with_context(|| format!("Failed to create output directory {}", parent.display()))?; + } + + std::fs::write(path, content) + .into_diagnostic() + .with_context(|| format!("Failed to write file {}", path.display())) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + #[test] + fn validator_context_extracts_handlers_and_types() { + let src = r#" +use cardano/transaction.{OutputReference, Transaction} + +pub type Datum { + owner: ByteArray, +} + +pub type Redeemer { + msg: ByteArray, +} + +validator hello_world { + spend( + datum: Option, + redeemer: Redeemer, + _own_ref: OutputReference, + self: Transaction, + ) { + True + } + + else(_) { + fail + } +} +"#; + + let (module, _) = parser::module(src, ModuleKind::Validator).expect("parse module"); + let context = build_validator_context_from_modules(&[ParsedModule { + source_file: "onchain/validators/vesting.ak".to_string(), + module, + ast_debug: String::new(), + }]); + + assert_eq!(context.validators.len(), 1); + let validator = &context.validators[0]; + assert_eq!(validator.id, "onchain.validators.vesting.hello_world"); + assert_eq!(validator.handlers.len(), 2); + assert_eq!(validator.handlers[0].name, "spend"); + assert_eq!(validator.handlers[0].parameters[0].name, "datum"); + assert_eq!(validator.handlers[0].parameters[0].r#type, "Option"); + assert_eq!(validator.handlers[1].name, "else"); + assert_eq!(validator.handlers[1].parameters[0].r#type, "Unknown"); + assert!(validator.source_span.is_some()); + } + + #[test] + fn validator_context_is_sorted_deterministically() { + let src = r#" +validator zeta { + else(_) { True } +} + +validator alpha { + else(_) { True } +} +"#; + + let (module, _) = parser::module(src, ModuleKind::Validator).expect("parse module"); + + let context = build_validator_context_from_modules(&[ParsedModule { + source_file: "validators/sample.ak".to_string(), + module, + ast_debug: 
String::new(), + }]); + + let ids = context + .validators + .iter() + .map(|validator| validator.id.clone()) + .collect::>(); + + assert_eq!( + ids, + vec![ + "validators.sample.alpha".to_string(), + "validators.sample.zeta".to_string() + ] + ); + } + + #[test] + fn generate_ast_fails_when_cached_snapshot_is_invalid_json() { + let temp = tempfile::tempdir().expect("temp dir"); + let root = temp.path(); + let source = root.join("validators/broken.ak"); + let ast_out = root.join(".tx3/audit/aiken-ast.json"); + + fs::create_dir_all(source.parent().expect("parent")).expect("create parent dir"); + fs::write(&source, "validator ok { else(_) { True } }").expect("write source file"); + fs::create_dir_all(ast_out.parent().expect("parent")).expect("create ast dir"); + fs::write(&ast_out, "{ this is invalid json }").expect("write invalid ast cache"); + + let err = generate_ast_and_validator_context(root, &[source], &ast_out, false) + .expect_err("expected invalid cached ast json failure"); + + assert!(err.to_string().contains("AST output unreadable/invalid JSON")); + } + + #[test] + fn generate_ast_fails_when_aiken_source_cannot_be_parsed() { + let temp = tempfile::tempdir().expect("temp dir"); + let root = temp.path(); + let source = root.join("validators/invalid.ak"); + let ast_out = root.join(".tx3/audit/aiken-ast.json"); + + fs::create_dir_all(source.parent().expect("parent")).expect("create parent dir"); + fs::write(&source, "validator broken { spend(").expect("write invalid source file"); + + let err = generate_ast_and_validator_context(root, &[source], &ast_out, true) + .expect_err("expected parser failure"); + + assert!(err.to_string().contains("Aiken command failed")); + } +} diff --git a/src/commands/audit/mod.rs b/src/commands/audit/mod.rs index 195bfd3..80386db 100644 --- a/src/commands/audit/mod.rs +++ b/src/commands/audit/mod.rs @@ -5,12 +5,14 @@ use std::path::{Path, PathBuf}; use crate::config::{ProfileConfig, RootConfig}; +pub mod ast; pub mod model; pub mod providers; +use self::ast::generate_ast_and_validator_context; use self::model::{ AnalysisStateJson, MiniPrompt, PermissionPromptSpec, SkillIterationResult, - VulnerabilityFinding, VulnerabilityReportSpec, VulnerabilitySkill, + ValidatorContextMap, VulnerabilityFinding, VulnerabilityReportSpec, VulnerabilitySkill, }; use self::providers::{build_provider, AnalysisProvider}; @@ -65,6 +67,14 @@ pub struct Args { #[arg(long, default_value_t = false)] pub ai_logs: bool, + /// Path where the Aiken AST snapshot JSON will be written. + #[arg(long, default_value = ".tx3/audit/aiken-ast.json")] + pub ast_out: String, + + /// Regenerate AST even if an up-to-date snapshot is already available. + #[arg(long, default_value_t = false)] + pub no_ast_cache: bool, + /// File read scope for AI-assisted local tool requests: workspace | strict. 
#[arg(long, value_enum, default_value_t = ReadScopeArg::Workspace)] pub read_scope: ReadScopeArg, @@ -105,12 +115,13 @@ fn run_analysis( let skills_dir = PathBuf::from(&args.skills_dir); let state_out = PathBuf::from(&args.state_out); let report_out = PathBuf::from(&args.report_out); + let ast_out = PathBuf::from(&args.ast_out); let project_root = std::env::current_dir().into_diagnostic()?; - let source_files = discover_source_files(&project_root)?; - let source_files = if source_files.is_empty() { + let aiken_source_files = discover_source_files(&project_root)?; + let source_files = if aiken_source_files.is_empty() { vec![config.protocol.main.clone()] } else { - source_files + aiken_source_files.clone() }; log_audit_progress( @@ -129,6 +140,12 @@ fn run_analysis( &source_files, ); let skills = load_skills(&skills_dir, &args.skills_dir)?; + let ast_context = generate_ast_and_validator_context( + &project_root, + &aiken_source_files, + &ast_out, + args.no_ast_cache, + )?; let mut state = AnalysisStateJson { version: "1".to_string(), @@ -138,6 +155,8 @@ fn run_analysis( .collect(), provider: provider.provider_spec(), permission_prompt: permission_prompt.clone(), + ast: Some(ast_context.metadata.clone()), + validator_context: ast_context.validator_context.clone(), iterations: vec![], }; @@ -147,6 +166,7 @@ fn run_analysis( source_files: &source_files, project_root: &project_root, permission_prompt: &permission_prompt, + validator_context: &ast_context.validator_context, provider, ai_logs: args.ai_logs, state_out: &state_out, @@ -173,6 +193,7 @@ struct SkillLoopContext<'a> { source_files: &'a [PathBuf], project_root: &'a Path, permission_prompt: &'a PermissionPromptSpec, + validator_context: &'a ValidatorContextMap, provider: &'a dyn AnalysisProvider, ai_logs: bool, state_out: &'a Path, @@ -208,6 +229,7 @@ fn run_skill_loop( skill, &prompt, &source_references, + context.validator_context, context.project_root, context.permission_prompt, )?; diff --git a/src/commands/audit/model.rs b/src/commands/audit/model.rs index 47edb38..2a9d3a7 100644 --- a/src/commands/audit/model.rs +++ b/src/commands/audit/model.rs @@ -48,9 +48,61 @@ pub struct AnalysisStateJson { pub source_files: Vec, pub provider: ProviderSpec, pub permission_prompt: PermissionPromptSpec, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub ast: Option, + #[serde(default)] + pub validator_context: ValidatorContextMap, pub iterations: Vec, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AstMetadata { + pub path: String, + pub fingerprint: String, + pub generated_at: String, + pub tool: AstToolMetadata, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AstToolMetadata { + pub name: String, + pub version: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct ValidatorContextMap { + #[serde(default)] + pub validators: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ValidatorContextEntry { + pub id: String, + pub module: String, + pub source_file: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub source_span: Option, + pub handlers: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SourceSpan { + pub start_line: usize, + pub end_line: usize, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ValidatorHandlerContext { + pub name: String, + pub parameters: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ValidatorParameterContext { + pub name: String, + pub 
r#type: String, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ProviderSpec { pub name: String, diff --git a/src/commands/audit/providers/anthropic.rs b/src/commands/audit/providers/anthropic.rs index 19cbfaa..23edad5 100644 --- a/src/commands/audit/providers/anthropic.rs +++ b/src/commands/audit/providers/anthropic.rs @@ -14,7 +14,8 @@ use super::{ AnalysisProvider, }; use crate::commands::audit::model::{ - MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, VulnerabilitySkill, + MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, ValidatorContextMap, + VulnerabilitySkill, }; #[derive(Debug, Clone)] @@ -40,6 +41,7 @@ impl AnalysisProvider for AnthropicProvider { skill: &VulnerabilitySkill, prompt: &MiniPrompt, source_references: &[String], + validator_context: &ValidatorContextMap, project_root: &Path, permission_prompt: &PermissionPromptSpec, ) -> Result { @@ -51,8 +53,12 @@ impl AnalysisProvider for AnthropicProvider { })?; let system_prompt = build_agent_system_prompt(); - let initial_user_prompt = - build_initial_user_prompt(prompt, source_references, permission_prompt); + let initial_user_prompt = build_initial_user_prompt( + prompt, + source_references, + validator_context, + permission_prompt, + ); let mut messages = vec![serde_json::json!({ "role": "user", diff --git a/src/commands/audit/providers/mod.rs b/src/commands/audit/providers/mod.rs index 377027c..e594079 100644 --- a/src/commands/audit/providers/mod.rs +++ b/src/commands/audit/providers/mod.rs @@ -7,7 +7,8 @@ use miette::{Context, IntoDiagnostic, Result}; use std::path::Path; use super::model::{ - MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, VulnerabilitySkill, + MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, ValidatorContextMap, + VulnerabilitySkill, }; use super::Args; @@ -33,6 +34,7 @@ pub trait AnalysisProvider { skill: &VulnerabilitySkill, prompt: &MiniPrompt, source_references: &[String], + validator_context: &ValidatorContextMap, project_root: &Path, permission_prompt: &PermissionPromptSpec, ) -> Result; diff --git a/src/commands/audit/providers/openai.rs b/src/commands/audit/providers/openai.rs index 62fca6d..db22306 100644 --- a/src/commands/audit/providers/openai.rs +++ b/src/commands/audit/providers/openai.rs @@ -14,7 +14,8 @@ use super::{ AnalysisProvider, }; use crate::commands::audit::model::{ - MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, VulnerabilitySkill, + MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, ValidatorContextMap, + VulnerabilitySkill, }; #[derive(Debug, Clone)] @@ -39,6 +40,7 @@ impl AnalysisProvider for OpenAiProvider { skill: &VulnerabilitySkill, prompt: &MiniPrompt, source_references: &[String], + validator_context: &ValidatorContextMap, project_root: &Path, permission_prompt: &PermissionPromptSpec, ) -> Result { @@ -50,8 +52,12 @@ impl AnalysisProvider for OpenAiProvider { })?; let system_prompt = build_agent_system_prompt(); - let initial_user_prompt = - build_initial_user_prompt(prompt, source_references, permission_prompt); + let initial_user_prompt = build_initial_user_prompt( + prompt, + source_references, + validator_context, + permission_prompt, + ); let mut messages = vec![ serde_json::json!({ diff --git a/src/commands/audit/providers/scaffold.rs b/src/commands/audit/providers/scaffold.rs index 044f485..d9b12e9 100644 --- a/src/commands/audit/providers/scaffold.rs +++ b/src/commands/audit/providers/scaffold.rs @@ -3,7 +3,8 @@ use 
std::path::Path; use super::AnalysisProvider; use crate::commands::audit::model::{ - MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, VulnerabilitySkill, + MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, ValidatorContextMap, + VulnerabilitySkill, }; #[derive(Debug, Default)] @@ -23,6 +24,7 @@ impl AnalysisProvider for ScaffoldProvider { skill: &VulnerabilitySkill, prompt: &MiniPrompt, _source_references: &[String], + _validator_context: &ValidatorContextMap, _project_root: &Path, _permission_prompt: &PermissionPromptSpec, ) -> Result { diff --git a/src/commands/audit/providers/shared.rs b/src/commands/audit/providers/shared.rs index e4a0436..37f83fb 100644 --- a/src/commands/audit/providers/shared.rs +++ b/src/commands/audit/providers/shared.rs @@ -7,7 +7,8 @@ use std::process::Command; use tokio::runtime::Handle; use crate::commands::audit::model::{ - MiniPrompt, PermissionPromptSpec, SkillIterationResult, VulnerabilityFinding, + MiniPrompt, PermissionPromptSpec, SkillIterationResult, ValidatorContextMap, + VulnerabilityFinding, VulnerabilitySkill, }; @@ -75,11 +76,16 @@ fn parse_line_number(value: Option<&Value>) -> Option { pub(super) fn build_initial_user_prompt( prompt: &MiniPrompt, source_references: &[String], + validator_context: &ValidatorContextMap, permission_prompt: &PermissionPromptSpec, ) -> String { INITIAL_USER_PROMPT_TEMPLATE .replace("{{SKILL}}", &prompt.text) .replace("{{SOURCE_REFERENCES}}", &render_source_references(source_references)) + .replace( + "{{VALIDATOR_CONTEXT_MAP}}", + &render_validator_context_map(validator_context), + ) .replace( "{{PERMISSION_PROMPT}}", &render_permission_prompt(permission_prompt), @@ -114,6 +120,40 @@ fn render_source_references(source_references: &[String]) -> String { .join("\n") } +fn render_validator_context_map(validator_context: &ValidatorContextMap) -> String { + if validator_context.validators.is_empty() { + return "- (none)".to_string(); + } + + validator_context + .validators + .iter() + .map(|validator| { + let handlers = validator + .handlers + .iter() + .map(|handler| { + let signature = handler + .parameters + .iter() + .map(|parameter| format!("{}: {}", parameter.name, parameter.r#type)) + .collect::>() + .join(", "); + + format!(" - `{}({})`", handler.name, signature) + }) + .collect::>() + .join("\n"); + + format!( + "- `{}`\n - source: `{}`\n{}", + validator.id, validator.source_file, handlers + ) + }) + .collect::>() + .join("\n") +} + pub(super) fn parse_agent_action(content: &str) -> Result { let parsed = parse_structured_content(content)?; @@ -606,7 +646,10 @@ pub(super) fn iteration_from_parsed( #[cfg(test)] mod tests { use super::*; - use crate::commands::audit::model::PermissionPromptSpec; + use crate::commands::audit::model::{ + MiniPrompt, PermissionPromptSpec, ValidatorContextEntry, ValidatorContextMap, + ValidatorHandlerContext, ValidatorParameterContext, + }; #[test] fn execute_read_request_strict_allows_known_file() { @@ -665,4 +708,73 @@ mod tests { assert!(err.to_string().contains("strict read scope")); } + + #[test] + fn initial_prompt_renders_validator_context_map() { + let permission_prompt = PermissionPromptSpec { + shell: "bash".to_string(), + allowed_commands: vec!["cat".to_string()], + scope_rules: vec!["rule".to_string()], + workspace_root: ".".to_string(), + read_scope: "workspace".to_string(), + interactive_permissions: false, + allowed_paths: vec![], + }; + + let validator_context = ValidatorContextMap { + validators: vec![ValidatorContextEntry { + id: 
"validators.vesting.hello_world".to_string(), + module: "validators/vesting.ak".to_string(), + source_file: "validators/vesting.ak".to_string(), + source_span: None, + handlers: vec![ValidatorHandlerContext { + name: "spend".to_string(), + parameters: vec![ValidatorParameterContext { + name: "datum".to_string(), + r#type: "Option".to_string(), + }], + }], + }], + }; + + let prompt = build_initial_user_prompt( + &MiniPrompt { + skill_id: "s1".to_string(), + text: "skill".to_string(), + }, + &["validators/vesting.ak".to_string()], + &validator_context, + &permission_prompt, + ); + + assert!(prompt.contains("Validator context map:")); + assert!(prompt.contains("validators.vesting.hello_world")); + assert!(prompt.contains("spend(datum: Option)")); + } + + #[test] + fn initial_prompt_renders_empty_validator_context_map() { + let permission_prompt = PermissionPromptSpec { + shell: "bash".to_string(), + allowed_commands: vec!["cat".to_string()], + scope_rules: vec!["rule".to_string()], + workspace_root: ".".to_string(), + read_scope: "workspace".to_string(), + interactive_permissions: false, + allowed_paths: vec![], + }; + + let prompt = build_initial_user_prompt( + &MiniPrompt { + skill_id: "s1".to_string(), + text: "skill".to_string(), + }, + &[], + &ValidatorContextMap::default(), + &permission_prompt, + ); + + assert!(prompt.contains("Validator context map:")); + assert!(prompt.contains("- (none)")); + } } diff --git a/templates/aiken/audit_agent_initial_user_prompt.md b/templates/aiken/audit_agent_initial_user_prompt.md index a81ebe1..1134690 100644 --- a/templates/aiken/audit_agent_initial_user_prompt.md +++ b/templates/aiken/audit_agent_initial_user_prompt.md @@ -12,9 +12,16 @@ Skill (authoritative context): Referenced Aiken files: {{SOURCE_REFERENCES}} +Validator context map: +--- CONTEXT MAP START --- +{{VALIDATOR_CONTEXT_MAP}} +--- CONTEXT MAP END --- + Use the referenced files as your starting point. You may read additional files only if they are inside the allowed workspace scope and strictly required to validate the finding. Execution permissions: +--- PERMISSION PROMPT START --- {{PERMISSION_PROMPT}} +--- PERMISSION PROMPT END --- Return JSON action only. 
\ No newline at end of file diff --git a/tests/e2e/happy_path.rs b/tests/e2e/happy_path.rs index 496508d..e292409 100644 --- a/tests/e2e/happy_path.rs +++ b/tests/e2e/happy_path.rs @@ -160,6 +160,7 @@ fn aiken_audit_runs_in_initialized_project() { assert_output_contains(&result, "EXPERIMENTAL"); ctx.assert_file_exists(".tx3/audit/state.json"); + ctx.assert_file_exists(".tx3/audit/aiken-ast.json"); ctx.assert_file_exists(".tx3/audit/vulnerabilities.md"); let state_content = ctx.read_file(".tx3/audit/state.json"); @@ -167,6 +168,11 @@ fn aiken_audit_runs_in_initialized_project() { serde_json::from_str(&state_content).expect("state.json should be valid AnalysisStateJson"); assert_eq!(state.version, "1"); + assert!(state.ast.is_some(), "expected AST metadata to be present"); + assert!( + state.validator_context.validators.is_empty(), + "fresh init project should typically have no Aiken validators" + ); assert!( !state.iterations.is_empty(), "expected at least one analysis iteration" From c302d94f16d47b2a8c81eb622379b00ac429ede6 Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Fri, 27 Feb 2026 17:49:26 -0300 Subject: [PATCH 15/18] feat: add reasoning effort hint to OpenAI provider and update audit agent system prompt --- src/commands/audit/mod.rs | 4 + src/commands/audit/providers/anthropic.rs | 6 +- src/commands/audit/providers/mod.rs | 6 +- src/commands/audit/providers/openai.rs | 989 ++++++++++++++++++- src/commands/audit/providers/shared.rs | 29 +- templates/aiken/audit_agent_system_prompt.md | 3 +- 6 files changed, 951 insertions(+), 86 deletions(-) diff --git a/src/commands/audit/mod.rs b/src/commands/audit/mod.rs index 80386db..1cffb9e 100644 --- a/src/commands/audit/mod.rs +++ b/src/commands/audit/mod.rs @@ -67,6 +67,10 @@ pub struct Args { #[arg(long, default_value_t = false)] pub ai_logs: bool, + /// Optional reasoning effort hint for OpenAI-compatible providers (e.g. low|medium|high). + #[arg(long)] + pub reasoning_effort: Option, + /// Path where the Aiken AST snapshot JSON will be written. 
#[arg(long, default_value = ".tx3/audit/aiken-ast.json")] pub ast_out: String, diff --git a/src/commands/audit/providers/anthropic.rs b/src/commands/audit/providers/anthropic.rs index 23edad5..c3fddff 100644 --- a/src/commands/audit/providers/anthropic.rs +++ b/src/commands/audit/providers/anthropic.rs @@ -6,7 +6,7 @@ use super::shared::{ block_on_runtime_aware, build_agent_system_prompt, build_initial_user_prompt, build_tool_result_user_prompt, describe_read_request_friendly, execute_read_request, iteration_from_parsed, log_agent_progress, - parse_agent_action, render_model_output_for_log, render_tool_output_for_log, + parse_agent_action, render_tool_output_for_log, summarize_read_request, AgentAction, MAX_AGENT_STEPS, }; @@ -115,7 +115,7 @@ impl AnalysisProvider for AnthropicProvider { self.ai_logs, format!( "Model output:\n{}", - render_model_output_for_log(content, 2_000) + &content ), ); @@ -168,7 +168,7 @@ impl AnalysisProvider for AnthropicProvider { self.ai_logs, format!( "Tool output:\n{}", - render_tool_output_for_log(&request, &output, 2_000) + render_tool_output_for_log(&request, &output) ), ); diff --git a/src/commands/audit/providers/mod.rs b/src/commands/audit/providers/mod.rs index e594079..b114345 100644 --- a/src/commands/audit/providers/mod.rs +++ b/src/commands/audit/providers/mod.rs @@ -16,7 +16,7 @@ use self::anthropic::AnthropicProvider; use self::openai::OpenAiProvider; use self::scaffold::ScaffoldProvider; -const DEFAULT_AI_ENDPOINT: &str = "https://api.openai.com/v1/chat/completions"; +const DEFAULT_AI_ENDPOINT: &str = "https://api.openai.com/v1/responses"; const DEFAULT_AI_MODEL: &str = "gpt-4.1-mini"; const DEFAULT_AI_API_KEY_ENV: &str = "OPENAI_API_KEY"; const DEFAULT_ANTHROPIC_ENDPOINT: &str = "https://api.anthropic.com/v1/messages"; @@ -69,6 +69,8 @@ pub fn build_provider(args: &Args) -> Result> { api_key, model, ai_logs: args.ai_logs, + reasoning_effort: args.reasoning_effort.clone(), + ollama_compat: false, })) } "anthropic" => { @@ -113,6 +115,8 @@ pub fn build_provider(args: &Args) -> Result> { .clone() .unwrap_or_else(|| DEFAULT_OLLAMA_MODEL.to_string()), ai_logs: args.ai_logs, + reasoning_effort: args.reasoning_effort.clone(), + ollama_compat: true, })), value => Err(miette::miette!( "Unsupported provider '{}'. 
Expected one of: scaffold, openai, anthropic, ollama", diff --git a/src/commands/audit/providers/openai.rs b/src/commands/audit/providers/openai.rs index db22306..895526d 100644 --- a/src/commands/audit/providers/openai.rs +++ b/src/commands/audit/providers/openai.rs @@ -1,18 +1,16 @@ use miette::{Context, IntoDiagnostic, Result}; -use serde_json::Value; +use serde_json::{json, Value}; +use std::io::{self, Write}; use std::path::Path; +use std::time::Instant; use super::shared::{ block_on_runtime_aware, build_agent_system_prompt, build_initial_user_prompt, - build_tool_result_user_prompt, - describe_read_request_friendly, execute_read_request, iteration_from_parsed, log_agent_progress, - parse_agent_action, render_model_output_for_log, render_tool_output_for_log, - summarize_read_request, AgentAction, - MAX_AGENT_STEPS, -}; -use super::{ - AnalysisProvider, + build_tool_result_user_prompt, describe_read_request_friendly, execute_read_request, + iteration_from_parsed, log_agent_progress, parse_agent_action, + render_tool_output_for_log, summarize_read_request, AgentAction, MAX_AGENT_STEPS, }; +use super::AnalysisProvider; use crate::commands::audit::model::{ MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, ValidatorContextMap, VulnerabilitySkill, @@ -24,14 +22,756 @@ pub struct OpenAiProvider { pub api_key: String, pub model: String, pub ai_logs: bool, + pub reasoning_effort: Option, + pub ollama_compat: bool, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ApiFamily { + ChatCompletions, + Responses, +} + +fn detect_api_family(endpoint: &str, ollama_compat: bool) -> ApiFamily { + if ollama_compat { + return ApiFamily::ChatCompletions; + } + + if endpoint.contains("/responses") { + ApiFamily::Responses + } else { + ApiFamily::ChatCompletions + } +} + +fn build_chat_payload_variants( + model: &str, + messages: &[Value], + stream: bool, + reasoning_effort: Option<&str>, + ollama_compat: bool, +) -> Vec { + let mut base = json!({ + "model": model, + "messages": messages, + "response_format": { + "type": "json_object" + } + }); + + if stream { + base["stream"] = Value::Bool(true); + } + + let mut variants = vec![base.clone()]; + + if ollama_compat { + let mut with_ollama_think = base.clone(); + with_ollama_think["think"] = Value::Bool(true); + variants.insert(0, with_ollama_think); + } + + let Some(effort_raw) = reasoning_effort else { + return variants; + }; + + let effort = effort_raw.trim(); + if effort.is_empty() { + return variants; + } + + let mut with_reasoning_object = base.clone(); + with_reasoning_object["reasoning"] = json!({ "effort": effort }); + + let mut with_reasoning_effort = base.clone(); + with_reasoning_effort["reasoning_effort"] = Value::String(effort.to_string()); + + let mut with_reasoning_object_and_ollama = with_reasoning_object.clone(); + with_reasoning_object_and_ollama["think"] = Value::Bool(true); + + let mut with_reasoning_effort_and_ollama = with_reasoning_effort.clone(); + with_reasoning_effort_and_ollama["think"] = Value::Bool(true); + + if ollama_compat { + vec![ + with_reasoning_object_and_ollama, + with_reasoning_effort_and_ollama, + with_reasoning_object, + with_reasoning_effort, + base, + ] + } else { + vec![with_reasoning_object, with_reasoning_effort, base] + } +} + +fn build_responses_payload_variants( + model: &str, + messages: &[Value], + stream: bool, + reasoning_effort: Option<&str>, +) -> Vec { + let input = messages_to_responses_input(messages); + + let mut base = json!({ + "model": model, + "input": input, + "text": { 
+ "format": { + "type": "json_object" + } + } + }); + + if stream { + base["stream"] = Value::Bool(true); + } + + let Some(effort_raw) = reasoning_effort else { + return vec![base]; + }; + + let effort = effort_raw.trim(); + if effort.is_empty() { + return vec![base]; + } + + let mut with_reasoning_summary = base.clone(); + with_reasoning_summary["reasoning"] = json!({ + "effort": effort, + "summary": "auto" + }); + + let mut with_reasoning_effort = base.clone(); + with_reasoning_effort["reasoning"] = json!({ "effort": effort }); + + vec![with_reasoning_summary, with_reasoning_effort, base] +} + +fn messages_to_responses_input(messages: &[Value]) -> Vec { + messages + .iter() + .map(|message| { + let role = message + .get("role") + .and_then(Value::as_str) + .unwrap_or("user"); + let content = message.get("content").unwrap_or(&Value::Null); + + json!({ + "role": role, + "content": normalize_responses_input_content(role, content) + }) + }) + .collect() +} + +fn text_block_type_for_role(role: &str) -> &'static str { + if role.eq_ignore_ascii_case("assistant") { + "output_text" + } else { + "input_text" + } +} + +fn normalize_responses_input_content(role: &str, content: &Value) -> Value { + let text_block_type = text_block_type_for_role(role); + + if let Some(text) = content.as_str() { + return json!([ + { + "type": text_block_type, + "text": text + } + ]); + } + + if let Some(chunks) = content.as_array() { + let normalized = chunks + .iter() + .map(|chunk| { + if let Some(text) = chunk.get("text").and_then(Value::as_str) { + json!({ + "type": text_block_type, + "text": text + }) + } else { + chunk.clone() + } + }) + .collect::>(); + + return Value::Array(normalized); + } + + json!([ + { + "type": text_block_type, + "text": content.to_string() + } + ]) +} + +#[derive(Debug, Clone, Copy, Default)] +struct ReasoningStreamState { + started: bool, + line_break_emitted: bool, + last_summary_index: Option, +} + +#[derive(Debug, Clone, Copy, Default)] +struct ContentStreamState { + started: bool, + ends_with_newline: bool, +} + +fn stream_reasoning_delta_to_stdout( + enabled: bool, + state: &mut ReasoningStreamState, + delta: &str, +) { + if !enabled || delta.is_empty() { + return; + } + + let mut stdout = io::stdout().lock(); + + if !state.started { + let _ = writeln!(stdout, "🤖 🧠 Reasoning summary:"); + state.started = true; + } + + let _ = write!(stdout, "{}", delta); + let _ = stdout.flush(); + state.line_break_emitted = false; +} + +fn emit_reasoning_line_break(enabled: bool, state: &mut ReasoningStreamState) { + if !enabled || !state.started || state.line_break_emitted { + return; + } + + let mut stdout = io::stdout().lock(); + let _ = writeln!(stdout); + let _ = stdout.flush(); + state.line_break_emitted = true; +} + +fn emit_reasoning_double_line_break(enabled: bool, state: &mut ReasoningStreamState) { + if !enabled || !state.started || state.line_break_emitted { + return; + } + + let mut stdout = io::stdout().lock(); + let _ = write!(stdout, "\n\n"); + let _ = stdout.flush(); + state.line_break_emitted = true; +} + +fn finalize_reasoning_stdout(enabled: bool, state: &mut ReasoningStreamState) { + emit_reasoning_line_break(enabled, state); +} + +fn extract_summary_index(event: &Value) -> Option { + event + .get("summary_index") + .and_then(Value::as_i64) + .or_else(|| event.pointer("/summary/index").and_then(Value::as_i64)) +} + +fn maybe_emit_reasoning_line_break_on_summary_change( + enabled: bool, + state: &mut ReasoningStreamState, + summary_index: Option, +) { + let Some(current_index) = 
summary_index else { + return; + }; + + if let Some(previous_index) = state.last_summary_index { + if previous_index != current_index { + emit_reasoning_double_line_break(enabled, state); + } + } + + state.last_summary_index = Some(current_index); +} + +fn stream_content_delta_to_stdout(enabled: bool, state: &mut ContentStreamState, delta: &str) { + if !enabled || delta.is_empty() { + return; + } + + let mut stdout = io::stdout().lock(); + + if !state.started { + let _ = write!(stdout, "🤖 ↳ Output: "); + state.started = true; + state.ends_with_newline = false; + } + + let _ = write!(stdout, "{}", delta); + let _ = stdout.flush(); + + state.ends_with_newline = delta.ends_with('\n'); +} + +fn finalize_content_stdout(enabled: bool, state: &mut ContentStreamState) { + if !enabled || !state.started || state.ends_with_newline { + return; + } + + let mut stdout = io::stdout().lock(); + let _ = writeln!(stdout); + let _ = stdout.flush(); + state.ends_with_newline = true; +} + +fn extract_chat_reasoning_delta(event: &Value) -> Option { + event + .pointer("/choices/0/delta/reasoning_content") + .and_then(Value::as_str) + .or_else(|| { + event + .pointer("/choices/0/delta/reasoning") + .and_then(Value::as_str) + }) + .or_else(|| { + event + .pointer("/choices/0/delta/thinking") + .and_then(Value::as_str) + }) + .map(ToString::to_string) +} + +fn extract_chat_content_delta(event: &Value) -> Option { + event + .pointer("/choices/0/delta/content") + .and_then(Value::as_str) + .map(ToString::to_string) +} + +fn extract_responses_reasoning_delta(event: &Value) -> Option { + let event_type = event + .get("type") + .and_then(Value::as_str) + .unwrap_or_default() + .to_ascii_lowercase(); + + let is_delta_event = event_type.ends_with(".delta"); + let is_reasoning_event = event_type.contains("reasoning") || event_type.contains("summary"); + + if !(is_delta_event && is_reasoning_event) { + return None; + } + + event + .get("delta") + .and_then(Value::as_str) + .or_else(|| event.get("text").and_then(Value::as_str)) + .or_else(|| event.pointer("/summary/text").and_then(Value::as_str)) + .map(ToString::to_string) +} + +fn extract_responses_content_delta(event: &Value) -> Option { + let event_type = event + .get("type") + .and_then(Value::as_str) + .unwrap_or_default() + .to_ascii_lowercase(); + + if !event_type.ends_with(".delta") { + return None; + } + + if event_type.contains("reasoning") || event_type.contains("summary") { + return None; + } + + if event_type.contains("output_text") || event_type.contains("message") { + return event + .get("delta") + .and_then(Value::as_str) + .or_else(|| event.get("text").and_then(Value::as_str)) + .or_else(|| event.pointer("/content/delta").and_then(Value::as_str)) + .map(ToString::to_string); + } + + None +} + +fn extract_responses_output_text(response_json: &Value) -> Option { + if let Some(output_text) = response_json.get("output_text").and_then(Value::as_str) { + if !output_text.trim().is_empty() { + return Some(output_text.to_string()); + } + } + + let mut chunks = Vec::new(); + + if let Some(outputs) = response_json.get("output").and_then(Value::as_array) { + for item in outputs { + let item_type = item + .get("type") + .and_then(Value::as_str) + .unwrap_or_default(); + + if (item_type == "output_text" || item_type == "text") + && item.get("text").and_then(Value::as_str).is_some() + { + if let Some(text) = item.get("text").and_then(Value::as_str) { + if !text.trim().is_empty() { + chunks.push(text.to_string()); + } + } + + continue; + } + + if let Some(content) = 
item.get("content").and_then(Value::as_array) { + for block in content { + let block_type = block + .get("type") + .and_then(Value::as_str) + .unwrap_or_default(); + if (block_type == "output_text" || block_type == "text") + && block.get("text").and_then(Value::as_str).is_some() + { + if let Some(text) = block.get("text").and_then(Value::as_str) { + if !text.trim().is_empty() { + chunks.push(text.to_string()); + } + } + } + } + } + } + } + + if chunks.is_empty() { + None + } else { + Some(chunks.join("")) + } +} + +fn extract_responses_reasoning_summary(response_json: &Value) -> Option { + let mut chunks = Vec::new(); + + if let Some(outputs) = response_json.get("output").and_then(Value::as_array) { + for item in outputs { + let item_type = item + .get("type") + .and_then(Value::as_str) + .unwrap_or_default(); + + if item_type != "reasoning" { + continue; + } + + if let Some(summary_text) = item.get("summary").and_then(Value::as_str) { + if !summary_text.trim().is_empty() { + chunks.push(summary_text.to_string()); + } + } + + if let Some(summary_items) = item.get("summary").and_then(Value::as_array) { + for entry in summary_items { + if let Some(text) = entry.get("text").and_then(Value::as_str) { + if !text.trim().is_empty() { + chunks.push(text.to_string()); + } + } + } + } + } + } + + if chunks.is_empty() { + None + } else { + Some(chunks.join("\n")) + } +} + +async fn stream_chat_attempt( + client: &reqwest::Client, + endpoint: &str, + api_key: &str, + payload: &Value, + ai_logs: bool, +) -> Result { + let mut response = client + .post(endpoint) + .bearer_auth(api_key) + .json(payload) + .send() + .await + .into_diagnostic()?; + + let status = response.status(); + if !status.is_success() { + let body = response.text().await.into_diagnostic()?; + return Err(miette::miette!( + "Streaming request failed with status {}: {}", + status, + body + )); + } + + let mut pending = String::new(); + let mut model_output = String::new(); + let mut reasoning_output = String::new(); + let mut reasoning_stream_state = ReasoningStreamState::default(); + let mut content_stream_state = ContentStreamState::default(); + + while let Some(chunk) = response.chunk().await.into_diagnostic()? 
{ + pending.push_str(&String::from_utf8_lossy(&chunk)); + + while let Some(newline_index) = pending.find('\n') { + let line = pending[..newline_index].trim_end_matches('\r').to_string(); + pending.drain(..=newline_index); + + let line = line.trim(); + if line.is_empty() || !line.starts_with("data:") { + continue; + } + + let event_data = line[5..].trim(); + if event_data == "[DONE]" { + break; + } + + let event: Value = match serde_json::from_str(event_data) { + Ok(parsed) => parsed, + Err(_) => continue, + }; + + if let Some(reasoning_delta) = extract_chat_reasoning_delta(&event) { + reasoning_output.push_str(&reasoning_delta); + stream_reasoning_delta_to_stdout( + ai_logs, + &mut reasoning_stream_state, + &reasoning_delta, + ); + } + + if let Some(content_delta) = extract_chat_content_delta(&event) { + emit_reasoning_line_break(ai_logs, &mut reasoning_stream_state); + model_output.push_str(&content_delta); + stream_content_delta_to_stdout(ai_logs, &mut content_stream_state, &content_delta); + } + } + } + + finalize_content_stdout(ai_logs, &mut content_stream_state); + finalize_reasoning_stdout(ai_logs, &mut reasoning_stream_state); + + if model_output.is_empty() { + return Err(miette::miette!( + "Streaming response did not include content deltas" + )); + } + + let _ = reasoning_output; + + Ok(model_output) +} + +async fn stream_responses_attempt( + client: &reqwest::Client, + endpoint: &str, + api_key: &str, + payload: &Value, + ai_logs: bool, +) -> Result { + let mut response = client + .post(endpoint) + .bearer_auth(api_key) + .json(payload) + .send() + .await + .into_diagnostic()?; + + let status = response.status(); + if !status.is_success() { + let body = response.text().await.into_diagnostic()?; + return Err(miette::miette!( + "Streaming request failed with status {}: {}", + status, + body + )); + } + + let mut pending = String::new(); + let mut model_output = String::new(); + let mut reasoning_output = String::new(); + let mut reasoning_stream_state = ReasoningStreamState::default(); + let mut content_stream_state = ContentStreamState::default(); + + while let Some(chunk) = response.chunk().await.into_diagnostic()? 
{ + pending.push_str(&String::from_utf8_lossy(&chunk)); + + while let Some(newline_index) = pending.find('\n') { + let line = pending[..newline_index].trim_end_matches('\r').to_string(); + pending.drain(..=newline_index); + + let line = line.trim(); + if line.is_empty() || !line.starts_with("data:") { + continue; + } + + let event_data = line[5..].trim(); + if event_data == "[DONE]" { + break; + } + + let event: Value = match serde_json::from_str(event_data) { + Ok(parsed) => parsed, + Err(_) => continue, + }; + + if let Some(reasoning_delta) = extract_responses_reasoning_delta(&event) { + maybe_emit_reasoning_line_break_on_summary_change( + ai_logs, + &mut reasoning_stream_state, + extract_summary_index(&event), + ); + reasoning_output.push_str(&reasoning_delta); + stream_reasoning_delta_to_stdout( + ai_logs, + &mut reasoning_stream_state, + &reasoning_delta, + ); + } + + if let Some(content_delta) = extract_responses_content_delta(&event) { + emit_reasoning_line_break(ai_logs, &mut reasoning_stream_state); + model_output.push_str(&content_delta); + stream_content_delta_to_stdout(ai_logs, &mut content_stream_state, &content_delta); + } + } + } + + finalize_content_stdout(ai_logs, &mut content_stream_state); + finalize_reasoning_stdout(ai_logs, &mut reasoning_stream_state); + + if model_output.is_empty() { + return Err(miette::miette!( + "Streaming response did not include output text deltas" + )); + } + + let _ = reasoning_output; + + Ok(model_output) +} + +async fn non_stream_chat_attempt( + client: &reqwest::Client, + endpoint: &str, + api_key: &str, + payload: &Value, + ai_logs: bool, +) -> Result { + let response = client + .post(endpoint) + .bearer_auth(api_key) + .json(payload) + .send() + .await + .into_diagnostic()?; + + let response = response.error_for_status().into_diagnostic()?; + let response_json = response.json::().await.into_diagnostic()?; + + let content = response_json + .pointer("/choices/0/message/content") + .and_then(Value::as_str) + .ok_or_else(|| miette::miette!("AI provider returned an unexpected response payload"))?; + + if let Some(reasoning_text) = response_json + .pointer("/choices/0/message/reasoning_content") + .and_then(Value::as_str) + .or_else(|| { + response_json + .pointer("/choices/0/message/reasoning") + .and_then(Value::as_str) + }) + .or_else(|| { + response_json + .pointer("/choices/0/message/thinking") + .and_then(Value::as_str) + }) + { + log_agent_progress( + ai_logs, + format!( + "🧠 Model reasoning output:\n{}", + &reasoning_text + ), + ); + } + + Ok(content.to_string()) +} + +async fn non_stream_responses_attempt( + client: &reqwest::Client, + endpoint: &str, + api_key: &str, + payload: &Value, + ai_logs: bool, +) -> Result { + let response = client + .post(endpoint) + .bearer_auth(api_key) + .json(payload) + .send() + .await + .into_diagnostic()?; + + let response = response.error_for_status().into_diagnostic()?; + let response_json = response.json::().await.into_diagnostic()?; + + let content = extract_responses_output_text(&response_json) + .ok_or_else(|| miette::miette!("AI provider returned an unexpected response payload"))?; + + if let Some(reasoning_summary) = extract_responses_reasoning_summary(&response_json) { + log_agent_progress( + ai_logs, + format!( + "🧠 Model reasoning summary:\n{}", + &reasoning_summary + ), + ); + } + + Ok(content) } impl AnalysisProvider for OpenAiProvider { fn provider_spec(&self) -> ProviderSpec { + let api_family = detect_api_family(&self.endpoint, self.ollama_compat); + let api_note = match api_family { + 
ApiFamily::ChatCompletions => "chat-completions", + ApiFamily::Responses => "responses", + }; + + let reasoning_note = self + .reasoning_effort + .as_deref() + .filter(|value| !value.trim().is_empty()) + .map(|value| format!(", reasoning_effort={}", value)) + .unwrap_or_default(); + ProviderSpec { name: "openai-compatible".to_string(), model: Some(self.model.clone()), - notes: format!("Endpoint: {}", self.endpoint), + notes: format!( + "Endpoint: {} (api={}){}", + self.endpoint, api_note, reasoning_note + ), } } @@ -51,6 +791,8 @@ impl AnalysisProvider for OpenAiProvider { ) })?; + let api_family = detect_api_family(&self.endpoint, self.ollama_compat); + let system_prompt = build_agent_system_prompt(); let initial_user_prompt = build_initial_user_prompt( prompt, @@ -60,11 +802,11 @@ impl AnalysisProvider for OpenAiProvider { ); let mut messages = vec![ - serde_json::json!({ + json!({ "role": "system", "content": system_prompt, }), - serde_json::json!({ + json!({ "role": "user", "content": initial_user_prompt, }), @@ -82,49 +824,182 @@ impl AnalysisProvider for OpenAiProvider { ), ); - let payload = serde_json::json!({ - "model": self.model, - "messages": messages.clone(), - "response_format": { - "type": "json_object" + log_agent_progress( + self.ai_logs, + format!( + "🤔 Thinking… waiting for model response (step {}/{}, skill='{}')", + step_idx + 1, + MAX_AGENT_STEPS, + skill.id + ), + ); + + let request_started_at = Instant::now(); + let response_content_result = block_on_runtime_aware(async { + let client = reqwest::Client::new(); + let reasoning_effort = self.reasoning_effort.as_deref(); + + if self.ai_logs { + let mut last_stream_error: Option = None; + let stream_payloads = match api_family { + ApiFamily::ChatCompletions => build_chat_payload_variants( + &self.model, + &messages, + true, + reasoning_effort, + self.ollama_compat, + ), + ApiFamily::Responses => build_responses_payload_variants( + &self.model, + &messages, + true, + reasoning_effort, + ), + }; + + for (attempt_idx, stream_payload) in stream_payloads.iter().enumerate() { + let stream_attempt = match api_family { + ApiFamily::ChatCompletions => { + stream_chat_attempt( + &client, + &self.endpoint, + &self.api_key, + stream_payload, + self.ai_logs, + ) + .await + } + ApiFamily::Responses => { + stream_responses_attempt( + &client, + &self.endpoint, + &self.api_key, + stream_payload, + self.ai_logs, + ) + .await + } + }; + + match stream_attempt { + Ok(content) => return Ok(content), + Err(error) => { + last_stream_error = Some(error.to_string()); + log_agent_progress( + self.ai_logs, + format!( + "⚠️ Streaming attempt {} failed: {}", + attempt_idx + 1, + error + ), + ); + } + } + } + + if let Some(error) = last_stream_error { + log_agent_progress( + self.ai_logs, + format!( + "⚠️ Streaming unavailable, falling back to non-stream request: {}", + error + ), + ); + } + } + + let non_stream_payloads = match api_family { + ApiFamily::ChatCompletions => build_chat_payload_variants( + &self.model, + &messages, + false, + reasoning_effort, + self.ollama_compat, + ), + ApiFamily::Responses => { + build_responses_payload_variants(&self.model, &messages, false, reasoning_effort) + } + }; + + let mut last_non_stream_error: Option = None; + + for (attempt_idx, payload) in non_stream_payloads.iter().enumerate() { + let request_result = match api_family { + ApiFamily::ChatCompletions => { + non_stream_chat_attempt( + &client, + &self.endpoint, + &self.api_key, + payload, + self.ai_logs, + ) + .await + } + ApiFamily::Responses => { + 
non_stream_responses_attempt( + &client, + &self.endpoint, + &self.api_key, + payload, + self.ai_logs, + ) + .await + } + }; + + match request_result { + Ok(content) => return Ok(content), + Err(error) => { + last_non_stream_error = Some(error.to_string()); + log_agent_progress( + self.ai_logs, + format!( + "⚠️ Non-stream attempt {} failed: {}", + attempt_idx + 1, + error + ), + ); + } + } } + + Err(miette::miette!( + "All non-stream model request attempts failed: {}", + last_non_stream_error.unwrap_or_else(|| "unknown error".to_string()) + )) }); - let response_json = block_on_runtime_aware(async { - let client = reqwest::Client::new(); - let response = client - .post(&self.endpoint) - .bearer_auth(&self.api_key) - .json(&payload) - .send() - .await - .into_diagnostic()?; - - let response = response.error_for_status().into_diagnostic()?; - response.json::().await.into_diagnostic() - })?; - - let content = response_json - .pointer("/choices/0/message/content") - .and_then(Value::as_str) - .ok_or_else(|| { - miette::miette!("AI provider returned an unexpected response payload") - })?; + let elapsed = request_started_at.elapsed(); - messages.push(serde_json::json!({ + if let Err(error) = &response_content_result { + log_agent_progress( + self.ai_logs, + format!( + "❌ Model request failed after {} ms: {}", + elapsed.as_millis(), + error + ), + ); + } else { + log_agent_progress( + self.ai_logs, + format!("✅ Model response received in {} ms", elapsed.as_millis()), + ); + } + + let content = response_content_result?; + + messages.push(json!({ "role": "assistant", "content": content, })); log_agent_progress( self.ai_logs, - format!( - "Model output:\n{}", - render_model_output_for_log(content, 2_000) - ), + format!("Model output:\n{}", &content), ); - match parse_agent_action(content)? { + match parse_agent_action(&content)? 
{ AgentAction::Final(parsed) => { let findings = parsed .get("findings") @@ -135,6 +1010,20 @@ impl AnalysisProvider for OpenAiProvider { .get("status") .and_then(Value::as_str) .unwrap_or("completed"); + let analysis_summary = parsed + .get("analysis_summary") + .and_then(Value::as_str) + .filter(|value| !value.trim().is_empty()); + + if let Some(summary) = analysis_summary { + log_agent_progress( + self.ai_logs, + format!( + "Model analysis summary:\n{}", + summary + ), + ); + } log_agent_progress( self.ai_logs, @@ -160,10 +1049,7 @@ impl AnalysisProvider for OpenAiProvider { log_agent_progress( self.ai_logs, - format!( - "Running local action: {}", - summarize_read_request(&request) - ), + format!("Running local action: {}", summarize_read_request(&request)), ); let output = execute_read_request(&request, &canonical_root, permission_prompt) @@ -173,16 +1059,13 @@ impl AnalysisProvider for OpenAiProvider { self.ai_logs, format!( "Tool output:\n{}", - render_tool_output_for_log(&request, &output, 2_000) + render_tool_output_for_log(&request, &output) ), ); - log_agent_progress( - self.ai_logs, - "Sending tool output back to model", - ); + log_agent_progress(self.ai_logs, "Sending tool output back to model"); - messages.push(serde_json::json!({ + messages.push(json!({ "role": "user", "content": build_tool_result_user_prompt(&request, &output), })); diff --git a/src/commands/audit/providers/shared.rs b/src/commands/audit/providers/shared.rs index 37f83fb..103971c 100644 --- a/src/commands/audit/providers/shared.rs +++ b/src/commands/audit/providers/shared.rs @@ -13,7 +13,6 @@ use crate::commands::audit::model::{ }; pub(super) const MAX_AGENT_STEPS: usize = 25; -const MAX_COMMAND_OUTPUT_CHARS: usize = 30_000; const AGENT_SYSTEM_PROMPT: &str = include_str!("../../../../templates/aiken/audit_agent_system_prompt.md"); const INITIAL_USER_PROMPT_TEMPLATE: &str = @@ -424,17 +423,6 @@ fn run_command_capture(command: &str, args: &[String], cwd: &Path) -> Result MAX_COMMAND_OUTPUT_CHARS { - let truncated = combined - .chars() - .take(MAX_COMMAND_OUTPUT_CHARS) - .collect::(); - return Ok(format!( - "{}\n...(truncated to {} chars)", - truncated, MAX_COMMAND_OUTPUT_CHARS - )); - } - Ok(combined) } @@ -519,7 +507,6 @@ pub(super) fn describe_read_request_friendly(request: &ReadRequest) -> String { pub(super) fn render_tool_output_for_log( request: &ReadRequest, output: &str, - max_chars: usize, ) -> String { match request { ReadRequest::ReadFile { path } => { @@ -529,24 +516,10 @@ pub(super) fn render_tool_output_for_log( output.chars().count() ) } - _ => truncate_for_log(output, max_chars), + _ => output.to_string(), } } -pub(super) fn render_model_output_for_log(output: &str, max_chars: usize) -> String { - truncate_for_log(output, max_chars) -} - -fn truncate_for_log(output: &str, max_chars: usize) -> String { - let char_count = output.chars().count(); - if char_count <= max_chars { - return output.to_string(); - } - - let preview = output.chars().take(max_chars).collect::(); - format!("{}\n… (truncated, {} chars total)", preview, char_count) -} - pub(super) fn log_agent_progress(enabled: bool, message: impl AsRef) { if enabled { eprintln!("🤖 {}", message.as_ref()); diff --git a/templates/aiken/audit_agent_system_prompt.md b/templates/aiken/audit_agent_system_prompt.md index 24e4007..2c1f040 100644 --- a/templates/aiken/audit_agent_system_prompt.md +++ b/templates/aiken/audit_agent_system_prompt.md @@ -5,8 +5,9 @@ Valid JSON actions: 2) 
{"action":"grep","pattern":"regex","path":"relative/path/or/dir","context_lines":2} 3) {"action":"list_dir","path":"relative/path"} 4) {"action":"find_files","path":"relative/path","glob":"*.ak"} -5) {"action":"final","status":"completed|scaffolded","findings":[{"title":string,"severity":string,"summary":string,"evidence":[string],"recommendation":string,"file":string|null,"line":number|null}],"next_prompt":string|null} +5) {"action":"final","status":"completed|scaffolded","analysis_summary":string|null,"findings":[{"title":string,"severity":string,"summary":string,"evidence":[string],"recommendation":string,"file":string|null,"line":number|null}],"next_prompt":string|null} Prefer returning file and line whenever you can confidently identify where the bug exists or where the recommendation applies. +In final actions, include `analysis_summary` as a concise 1-3 sentence explanation of what you checked and why you concluded the result. Never include markdown fences. \ No newline at end of file From 2716749a9d3930a96f65ab9b3e88f5e8c0e5f1e1 Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Fri, 27 Feb 2026 18:25:09 -0300 Subject: [PATCH 16/18] feat: add reasoning to anthropic + refactor common logic into shared --- src/commands/audit/providers/anthropic.rs | 658 +++++++++++++++++----- src/commands/audit/providers/mod.rs | 29 +- src/commands/audit/providers/openai.rs | 436 ++++---------- src/commands/audit/providers/shared.rs | 235 +++++++- 4 files changed, 881 insertions(+), 477 deletions(-) diff --git a/src/commands/audit/providers/anthropic.rs b/src/commands/audit/providers/anthropic.rs index c3fddff..2ebc36c 100644 --- a/src/commands/audit/providers/anthropic.rs +++ b/src/commands/audit/providers/anthropic.rs @@ -1,23 +1,22 @@ use miette::{Context, IntoDiagnostic, Result}; -use serde_json::Value; +use serde_json::{json, Value}; use std::path::Path; use super::shared::{ block_on_runtime_aware, build_agent_system_prompt, build_initial_user_prompt, - build_tool_result_user_prompt, - describe_read_request_friendly, execute_read_request, iteration_from_parsed, log_agent_progress, - parse_agent_action, render_tool_output_for_log, - summarize_read_request, AgentAction, - MAX_AGENT_STEPS, -}; -use super::{ - AnalysisProvider, + emit_reasoning_double_line_break, emit_reasoning_line_break, finalize_content_stdout, + finalize_reasoning_stdout, log_agent_progress, run_agent_loop, stream_content_delta_to_stdout, + stream_reasoning_delta_to_stdout, ContentStreamState, ReasoningStreamState, }; +use super::AnalysisProvider; use crate::commands::audit::model::{ MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, ValidatorContextMap, VulnerabilitySkill, }; +const DEFAULT_MAX_TOKENS: u32 = 1200; +const THINKING_MAX_TOKENS: u32 = 1600; +const THINKING_BUDGET_TOKENS: u32 = 1024; #[derive(Debug, Clone)] pub struct AnthropicProvider { pub endpoint: String, @@ -27,6 +26,428 @@ pub struct AnthropicProvider { pub ai_logs: bool, } +fn build_anthropic_payload_variants( + model: &str, + system_prompt: &str, + messages: &[Value], + stream: bool, +) -> Vec<Value> { + let normalized_messages = normalize_anthropic_messages(messages); + + let mut base = json!({ + "model": model, + "max_tokens": DEFAULT_MAX_TOKENS, + "system": system_prompt, + "messages": normalized_messages, + }); + + if stream { + base["stream"] = Value::Bool(true); + } + + let mut with_thinking = base.clone(); + with_thinking["max_tokens"] = Value::from(THINKING_MAX_TOKENS); + with_thinking["thinking"] = json!({ + "type": "enabled", +
"budget_tokens": THINKING_BUDGET_TOKENS + }); + + vec![with_thinking, base] +} + +fn normalize_anthropic_messages(messages: &[Value]) -> Vec { + messages + .iter() + .map(|message| { + let role = message + .get("role") + .and_then(Value::as_str) + .unwrap_or("user") + .to_ascii_lowercase(); + + let role = if role == "assistant" { + "assistant" + } else { + "user" + }; + + let content = normalize_anthropic_message_content(message.get("content")); + + json!({ + "role": role, + "content": content, + }) + }) + .collect() +} + +fn normalize_anthropic_message_content(content: Option<&Value>) -> Value { + let Some(content) = content else { + return json!([ + { + "type": "text", + "text": "" + } + ]); + }; + + if let Some(text) = content.as_str() { + return json!([ + { + "type": "text", + "text": text + } + ]); + } + + if let Some(items) = content.as_array() { + let normalized_items = items + .iter() + .map(|item| { + if item.get("type").and_then(Value::as_str).is_some() { + return item.clone(); + } + + if let Some(text) = item.get("text").and_then(Value::as_str) { + return json!({ + "type": "text", + "text": text, + }); + } + + json!({ + "type": "text", + "text": item.to_string(), + }) + }) + .collect::>(); + + return Value::Array(normalized_items); + } + + json!([ + { + "type": "text", + "text": content.to_string() + } + ]) +} + +fn maybe_emit_reasoning_line_break_on_summary_change( + enabled: bool, + state: &mut ReasoningStreamState, + summary_index: Option, +) { + let Some(current_index) = summary_index else { + return; + }; + + if let Some(previous_index) = state.last_summary_index { + if previous_index != current_index { + emit_reasoning_double_line_break(enabled, state); + } + } + + state.last_summary_index = Some(current_index); +} + +fn extract_anthropic_reasoning_delta(event: &Value) -> Option { + let event_type = event + .get("type") + .and_then(Value::as_str) + .unwrap_or_default() + .to_ascii_lowercase(); + + match event_type.as_str() { + "content_block_start" => { + let block_type = event + .pointer("/content_block/type") + .and_then(Value::as_str) + .unwrap_or_default() + .to_ascii_lowercase(); + + if block_type.contains("thinking") || block_type.contains("reasoning") { + return event + .pointer("/content_block/thinking") + .and_then(Value::as_str) + .or_else(|| event.pointer("/content_block/text").and_then(Value::as_str)) + .map(ToString::to_string); + } + + None + } + "content_block_delta" => { + let delta_type = event + .pointer("/delta/type") + .and_then(Value::as_str) + .unwrap_or_default() + .to_ascii_lowercase(); + + if delta_type.contains("thinking") || delta_type.contains("reasoning") { + return event + .pointer("/delta/thinking") + .and_then(Value::as_str) + .or_else(|| event.pointer("/delta/text").and_then(Value::as_str)) + .map(ToString::to_string); + } + + None + } + _ => None, + } +} + +fn extract_anthropic_reasoning_index(event: &Value) -> Option { + let event_type = event + .get("type") + .and_then(Value::as_str) + .unwrap_or_default() + .to_ascii_lowercase(); + + if event_type == "content_block_start" || event_type == "content_block_delta" { + return event.get("index").and_then(Value::as_i64); + } + + None +} + +fn extract_anthropic_content_delta(event: &Value) -> Option { + let event_type = event + .get("type") + .and_then(Value::as_str) + .unwrap_or_default() + .to_ascii_lowercase(); + + match event_type.as_str() { + "content_block_start" => { + let block_type = event + .pointer("/content_block/type") + .and_then(Value::as_str) + .unwrap_or_default() + 
.to_ascii_lowercase(); + + if block_type == "text" { + return event + .pointer("/content_block/text") + .and_then(Value::as_str) + .map(ToString::to_string); + } + + None + } + "content_block_delta" => { + let delta_type = event + .pointer("/delta/type") + .and_then(Value::as_str) + .unwrap_or_default() + .to_ascii_lowercase(); + + if delta_type == "text_delta" { + return event + .pointer("/delta/text") + .and_then(Value::as_str) + .map(ToString::to_string); + } + + None + } + _ => None, + } +} + +fn extract_anthropic_non_stream_content(response_json: &Value) -> Option { + let mut text_chunks = Vec::new(); + + if let Some(blocks) = response_json.get("content").and_then(Value::as_array) { + for block in blocks { + let block_type = block + .get("type") + .and_then(Value::as_str) + .unwrap_or_default(); + + if block_type == "text" { + if let Some(text) = block.get("text").and_then(Value::as_str) { + if !text.trim().is_empty() { + text_chunks.push(text.to_string()); + } + } + } + } + } + + if text_chunks.is_empty() { + response_json + .pointer("/content/0/text") + .and_then(Value::as_str) + .filter(|value| !value.trim().is_empty()) + .map(ToString::to_string) + } else { + Some(text_chunks.join("")) + } +} + +fn extract_anthropic_non_stream_reasoning(response_json: &Value) -> Option { + let mut reasoning_chunks = Vec::new(); + + if let Some(blocks) = response_json.get("content").and_then(Value::as_array) { + for block in blocks { + let block_type = block + .get("type") + .and_then(Value::as_str) + .unwrap_or_default() + .to_ascii_lowercase(); + + if block_type.contains("thinking") || block_type.contains("reasoning") { + if let Some(text) = block + .get("thinking") + .and_then(Value::as_str) + .or_else(|| block.get("text").and_then(Value::as_str)) + { + if !text.trim().is_empty() { + reasoning_chunks.push(text.to_string()); + } + } + } + } + } + + if reasoning_chunks.is_empty() { + None + } else { + Some(reasoning_chunks.join("\n")) + } +} + +async fn stream_attempt( + client: &reqwest::Client, + endpoint: &str, + api_key: &str, + version: &str, + payload: &Value, + ai_logs: bool, +) -> Result { + let mut response = client + .post(endpoint) + .header("x-api-key", api_key) + .header("anthropic-version", version) + .json(payload) + .send() + .await + .into_diagnostic()?; + + let status = response.status(); + if !status.is_success() { + let body = response.text().await.into_diagnostic()?; + return Err(miette::miette!( + "Streaming request failed with status {}: {}", + status, + body + )); + } + + let mut pending = String::new(); + let mut model_output = String::new(); + let mut reasoning_stream_state = ReasoningStreamState::default(); + let mut content_stream_state = ContentStreamState::default(); + + while let Some(chunk) = response.chunk().await.into_diagnostic()? 
{ + pending.push_str(&String::from_utf8_lossy(&chunk)); + + while let Some(newline_index) = pending.find('\n') { + let line = pending[..newline_index].trim_end_matches('\r').to_string(); + pending.drain(..=newline_index); + + let line = line.trim(); + if line.is_empty() || !line.starts_with("data:") { + continue; + } + + let event_data = line[5..].trim(); + if event_data == "[DONE]" { + break; + } + + let event: Value = match serde_json::from_str(event_data) { + Ok(parsed) => parsed, + Err(_) => continue, + }; + + if let Some(reasoning_delta) = extract_anthropic_reasoning_delta(&event) { + maybe_emit_reasoning_line_break_on_summary_change( + ai_logs, + &mut reasoning_stream_state, + extract_anthropic_reasoning_index(&event), + ); + stream_reasoning_delta_to_stdout( + ai_logs, + &mut reasoning_stream_state, + &reasoning_delta, + ); + } + + if let Some(content_delta) = extract_anthropic_content_delta(&event) { + emit_reasoning_line_break(ai_logs, &mut reasoning_stream_state); + model_output.push_str(&content_delta); + stream_content_delta_to_stdout(ai_logs, &mut content_stream_state, &content_delta); + } + } + } + + finalize_content_stdout(ai_logs, &mut content_stream_state); + finalize_reasoning_stdout(ai_logs, &mut reasoning_stream_state); + + if model_output.is_empty() { + return Err(miette::miette!( + "Streaming response did not include output text deltas" + )); + } + + Ok(model_output) +} + +async fn non_stream_attempt( + client: &reqwest::Client, + endpoint: &str, + api_key: &str, + version: &str, + payload: &Value, + ai_logs: bool, +) -> Result { + let response = client + .post(endpoint) + .header("x-api-key", api_key) + .header("anthropic-version", version) + .json(payload) + .send() + .await + .into_diagnostic()?; + + let status = response.status(); + if !status.is_success() { + let body = response.text().await.into_diagnostic()?; + return Err(miette::miette!( + "Request failed with status {}: {}", + status, + body + )); + } + + let response_json = response.json::().await.into_diagnostic()?; + + let content = extract_anthropic_non_stream_content(&response_json).ok_or_else(|| { + miette::miette!("Anthropic provider returned an unexpected response payload") + })?; + + if let Some(reasoning_text) = extract_anthropic_non_stream_reasoning(&response_json) { + log_agent_progress( + ai_logs, + format!("🧠 Model reasoning output:\n{}", reasoning_text), + ); + } + + Ok(content) +} + impl AnalysisProvider for AnthropicProvider { fn provider_spec(&self) -> ProviderSpec { ProviderSpec { @@ -65,130 +486,103 @@ impl AnalysisProvider for AnthropicProvider { "content": initial_user_prompt, })]; - for step_idx in 0..MAX_AGENT_STEPS { - log_agent_progress( - self.ai_logs, - format!( - "Step {}/{} • requesting next action for skill '{}' ({})", - step_idx + 1, - MAX_AGENT_STEPS, - skill.id, - self.endpoint - ), - ); - - let payload = serde_json::json!({ - "model": self.model, - "max_tokens": 1200, - "system": system_prompt, - "messages": messages.clone(), - }); - - let response_json = block_on_runtime_aware(async { - let client = reqwest::Client::new(); - let response = client - .post(&self.endpoint) - .header("x-api-key", &self.api_key) - .header("anthropic-version", &self.version) - .json(&payload) - .send() - .await - .into_diagnostic()?; - - let response = response.error_for_status().into_diagnostic()?; - response.json::().await.into_diagnostic() - })?; - - let content = response_json - .pointer("/content/0/text") - .and_then(Value::as_str) - .ok_or_else(|| { - miette::miette!("Anthropic provider 
returned an unexpected response payload") - })?; + run_agent_loop( + skill, + &self.endpoint, + self.ai_logs, + &canonical_root, + permission_prompt, + &mut messages, + "Anthropic provider", + |messages| { + block_on_runtime_aware(async { + let client = reqwest::Client::new(); - messages.push(serde_json::json!({ - "role": "assistant", - "content": content, - })); - - log_agent_progress( - self.ai_logs, - format!( - "Model output:\n{}", - &content - ), - ); - - match parse_agent_action(content)? { - AgentAction::Final(parsed) => { - let findings = parsed - .get("findings") - .and_then(Value::as_array) - .map(|items| items.len()) - .unwrap_or(0); - let status = parsed - .get("status") - .and_then(Value::as_str) - .unwrap_or("completed"); - - log_agent_progress( - self.ai_logs, - format!( - "Model completed skill '{}' at step {}/{} • status={} • findings={}", - skill.id, - step_idx + 1, - MAX_AGENT_STEPS, - status, - findings - ), - ); - return Ok(iteration_from_parsed(skill, parsed)); - } - AgentAction::ReadRequest(request) => { - log_agent_progress( - self.ai_logs, - format!( - "Model requested: {}", - describe_read_request_friendly(&request) - ), - ); - - log_agent_progress( - self.ai_logs, - format!( - "Running local action: {}", - summarize_read_request(&request) - ), - ); - - let output = execute_read_request(&request, &canonical_root, permission_prompt) - .unwrap_or_else(|error| format!("Request failed: {}", error)); - - log_agent_progress( - self.ai_logs, - format!( - "Tool output:\n{}", - render_tool_output_for_log(&request, &output) - ), - ); - - log_agent_progress( - self.ai_logs, - "Sending tool output back to model", - ); - - messages.push(serde_json::json!({ - "role": "user", - "content": build_tool_result_user_prompt(&request, &output), - })); - } - } - } + if self.ai_logs { + let mut last_stream_error: Option = None; + let stream_payloads = build_anthropic_payload_variants( + &self.model, + system_prompt, + messages, + true, + ); + + for (attempt_idx, payload) in stream_payloads.iter().enumerate() { + let stream_result = stream_attempt( + &client, + &self.endpoint, + &self.api_key, + &self.version, + payload, + self.ai_logs, + ) + .await; + + match stream_result { + Ok(content) => return Ok(content), + Err(error) => { + last_stream_error = Some(error.to_string()); + log_agent_progress( + self.ai_logs, + format!( + "⚠️ Streaming attempt {} failed: {}", + attempt_idx + 1, + error + ), + ); + } + } + } + + if let Some(error) = last_stream_error { + log_agent_progress( + self.ai_logs, + format!( + "⚠️ Streaming unavailable, falling back to non-stream request: {}", + error + ), + ); + } + } + + let non_stream_payloads = + build_anthropic_payload_variants(&self.model, system_prompt, messages, false); + let mut last_non_stream_error: Option = None; + + for (attempt_idx, payload) in non_stream_payloads.iter().enumerate() { + let request_result = non_stream_attempt( + &client, + &self.endpoint, + &self.api_key, + &self.version, + payload, + self.ai_logs, + ) + .await; + + match request_result { + Ok(content) => return Ok(content), + Err(error) => { + last_non_stream_error = Some(error.to_string()); + log_agent_progress( + self.ai_logs, + format!( + "⚠️ Non-stream attempt {} failed: {}", + attempt_idx + 1, + error + ), + ); + } + } + } - Err(miette::miette!( - "Anthropic provider exceeded max interactive read steps ({}) for skill '{}' (enable --ai-logs to inspect progress)", - MAX_AGENT_STEPS, - skill.id - )) + Err(miette::miette!( + "All non-stream model request attempts failed for 
model '{}': {}", + self.model, + last_non_stream_error.unwrap_or_else(|| "unknown error".to_string()) + )) + }) + }, + ) } } diff --git a/src/commands/audit/providers/mod.rs b/src/commands/audit/providers/mod.rs index b114345..1a05dad 100644 --- a/src/commands/audit/providers/mod.rs +++ b/src/commands/audit/providers/mod.rs @@ -20,7 +20,7 @@ const DEFAULT_AI_ENDPOINT: &str = "https://api.openai.com/v1/responses"; const DEFAULT_AI_MODEL: &str = "gpt-4.1-mini"; const DEFAULT_AI_API_KEY_ENV: &str = "OPENAI_API_KEY"; const DEFAULT_ANTHROPIC_ENDPOINT: &str = "https://api.anthropic.com/v1/messages"; -const DEFAULT_ANTHROPIC_MODEL: &str = "claude-3-5-haiku-latest"; +const DEFAULT_ANTHROPIC_MODEL: &str = "claude-sonnet-4-6"; const DEFAULT_ANTHROPIC_API_KEY_ENV: &str = "ANTHROPIC_API_KEY"; const DEFAULT_ANTHROPIC_VERSION: &str = "2023-06-01"; const DEFAULT_OLLAMA_ENDPOINT: &str = "http://localhost:11434/v1/chat/completions"; @@ -40,6 +40,15 @@ pub trait AnalysisProvider { ) -> Result; } +fn load_api_key(api_key_env: &str, provider_name: &str) -> Result { + std::env::var(api_key_env).into_diagnostic().with_context(|| { + format!( + "Missing API key environment variable '{}'. Set it before running with --provider {}.", + api_key_env, provider_name + ) + }) +} + pub fn build_provider(args: &Args) -> Result> { match args.provider.to_ascii_lowercase().as_str() { "scaffold" => Ok(Box::new(ScaffoldProvider)), @@ -56,13 +65,7 @@ pub fn build_provider(args: &Args) -> Result> { .api_key_env .as_deref() .unwrap_or(DEFAULT_AI_API_KEY_ENV); - - let api_key = std::env::var(api_key_env).into_diagnostic().with_context(|| { - format!( - "Missing API key environment variable '{}'. Set it before running with --provider openai.", - api_key_env - ) - })?; + let api_key = load_api_key(api_key_env, "openai")?; Ok(Box::new(OpenAiProvider { endpoint, @@ -86,15 +89,7 @@ pub fn build_provider(args: &Args) -> Result> { .api_key_env .as_deref() .unwrap_or(DEFAULT_ANTHROPIC_API_KEY_ENV); - - let api_key = std::env::var(api_key_env) - .into_diagnostic() - .with_context(|| { - format!( - "Missing API key environment variable '{}'. 
Set it before running with --provider anthropic.", - api_key_env - ) - })?; + let api_key = load_api_key(api_key_env, "anthropic")?; Ok(Box::new(AnthropicProvider { endpoint, diff --git a/src/commands/audit/providers/openai.rs b/src/commands/audit/providers/openai.rs index 895526d..a8a4fdc 100644 --- a/src/commands/audit/providers/openai.rs +++ b/src/commands/audit/providers/openai.rs @@ -1,14 +1,13 @@ use miette::{Context, IntoDiagnostic, Result}; use serde_json::{json, Value}; -use std::io::{self, Write}; use std::path::Path; -use std::time::Instant; use super::shared::{ block_on_runtime_aware, build_agent_system_prompt, build_initial_user_prompt, - build_tool_result_user_prompt, describe_read_request_friendly, execute_read_request, - iteration_from_parsed, log_agent_progress, parse_agent_action, - render_tool_output_for_log, summarize_read_request, AgentAction, MAX_AGENT_STEPS, + emit_reasoning_double_line_break, emit_reasoning_line_break, finalize_content_stdout, + finalize_reasoning_stdout, log_agent_progress, run_agent_loop, + stream_content_delta_to_stdout, stream_reasoning_delta_to_stdout, + ContentStreamState, ReasoningStreamState, }; use super::AnalysisProvider; use crate::commands::audit::model::{ @@ -212,66 +211,6 @@ fn normalize_responses_input_content(role: &str, content: &Value) -> Value { ]) } -#[derive(Debug, Clone, Copy, Default)] -struct ReasoningStreamState { - started: bool, - line_break_emitted: bool, - last_summary_index: Option, -} - -#[derive(Debug, Clone, Copy, Default)] -struct ContentStreamState { - started: bool, - ends_with_newline: bool, -} - -fn stream_reasoning_delta_to_stdout( - enabled: bool, - state: &mut ReasoningStreamState, - delta: &str, -) { - if !enabled || delta.is_empty() { - return; - } - - let mut stdout = io::stdout().lock(); - - if !state.started { - let _ = writeln!(stdout, "🤖 🧠 Reasoning summary:"); - state.started = true; - } - - let _ = write!(stdout, "{}", delta); - let _ = stdout.flush(); - state.line_break_emitted = false; -} - -fn emit_reasoning_line_break(enabled: bool, state: &mut ReasoningStreamState) { - if !enabled || !state.started || state.line_break_emitted { - return; - } - - let mut stdout = io::stdout().lock(); - let _ = writeln!(stdout); - let _ = stdout.flush(); - state.line_break_emitted = true; -} - -fn emit_reasoning_double_line_break(enabled: bool, state: &mut ReasoningStreamState) { - if !enabled || !state.started || state.line_break_emitted { - return; - } - - let mut stdout = io::stdout().lock(); - let _ = write!(stdout, "\n\n"); - let _ = stdout.flush(); - state.line_break_emitted = true; -} - -fn finalize_reasoning_stdout(enabled: bool, state: &mut ReasoningStreamState) { - emit_reasoning_line_break(enabled, state); -} - fn extract_summary_index(event: &Value) -> Option { event .get("summary_index") @@ -297,36 +236,6 @@ fn maybe_emit_reasoning_line_break_on_summary_change( state.last_summary_index = Some(current_index); } -fn stream_content_delta_to_stdout(enabled: bool, state: &mut ContentStreamState, delta: &str) { - if !enabled || delta.is_empty() { - return; - } - - let mut stdout = io::stdout().lock(); - - if !state.started { - let _ = write!(stdout, "🤖 ↳ Output: "); - state.started = true; - state.ends_with_newline = false; - } - - let _ = write!(stdout, "{}", delta); - let _ = stdout.flush(); - - state.ends_with_newline = delta.ends_with('\n'); -} - -fn finalize_content_stdout(enabled: bool, state: &mut ContentStreamState) { - if !enabled || !state.started || state.ends_with_newline { - return; - } - - let 
mut stdout = io::stdout().lock(); - let _ = writeln!(stdout); - let _ = stdout.flush(); - state.ends_with_newline = true; -} - fn extract_chat_reasoning_delta(event: &Value) -> Option { event .pointer("/choices/0/delta/reasoning_content") @@ -521,7 +430,6 @@ async fn stream_chat_attempt( let mut pending = String::new(); let mut model_output = String::new(); - let mut reasoning_output = String::new(); let mut reasoning_stream_state = ReasoningStreamState::default(); let mut content_stream_state = ContentStreamState::default(); @@ -548,7 +456,6 @@ async fn stream_chat_attempt( }; if let Some(reasoning_delta) = extract_chat_reasoning_delta(&event) { - reasoning_output.push_str(&reasoning_delta); stream_reasoning_delta_to_stdout( ai_logs, &mut reasoning_stream_state, @@ -573,8 +480,6 @@ async fn stream_chat_attempt( )); } - let _ = reasoning_output; - Ok(model_output) } @@ -605,7 +510,6 @@ async fn stream_responses_attempt( let mut pending = String::new(); let mut model_output = String::new(); - let mut reasoning_output = String::new(); let mut reasoning_stream_state = ReasoningStreamState::default(); let mut content_stream_state = ContentStreamState::default(); @@ -637,7 +541,6 @@ async fn stream_responses_attempt( &mut reasoning_stream_state, extract_summary_index(&event), ); - reasoning_output.push_str(&reasoning_delta); stream_reasoning_delta_to_stdout( ai_logs, &mut reasoning_stream_state, @@ -662,8 +565,6 @@ async fn stream_responses_attempt( )); } - let _ = reasoning_output; - Ok(model_output) } @@ -812,83 +713,138 @@ impl AnalysisProvider for OpenAiProvider { }), ]; - for step_idx in 0..MAX_AGENT_STEPS { - log_agent_progress( - self.ai_logs, - format!( - "Step {}/{} • requesting next action for skill '{}' ({})", - step_idx + 1, - MAX_AGENT_STEPS, - skill.id, - self.endpoint - ), - ); - - log_agent_progress( - self.ai_logs, - format!( - "🤔 Thinking… waiting for model response (step {}/{}, skill='{}')", - step_idx + 1, - MAX_AGENT_STEPS, - skill.id - ), - ); - - let request_started_at = Instant::now(); - let response_content_result = block_on_runtime_aware(async { - let client = reqwest::Client::new(); - let reasoning_effort = self.reasoning_effort.as_deref(); - - if self.ai_logs { - let mut last_stream_error: Option = None; - let stream_payloads = match api_family { + run_agent_loop( + skill, + &self.endpoint, + self.ai_logs, + &canonical_root, + permission_prompt, + &mut messages, + "AI provider", + |messages| { + block_on_runtime_aware(async { + let client = reqwest::Client::new(); + let reasoning_effort = self.reasoning_effort.as_deref(); + + if self.ai_logs { + let mut last_stream_error: Option = None; + let stream_payloads = match api_family { + ApiFamily::ChatCompletions => build_chat_payload_variants( + &self.model, + messages, + true, + reasoning_effort, + self.ollama_compat, + ), + ApiFamily::Responses => build_responses_payload_variants( + &self.model, + messages, + true, + reasoning_effort, + ), + }; + + for (attempt_idx, stream_payload) in stream_payloads.iter().enumerate() { + let stream_attempt = match api_family { + ApiFamily::ChatCompletions => { + stream_chat_attempt( + &client, + &self.endpoint, + &self.api_key, + stream_payload, + self.ai_logs, + ) + .await + } + ApiFamily::Responses => { + stream_responses_attempt( + &client, + &self.endpoint, + &self.api_key, + stream_payload, + self.ai_logs, + ) + .await + } + }; + + match stream_attempt { + Ok(content) => return Ok(content), + Err(error) => { + last_stream_error = Some(error.to_string()); + log_agent_progress( + 
self.ai_logs, + format!( + "⚠️ Streaming attempt {} failed: {}", + attempt_idx + 1, + error + ), + ); + } + } + } + + if let Some(error) = last_stream_error { + log_agent_progress( + self.ai_logs, + format!( + "⚠️ Streaming unavailable, falling back to non-stream request: {}", + error + ), + ); + } + } + + let non_stream_payloads = match api_family { ApiFamily::ChatCompletions => build_chat_payload_variants( &self.model, - &messages, - true, + messages, + false, reasoning_effort, self.ollama_compat, ), ApiFamily::Responses => build_responses_payload_variants( &self.model, - &messages, - true, + messages, + false, reasoning_effort, ), }; - for (attempt_idx, stream_payload) in stream_payloads.iter().enumerate() { - let stream_attempt = match api_family { + let mut last_non_stream_error: Option = None; + + for (attempt_idx, payload) in non_stream_payloads.iter().enumerate() { + let request_result = match api_family { ApiFamily::ChatCompletions => { - stream_chat_attempt( + non_stream_chat_attempt( &client, &self.endpoint, &self.api_key, - stream_payload, + payload, self.ai_logs, ) .await } ApiFamily::Responses => { - stream_responses_attempt( + non_stream_responses_attempt( &client, &self.endpoint, &self.api_key, - stream_payload, + payload, self.ai_logs, ) .await } }; - match stream_attempt { + match request_result { Ok(content) => return Ok(content), Err(error) => { - last_stream_error = Some(error.to_string()); + last_non_stream_error = Some(error.to_string()); log_agent_progress( self.ai_logs, format!( - "⚠️ Streaming attempt {} failed: {}", + "⚠️ Non-stream attempt {} failed: {}", attempt_idx + 1, error ), @@ -897,186 +853,12 @@ impl AnalysisProvider for OpenAiProvider { } } - if let Some(error) = last_stream_error { - log_agent_progress( - self.ai_logs, - format!( - "⚠️ Streaming unavailable, falling back to non-stream request: {}", - error - ), - ); - } - } - - let non_stream_payloads = match api_family { - ApiFamily::ChatCompletions => build_chat_payload_variants( - &self.model, - &messages, - false, - reasoning_effort, - self.ollama_compat, - ), - ApiFamily::Responses => { - build_responses_payload_variants(&self.model, &messages, false, reasoning_effort) - } - }; - - let mut last_non_stream_error: Option = None; - - for (attempt_idx, payload) in non_stream_payloads.iter().enumerate() { - let request_result = match api_family { - ApiFamily::ChatCompletions => { - non_stream_chat_attempt( - &client, - &self.endpoint, - &self.api_key, - payload, - self.ai_logs, - ) - .await - } - ApiFamily::Responses => { - non_stream_responses_attempt( - &client, - &self.endpoint, - &self.api_key, - payload, - self.ai_logs, - ) - .await - } - }; - - match request_result { - Ok(content) => return Ok(content), - Err(error) => { - last_non_stream_error = Some(error.to_string()); - log_agent_progress( - self.ai_logs, - format!( - "⚠️ Non-stream attempt {} failed: {}", - attempt_idx + 1, - error - ), - ); - } - } - } - - Err(miette::miette!( - "All non-stream model request attempts failed: {}", - last_non_stream_error.unwrap_or_else(|| "unknown error".to_string()) - )) - }); - - let elapsed = request_started_at.elapsed(); - - if let Err(error) = &response_content_result { - log_agent_progress( - self.ai_logs, - format!( - "❌ Model request failed after {} ms: {}", - elapsed.as_millis(), - error - ), - ); - } else { - log_agent_progress( - self.ai_logs, - format!("✅ Model response received in {} ms", elapsed.as_millis()), - ); - } - - let content = response_content_result?; - - messages.push(json!({ - 
"role": "assistant", - "content": content, - })); - - log_agent_progress( - self.ai_logs, - format!("Model output:\n{}", &content), - ); - - match parse_agent_action(&content)? { - AgentAction::Final(parsed) => { - let findings = parsed - .get("findings") - .and_then(Value::as_array) - .map(|items| items.len()) - .unwrap_or(0); - let status = parsed - .get("status") - .and_then(Value::as_str) - .unwrap_or("completed"); - let analysis_summary = parsed - .get("analysis_summary") - .and_then(Value::as_str) - .filter(|value| !value.trim().is_empty()); - - if let Some(summary) = analysis_summary { - log_agent_progress( - self.ai_logs, - format!( - "Model analysis summary:\n{}", - summary - ), - ); - } - - log_agent_progress( - self.ai_logs, - format!( - "Model completed skill '{}' at step {}/{} • status={} • findings={}", - skill.id, - step_idx + 1, - MAX_AGENT_STEPS, - status, - findings - ), - ); - return Ok(iteration_from_parsed(skill, parsed)); - } - AgentAction::ReadRequest(request) => { - log_agent_progress( - self.ai_logs, - format!( - "Model requested: {}", - describe_read_request_friendly(&request) - ), - ); - - log_agent_progress( - self.ai_logs, - format!("Running local action: {}", summarize_read_request(&request)), - ); - - let output = execute_read_request(&request, &canonical_root, permission_prompt) - .unwrap_or_else(|error| format!("Request failed: {}", error)); - - log_agent_progress( - self.ai_logs, - format!( - "Tool output:\n{}", - render_tool_output_for_log(&request, &output) - ), - ); - - log_agent_progress(self.ai_logs, "Sending tool output back to model"); - - messages.push(json!({ - "role": "user", - "content": build_tool_result_user_prompt(&request, &output), - })); - } - } - } - - Err(miette::miette!( - "AI provider exceeded max interactive read steps ({}) for skill '{}' (enable --ai-logs to inspect progress)", - MAX_AGENT_STEPS, - skill.id - )) + Err(miette::miette!( + "All non-stream model request attempts failed: {}", + last_non_stream_error.unwrap_or_else(|| "unknown error".to_string()) + )) + }) + }, + ) } } diff --git a/src/commands/audit/providers/shared.rs b/src/commands/audit/providers/shared.rs index 103971c..dc1f2bc 100644 --- a/src/commands/audit/providers/shared.rs +++ b/src/commands/audit/providers/shared.rs @@ -1,9 +1,10 @@ use miette::{Context, IntoDiagnostic, Result}; use serde::Deserialize; -use serde_json::Value; +use serde_json::{json, Value}; use std::io::{self, Write}; use std::path::{Path, PathBuf}; use std::process::Command; +use std::time::Instant; use tokio::runtime::Handle; use crate::commands::audit::model::{ @@ -56,6 +57,100 @@ struct RawReadRequest { glob: Option, } +#[derive(Debug, Clone, Copy, Default)] +pub(super) struct ReasoningStreamState { + pub(super) started: bool, + pub(super) line_break_emitted: bool, + pub(super) last_summary_index: Option, +} + +#[derive(Debug, Clone, Copy, Default)] +pub(super) struct ContentStreamState { + pub(super) started: bool, + pub(super) ends_with_newline: bool, +} + +pub(super) fn stream_reasoning_delta_to_stdout( + enabled: bool, + state: &mut ReasoningStreamState, + delta: &str, +) { + if !enabled || delta.is_empty() { + return; + } + + let mut stdout = io::stdout().lock(); + + if !state.started { + let _ = writeln!(stdout, "🤖 🧠 Reasoning summary:"); + state.started = true; + } + + let _ = write!(stdout, "{}", delta); + let _ = stdout.flush(); + state.line_break_emitted = false; +} + +pub(super) fn emit_reasoning_line_break(enabled: bool, state: &mut ReasoningStreamState) { + if !enabled || 
!state.started || state.line_break_emitted { + return; + } + + let mut stdout = io::stdout().lock(); + let _ = writeln!(stdout); + let _ = stdout.flush(); + state.line_break_emitted = true; +} + +pub(super) fn emit_reasoning_double_line_break(enabled: bool, state: &mut ReasoningStreamState) { + if !enabled || !state.started || state.line_break_emitted { + return; + } + + let mut stdout = io::stdout().lock(); + let _ = write!(stdout, "\n\n"); + let _ = stdout.flush(); + state.line_break_emitted = true; +} + +pub(super) fn finalize_reasoning_stdout(enabled: bool, state: &mut ReasoningStreamState) { + emit_reasoning_line_break(enabled, state); +} + +pub(super) fn stream_content_delta_to_stdout( + enabled: bool, + state: &mut ContentStreamState, + delta: &str, +) { + if !enabled || delta.is_empty() { + return; + } + + let mut stdout = io::stdout().lock(); + + if !state.started { + let _ = write!(stdout, "🤖 ↳ Output: "); + state.started = true; + state.ends_with_newline = false; + } + + let _ = write!(stdout, "{}", delta); + let _ = stdout.flush(); + + state.ends_with_newline = delta.ends_with('\n'); +} + +pub(super) fn finalize_content_stdout(enabled: bool, state: &mut ContentStreamState) { + if !enabled || !state.started || state.ends_with_newline { + return; + } + + let mut stdout = io::stdout().lock(); + let _ = writeln!(stdout); + let _ = stdout.flush(); + state.ends_with_newline = true; +} + pub(super) fn build_agent_system_prompt() -> &'static str { AGENT_SYSTEM_PROMPT } @@ -97,6 +192,144 @@ pub(super) fn build_tool_result_user_prompt(request: &ReadRequest, output: &str) .replace("{{OUTPUT}}", output) } +pub(super) fn run_agent_loop( + skill: &VulnerabilitySkill, + endpoint: &str, + ai_logs: bool, + project_root: &Path, + permission_prompt: &PermissionPromptSpec, + messages: &mut Vec, + provider_label: &str, + mut request_model: F, +) -> Result +where + F: FnMut(&[Value]) -> Result, +{ + for step_idx in 0..MAX_AGENT_STEPS { + log_agent_progress( + ai_logs, + format!( + "Step {}/{} • requesting next action for skill '{}' ({})", + step_idx + 1, + MAX_AGENT_STEPS, + skill.id, + endpoint + ), + ); + + log_agent_progress( + ai_logs, + format!( + "🤔 Thinking… waiting for model response (step {}/{}, skill='{}')", + step_idx + 1, + MAX_AGENT_STEPS, + skill.id + ), + ); + + let request_started_at = Instant::now(); + let response_content_result = request_model(messages.as_slice()); + let elapsed = request_started_at.elapsed(); + + if let Err(error) = &response_content_result { + log_agent_progress( + ai_logs, + format!( + "❌ Model request failed after {} ms: {}", + elapsed.as_millis(), + error + ), + ); + } else { + log_agent_progress( + ai_logs, + format!("✅ Model response received in {} ms", elapsed.as_millis()), + ); + } + + let content = response_content_result?; + + messages.push(json!({ + "role": "assistant", + "content": content, + })); + + log_agent_progress(ai_logs, format!("Model output:\n{}", &content)); + + match parse_agent_action(&content)? 
{ + AgentAction::Final(parsed) => { + let findings = parsed + .get("findings") + .and_then(Value::as_array) + .map(|items| items.len()) + .unwrap_or(0); + let status = parsed + .get("status") + .and_then(Value::as_str) + .unwrap_or("completed"); + let analysis_summary = parsed + .get("analysis_summary") + .and_then(Value::as_str) + .filter(|value| !value.trim().is_empty()); + + if let Some(summary) = analysis_summary { + log_agent_progress(ai_logs, format!("Model analysis summary:\n{}", summary)); + } + + log_agent_progress( + ai_logs, + format!( + "Model completed skill '{}' at step {}/{} • status={} • findings={}", + skill.id, + step_idx + 1, + MAX_AGENT_STEPS, + status, + findings + ), + ); + + return Ok(iteration_from_parsed(skill, parsed)); + } + AgentAction::ReadRequest(request) => { + log_agent_progress( + ai_logs, + format!("Model requested: {}", describe_read_request_friendly(&request)), + ); + + log_agent_progress( + ai_logs, + format!("Running local action: {}", summarize_read_request(&request)), + ); + + let output = execute_read_request(&request, project_root, permission_prompt) + .unwrap_or_else(|error| format!("Request failed: {}", error)); + + log_agent_progress( + ai_logs, + format!( + "Tool output:\n{}", + render_tool_output_for_log(&request, &output) + ), + ); + + log_agent_progress(ai_logs, "Sending tool output back to model"); + + messages.push(json!({ + "role": "user", + "content": build_tool_result_user_prompt(&request, &output), + })); + } + } + } + + Err(miette::miette!( + "{} exceeded max interactive read steps ({}) for skill '{}' (enable --ai-logs to inspect progress)", + provider_label, + MAX_AGENT_STEPS, + skill.id + )) +} + fn render_permission_prompt(permission_prompt: &PermissionPromptSpec) -> String { PERMISSION_PROMPT_TEMPLATE .replace("{{ workspace_root }}", &permission_prompt.workspace_root) From 2afa381bc22b65304c34686f2f21934009d8baee Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Mon, 2 Mar 2026 11:05:36 -0300 Subject: [PATCH 17/18] feat: add new skills for vulnerabilities --- .../001-strict-value-equality.md | 66 +++++++++++++++--- .../002-missing-address-validation.md | 68 +++++++++++++++++++ .../vulnerabilities/003-unvalidated-datum.md | 53 +++++++++++++++ src/commands/audit/mod.rs | 8 +++ 4 files changed, 184 insertions(+), 11 deletions(-) create mode 100644 skills/vulnerabilities/002-missing-address-validation.md create mode 100644 skills/vulnerabilities/003-unvalidated-datum.md diff --git a/skills/vulnerabilities/001-strict-value-equality.md b/skills/vulnerabilities/001-strict-value-equality.md index 9701152..b4615f9 100644 --- a/skills/vulnerabilities/001-strict-value-equality.md +++ b/skills/vulnerabilities/001-strict-value-equality.md @@ -2,27 +2,71 @@ id: strict-value-equality-001 name: strict-value-equality severity: high -description: Vulnerabilities related to strict value equality in the protocol. +description: Detects vulnerabilities caused by enforcing exact equality on ADA or full output values in Aiken validators. prompt_fragment: Read validator scripts and flag strict equality checks on ADA or full output values; treat comparisons using without_lovelace() as acceptable and not strict ADA equality. confidence_hint: medium --- # strict-value-equality -Validators could become unsatisfiable when enforcing exact equality on ADA or full output values. -Exact value equality is almost always incorrect for ADA in Plutus V2. 
Validators should enforce minimums, not exact amounts, unless there is a very strong invariant requiring exact equality. +Validators may become unsatisfiable when enforcing exact equality on ADA or full output values. +Exact value equality is almost always incorrect for ADA. + +Strict equality can break due to: + +- minUTxO changes +- Datum size changes +- Reference script additions +- Token additions that increase UTxO size + +Validators should enforce minimum values rather than exact equality, unless there is a strong invariant requiring exact equality. + +--- ## When to use -Every time a search for vulnerabilities related to strict value equality in the protocol is explicitly requested by the user. +Every time a search for vulnerabilities related to strict value equality is required. + +This skill MUST be applied only to on-chain code when value comparisons are detected. + +--- + +## Detection Logic + +Identify cases where **strict equality** is enforced on: + +### 1. ADA values -## Instructions +Examples: + +- Comparing lovelace amounts using exact equality: + - `lovelace_of(tx_out.value) == expectedLovelace` + - `tx_out.value == from_lovelace(expectedLovelace)` + +### 2. Full output values + +- Exact equality between `Value` objects: + - `tx_out.value == expectedValue` + +--- + +## Allowed / Safe Cases (Do NOT report) + +Do NOT report findings if: + +- ADA is explicitly excluded from the comparison: + - `value.without_lovelace() == expectedValue` +- Comparison enforces a **minimum**: + - `lovelaceValueOf(tx_out.value) >= minLovelace` + +If a strong invariant is present, do NOT report unless it is violated or undocumented. + +--- -1. Read the validator scripts of the protocol and identify any instances where strict value equality is enforced on ADA or full output values. -2. Take into account that values compared using `without_lovelace()` are not considered strict equality, as they ignore the ADA component. +## Description -## Reporting guidance +Explain: -- Include the exact equality expression and where it appears. -- Explain why it can make the validator unsatisfiable in realistic transaction construction. -- Recommend replacing strict equality on ADA with a minimum-bound check when possible. \ No newline at end of file +- Where the strict equality is enforced +- Why it can cause the validator to become unsatisfiable +- Under which ledger conditions this may fail (minUTxO, datum growth, etc.) \ No newline at end of file diff --git a/skills/vulnerabilities/002-missing-address-validation.md b/skills/vulnerabilities/002-missing-address-validation.md new file mode 100644 index 0000000..7f0e1ef --- /dev/null +++ b/skills/vulnerabilities/002-missing-address-validation.md @@ -0,0 +1,68 @@ +--- +id: missing-address-validation-002 +name: missing-address-validation +severity: high +description: Detects validators and minting policies that fail to explicitly validate output destination addresses. +prompt_fragment: Identify output-selection logic that validates datum/token/value but never validates destination address. +--- + +# missing-address-validation + +Validators and minting policies must explicitly validate the destination address of: + +- Continuing outputs +- Newly created outputs +- Outputs receiving newly minted tokens + +Selecting outputs only by: + +- Datum presence +- Token presence +- Value shape + +without checking the destination address can allow attackers to redirect funds or protocol state to unintended addresses. 
+ +--- + +## Detection Scope + +Focus on logic that: + +- Filters or selects outputs +- Asserts the existence of a “continuing” or “target” output +- Checks token presence, datum presence, or value constraints + +--- + +## Detection Logic + +Report a vulnerability if **ALL** of the following hold: + +1. The validator or policy: + - Selects one or more outputs (`transaction.outputs`) + - Or asserts existence of a continuing output + +2. Output selection or validation is based on one or more of the following: + - Datum presence (`output.datum`) + - Token presence + - Value shape + - Minted value containment + - Reference script presence + +3. **No explicit validation** of the output address is performed, such as: + - `output.address == expected_address` + - Matching on `ScriptCredential` / `PubKeyCredential` + +4. No indirect address validation is performed via: + - Comparison with input address + - Comparison with a known script address + +--- + +## Description + +Explain: + +- How outputs are selected +- That destination address is never checked +- How an attacker can redirect funds or state \ No newline at end of file diff --git a/skills/vulnerabilities/003-unvalidated-datum.md b/skills/vulnerabilities/003-unvalidated-datum.md new file mode 100644 index 0000000..27c228b --- /dev/null +++ b/skills/vulnerabilities/003-unvalidated-datum.md @@ -0,0 +1,53 @@ +--- +id: unvalidated-datum-003 +name: unvalidated-datum +severity: high +description: Detects missing validation of datum when creating or validating outputs at script addresses. +prompt_fragment: Detect outputs at script addresses where datum is missing, partially validated, or extracted but unused. +--- + +# unvalidated-datum + +Validators must validate the datum of outputs created or continued at script addresses. + +If a validator creates or enforces an output to a script address but does not validate its datum, attackers may: + +- Inject arbitrary datum data +- Corrupt protocol state +- Create unspendable UTxOs +- Break protocol invariants + +--- + +## Detection Logic + +Report if any of the following hold: + +1. The validator DOES NOT check EVERY FIELD of the OUTPUT datum (first determine the datum type and, if it is a structured datum, which fields it has, then confirm that every field is checked). For example: + - The validator only checks a subset of the datum fields + - The validator only checks the datum type but not its fields + +2. The validator DOES NOT: + - Extract the datum from the output + - Decode it into the expected datum type + - Validate it (whether by checking equality on fields or using them in other functions) + - Or enforce equality with an expected datum + +3. 
The datum is extracted but discarded as wildcard match, or the datum is extracted but not used at all in the validation + +--- + +## Do NOT Report If + +- The output datum is extracted, decoded, and fully validated +- The output is to a pubkey address +- The output carries no datum by design + +--- + +## Description + +Explain: + +- Which output lacks validation and what is the datum schema expected for that output +- If there are certain fields of the datum that are not validated, explain which ones and what is the expected value for those fields \ No newline at end of file diff --git a/src/commands/audit/mod.rs b/src/commands/audit/mod.rs index 1cffb9e..a3eb503 100644 --- a/src/commands/audit/mod.rs +++ b/src/commands/audit/mod.rs @@ -479,6 +479,14 @@ fn load_embedded_seed_skills() -> Result> { Path::new("skills/vulnerabilities/001-strict-value-equality.md"), include_str!("../../../skills/vulnerabilities/001-strict-value-equality.md"), ), + ( + Path::new("skills/vulnerabilities/002-missing-address-validation.md"), + include_str!("../../../skills/vulnerabilities/002-missing-address-validation.md"), + ), + ( + Path::new("skills/vulnerabilities/003-unvalidated-datum.md"), + include_str!("../../../skills/vulnerabilities/003-unvalidated-datum.md"), + ), ]; seed_files From 43273799fe03b51740a1e4cbbb85677bb1b02c8f Mon Sep 17 00:00:00 2001 From: Maximiliano Duthey Date: Mon, 2 Mar 2026 17:50:50 -0300 Subject: [PATCH 18/18] feat: add heuristic logic + clippy format and fixes --- .../006-heuristic-audit-provider-minimal.md | 159 ++ src/commands/audit/ast.rs | 28 +- src/commands/audit/mod.rs | 29 +- src/commands/audit/providers/anthropic.rs | 78 +- src/commands/audit/providers/heuristic.rs | 59 + .../audit/providers/heuristic_detectors.rs | 1919 +++++++++++++++++ src/commands/audit/providers/mod.rs | 7 +- src/commands/audit/providers/openai.rs | 106 +- src/commands/audit/providers/shared.rs | 58 +- tests/e2e/happy_path.rs | 26 + 10 files changed, 2332 insertions(+), 137 deletions(-) create mode 100644 design/006-heuristic-audit-provider-minimal.md create mode 100644 src/commands/audit/providers/heuristic.rs create mode 100644 src/commands/audit/providers/heuristic_detectors.rs diff --git a/design/006-heuristic-audit-provider-minimal.md b/design/006-heuristic-audit-provider-minimal.md new file mode 100644 index 0000000..de618a7 --- /dev/null +++ b/design/006-heuristic-audit-provider-minimal.md @@ -0,0 +1,159 @@ +# Heuristic Audit Provider (No-LLM) — Minimal Milestone 2 Spec + +## Status + +Draft (spec-first). + +This document defines the minimal implementation scope to add a local, deterministic heuristic analysis provider for `trix audit` without using LLMs. + +## Goals + +- Provide a functioning heuristic analysis engine for common Aiken vulnerability patterns. +- Expose heuristic scanning via the existing `trix audit` CLI flow. +- Reuse current state/report contracts and avoid breaking compatibility. +- Keep implementation minimal and focused on Milestone 2 outputs. + +## Scope + +In-scope: +- New `heuristic` provider in the existing provider selector. +- Rule-based detection for the 3 currently embedded vulnerability skills: + - `strict-value-equality-001` + - `missing-address-validation-002` + - `unvalidated-datum-003` +- Deterministic, local-only analysis (no network calls, no LLM/tool loop). +- Continued use of current output files: + - `.tx3/audit/state.json` + - `.tx3/audit/vulnerabilities.md` + +Out-of-scope: +- Generic interpretation of arbitrary custom skills. 
+- Replacing existing LLM providers (`openai`, `anthropic`, `ollama`). +- New output formats or schema changes. +- Type-checked semantic analysis beyond untyped AST (future work). + +## Current Architecture Anchors + +- Audit orchestration and skill loop: `src/commands/audit/mod.rs` +- Provider abstraction and factory: `src/commands/audit/providers/mod.rs` +- Heuristic provider adapter: `src/commands/audit/providers/heuristic.rs` +- Heuristic detector engine (AST-first): `src/commands/audit/providers/heuristic_detectors.rs` +- AST/cache and validator context: `src/commands/audit/ast.rs` +- Analysis/report data contracts: `src/commands/audit/model.rs` +- Existing seed skills: + - `skills/vulnerabilities/001-strict-value-equality.md` + - `skills/vulnerabilities/002-missing-address-validation.md` + - `skills/vulnerabilities/003-unvalidated-datum.md` + +## CLI Contract Delta + +`--provider` MUST accept `heuristic`. + +Defaults remain unchanged: +- Default provider stays `scaffold`. +- `heuristic` does not require `--endpoint`, `--model`, or `--api-key-env`. + +## High-Level Execution Flow (heuristic mode) + +1. Build provider from CLI args (`heuristic`). +2. Discover source files and load/reuse AST cache. +3. Load vulnerability skills from `--skills-dir` (or embedded seeds fallback). +4. For each skill, run deterministic local rule evaluation. +5. Persist incremental state after each skill. +6. Render final report with existing markdown template. + +## Heuristic Provider Requirements + +### Functional requirements + +- MUST implement `AnalysisProvider` and return `SkillIterationResult` for each skill. +- MUST run without network/API keys. +- MUST be deterministic in findings ordering and status values. +- MUST support only the 3 known embedded skill IDs in this milestone. +- MUST continue processing when a skill is not supported. + +### Unsupported skills + +If a skill ID is not supported by the heuristic provider: +- `status` MUST be `unsupported-skill`. +- `findings` MUST be empty. +- `next_prompt` MUST be `None`. +- Audit execution MUST continue. + +## Detection Strategy (M2 minimal) + +The provider uses an **AST-first** approach: +- Parse each `.ak` source into Aiken `UntypedModule` (`aiken_lang` parser). +- Traverse validator handlers/fallback expressions and patterns (`UntypedExpr`, `UntypedPattern`). +- Apply deterministic rule checks from AST structure and operators. +- Use text matching only as fallback when AST parsing fails for a file. + +This keeps detection deterministic, local-only, and less fragile than string-only scanning. + +### Rule 1: strict-value-equality-001 + +Report when AST `BinOp::Eq` compares expressions that include ADA/value signals. + +Do NOT report when clear safe patterns are detected, e.g.: +- `without_lovelace(...)` +- minimum checks (`>=`) for lovelace/value constraints + +### Rule 2: missing-address-validation-002 + +Report when AST patterns extract script credentials from output addresses (e.g. `Script(hash_var)`) but no later equality/inequality validation references that extracted variable. + +Do NOT report when explicit address checks are present. + +### Rule 3: unvalidated-datum-003 + +Report when inline datum is extracted from output (e.g. `InlineDatum(x)`) but is not semantically validated, or is validated only partially (e.g. spread pattern `Datum { ..., .. }`). + +Do NOT report when evidence suggests complete datum extraction/validation. + +## Data Contract Compatibility + +- `AnalysisStateJson` schema remains unchanged. 
+- `VulnerabilityFinding` schema remains unchanged. +- Report rendering remains unchanged. +- Provider metadata SHOULD identify `heuristic` clearly in `state.json`. + +## Caching / Memory Requirements + +- The provider MUST reuse AST/context built by existing audit flow. +- Existing AST cache in `.tx3/audit/aiken-ast.json` remains the inter-run memory mechanism. +- `--no-ast-cache` MUST still force regeneration. +- Heuristic rule execution MUST be AST-first even when cache is present (parsing source modules directly for rule traversal). + +## Security and Isolation + +- No outbound requests. +- No AI tool-loop execution path. +- Only local workspace file reads under existing audit orchestration. + +## Acceptance Criteria (Milestone 2 minimal) + +- A1: `trix audit --provider heuristic` produces a structured vulnerability report. +- B1: Rule behavior is consistent with the 3 public skill definition files. +- C1: Users can execute heuristic scans locally end-to-end from CLI. +- D1: Running against known vulnerable scripts yields non-zero findings. + +## Testing Plan + +- Unit tests for each heuristic rule: + - positive and negative scenarios + - unsupported-skill behavior +- E2E audit test for `--provider heuristic` in initialized project. +- Keep existing audit smoke/edge coverage passing. + +## Requirement-to-Test Traceability (initial) + +- Provider selection supports `heuristic` → audit provider validation tests. +- End-to-end execution and artifacts → `tests/e2e/happy_path.rs`. +- Unsupported skill non-fatal handling → heuristic provider unit test. +- Contract compatibility (`state.json`, report rendering) → existing audit happy-path assertions + heuristic additions. + +## Open Questions (deferred) + +- Should heuristic become default provider in a later milestone? +- Should custom external skills be supported beyond known IDs? +- Should future versions parse semantic expressions from typed AST for lower false positives? 
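For orientation, here is a condensed, illustrative sketch of Rule 1's core condition over the untyped AST. It mirrors the `BinOp::Eq` check that `heuristic_detectors.rs` implements later in this patch; the two predicate parameters stand in for the detector's internal `is_value_or_lovelace_expr` and `contains_without_lovelace` helpers, whose bodies are not reproduced here, so treat this as a sketch rather than the full detector.

```rust
use aiken_lang::{ast::BinOp, expr::UntypedExpr};

/// Sketch: does this expression node match Rule 1, i.e. strict equality on a
/// value/lovelace expression without an explicit ADA exclusion?
fn is_strict_value_equality(
    expr: &UntypedExpr,
    is_value_or_lovelace_expr: fn(&UntypedExpr) -> bool,
    contains_without_lovelace: fn(&UntypedExpr) -> bool,
) -> bool {
    match expr {
        // Flag `a == b` when either side looks like an ADA/value expression
        // and neither side excludes ADA via `without_lovelace(...)`.
        UntypedExpr::BinOp {
            name: BinOp::Eq,
            left,
            right,
            ..
        } => {
            (is_value_or_lovelace_expr(left) || is_value_or_lovelace_expr(right))
                && !contains_without_lovelace(left)
                && !contains_without_lovelace(right)
        }
        _ => false,
    }
}
```

The real detector additionally recurses through sequences, pipelines, `when`/`if` branches, and assignments, and records each match's source span so findings can report a file and line.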
diff --git a/src/commands/audit/ast.rs b/src/commands/audit/ast.rs index ecc53b6..955caf4 100644 --- a/src/commands/audit/ast.rs +++ b/src/commands/audit/ast.rs @@ -3,8 +3,7 @@ use aiken_lang::{ self, Annotation, ArgBy, ModuleKind, UntypedArg, UntypedDefinition, UntypedFunction, UntypedModule, }, - parser, - version, + parser, version, }; use chrono::Utc; use cryptoxide::{digest::Digest as _, sha2::Sha256}; @@ -57,7 +56,12 @@ pub fn generate_ast_and_validator_context( if ast_out_path.exists() && !no_ast_cache { let cached_text = std::fs::read_to_string(ast_out_path) .into_diagnostic() - .with_context(|| format!("Failed to read cached AST JSON at {}", ast_out_path.display()))?; + .with_context(|| { + format!( + "Failed to read cached AST JSON at {}", + ast_out_path.display() + ) + })?; let cached_snapshot: AstSnapshot = serde_json::from_str(&cached_text) .into_diagnostic() @@ -117,7 +121,7 @@ pub fn generate_ast_and_validator_context( }) } - fn parse_module_snapshot(project_root: &Path, source_file: &Path) -> Result { +fn parse_module_snapshot(project_root: &Path, source_file: &Path) -> Result { let src = std::fs::read_to_string(source_file) .into_diagnostic() .with_context(|| format!("Failed to read source file {}", source_file.display()))?; @@ -170,7 +174,10 @@ fn build_validator_context_from_modules(modules: &[ParsedModule]) -> ValidatorCo id, module: module_snapshot.source_file.clone(), source_file: module_snapshot.source_file.clone(), - source_span: resolve_source_span(&module_snapshot.module, validator.location), + source_span: resolve_source_span( + &module_snapshot.module, + validator.location, + ), handlers, }) }) @@ -261,7 +268,11 @@ fn annotation_to_string(annotation: &Annotation) -> String { .join(", ") ), Annotation::Pair { fst, snd, .. 
} => { - format!("Pair<{}, {}>", annotation_to_string(fst), annotation_to_string(snd)) + format!( + "Pair<{}, {}>", + annotation_to_string(fst), + annotation_to_string(snd) + ) } } } @@ -427,7 +438,10 @@ validator alpha { let err = generate_ast_and_validator_context(root, &[source], &ast_out, false) .expect_err("expected invalid cached ast json failure"); - assert!(err.to_string().contains("AST output unreadable/invalid JSON")); + assert!( + err.to_string() + .contains("AST output unreadable/invalid JSON") + ); } #[test] diff --git a/src/commands/audit/mod.rs b/src/commands/audit/mod.rs index a3eb503..f0e372e 100644 --- a/src/commands/audit/mod.rs +++ b/src/commands/audit/mod.rs @@ -11,10 +11,10 @@ pub mod providers; use self::ast::generate_ast_and_validator_context; use self::model::{ - AnalysisStateJson, MiniPrompt, PermissionPromptSpec, SkillIterationResult, - ValidatorContextMap, VulnerabilityFinding, VulnerabilityReportSpec, VulnerabilitySkill, + AnalysisStateJson, MiniPrompt, PermissionPromptSpec, SkillIterationResult, ValidatorContextMap, + VulnerabilityFinding, VulnerabilityReportSpec, VulnerabilitySkill, }; -use self::providers::{build_provider, AnalysisProvider}; +use self::providers::{AnalysisProvider, build_provider}; const DEFAULT_SKILLS_DIR: &str = "skills/vulnerabilities"; @@ -47,7 +47,7 @@ pub struct Args { #[arg(long, default_value = "skills/vulnerabilities")] pub skills_dir: String, - /// Analysis provider: scaffold | openai | anthropic | ollama + /// Analysis provider: scaffold | heuristic | openai | anthropic | ollama #[arg(long, default_value = "scaffold")] pub provider: String, @@ -111,11 +111,7 @@ pub fn _run(args: Args, config: &RootConfig, _profile: &ProfileConfig) -> Result run_analysis(args, config, provider.as_ref()) } -fn run_analysis( - args: Args, - config: &RootConfig, - provider: &dyn AnalysisProvider, -) -> Result<()> { +fn run_analysis(args: Args, config: &RootConfig, provider: &dyn AnalysisProvider) -> Result<()> { let skills_dir = PathBuf::from(&args.skills_dir); let state_out = PathBuf::from(&args.state_out); let report_out = PathBuf::from(&args.report_out); @@ -396,7 +392,8 @@ fn build_permission_prompt_spec( ); } else { scope_rules.push( - "Read scope is workspace: any path under project root can be read/searched.".to_string(), + "Read scope is workspace: any path under project root can be read/searched." 
+ .to_string(), ); } @@ -541,7 +538,9 @@ fn parse_skill_content(path: &Path, content: &str) -> Result false_positives: parsed.false_positives, references: parsed.references, tags: parsed.tags, - confidence_hint: parsed.confidence_hint.filter(|value| !value.trim().is_empty()), + confidence_hint: parsed + .confidence_hint + .filter(|value| !value.trim().is_empty()), guidance_markdown: body.trim().to_string(), }) } @@ -559,9 +558,7 @@ fn split_frontmatter(content: &str) -> Result<(String, String)> { }; if first_line.trim() != "---" { - return Err(miette::miette!( - "Missing frontmatter start delimiter `---`" - )); + return Err(miette::miette!("Missing frontmatter start delimiter `---`")); } let mut frontmatter_lines = Vec::new(); @@ -576,9 +573,7 @@ fn split_frontmatter(content: &str) -> Result<(String, String)> { } if !found_end { - return Err(miette::miette!( - "Missing frontmatter end delimiter `---`" - )); + return Err(miette::miette!("Missing frontmatter end delimiter `---`")); } let body_lines = lines.collect::>(); diff --git a/src/commands/audit/providers/anthropic.rs b/src/commands/audit/providers/anthropic.rs index 2ebc36c..f791579 100644 --- a/src/commands/audit/providers/anthropic.rs +++ b/src/commands/audit/providers/anthropic.rs @@ -1,14 +1,14 @@ use miette::{Context, IntoDiagnostic, Result}; -use serde_json::{json, Value}; +use serde_json::{Value, json}; use std::path::Path; +use super::AnalysisProvider; use super::shared::{ - block_on_runtime_aware, build_agent_system_prompt, build_initial_user_prompt, - emit_reasoning_double_line_break, emit_reasoning_line_break, finalize_content_stdout, - finalize_reasoning_stdout, log_agent_progress, run_agent_loop, stream_content_delta_to_stdout, - stream_reasoning_delta_to_stdout, ContentStreamState, ReasoningStreamState, + ContentStreamState, ReasoningStreamState, block_on_runtime_aware, build_agent_system_prompt, + build_initial_user_prompt, emit_reasoning_double_line_break, emit_reasoning_line_break, + finalize_content_stdout, finalize_reasoning_stdout, log_agent_progress, run_agent_loop, + stream_content_delta_to_stdout, stream_reasoning_delta_to_stdout, }; -use super::AnalysisProvider; use crate::commands::audit::model::{ MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, ValidatorContextMap, VulnerabilitySkill, @@ -142,10 +142,10 @@ fn maybe_emit_reasoning_line_break_on_summary_change( return; }; - if let Some(previous_index) = state.last_summary_index { - if previous_index != current_index { - emit_reasoning_double_line_break(enabled, state); - } + if let Some(previous_index) = state.last_summary_index + && previous_index != current_index + { + emit_reasoning_double_line_break(enabled, state); } state.last_summary_index = Some(current_index); @@ -265,12 +265,11 @@ fn extract_anthropic_non_stream_content(response_json: &Value) -> Option .and_then(Value::as_str) .unwrap_or_default(); - if block_type == "text" { - if let Some(text) = block.get("text").and_then(Value::as_str) { - if !text.trim().is_empty() { - text_chunks.push(text.to_string()); - } - } + if block_type == "text" + && let Some(text) = block.get("text").and_then(Value::as_str) + && !text.trim().is_empty() + { + text_chunks.push(text.to_string()); } } } @@ -297,16 +296,14 @@ fn extract_anthropic_non_stream_reasoning(response_json: &Value) -> Option Result { - let canonical_root = project_root.canonicalize().into_diagnostic().with_context(|| { - format!( - "Failed to canonicalize project root {}", - project_root.display() - ) - })?; + let canonical_root = 
project_root + .canonicalize() + .into_diagnostic() + .with_context(|| { + format!( + "Failed to canonicalize project root {}", + project_root.display() + ) + })?; let system_prompt = build_agent_system_prompt(); let initial_user_prompt = build_initial_user_prompt( @@ -488,12 +488,14 @@ impl AnalysisProvider for AnthropicProvider { run_agent_loop( skill, - &self.endpoint, - self.ai_logs, - &canonical_root, - permission_prompt, + super::shared::AgentLoopContext { + endpoint: &self.endpoint, + ai_logs: self.ai_logs, + project_root: &canonical_root, + permission_prompt, + provider_label: "Anthropic provider", + }, &mut messages, - "Anthropic provider", |messages| { block_on_runtime_aware(async { let client = reqwest::Client::new(); @@ -545,8 +547,12 @@ impl AnalysisProvider for AnthropicProvider { } } - let non_stream_payloads = - build_anthropic_payload_variants(&self.model, system_prompt, messages, false); + let non_stream_payloads = build_anthropic_payload_variants( + &self.model, + system_prompt, + messages, + false, + ); let mut last_non_stream_error: Option = None; for (attempt_idx, payload) in non_stream_payloads.iter().enumerate() { diff --git a/src/commands/audit/providers/heuristic.rs b/src/commands/audit/providers/heuristic.rs new file mode 100644 index 0000000..2cab09b --- /dev/null +++ b/src/commands/audit/providers/heuristic.rs @@ -0,0 +1,59 @@ +use miette::Result; +use std::path::Path; + +use super::AnalysisProvider; +use crate::commands::audit::model::{ + MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, ValidatorContextMap, + VulnerabilitySkill, +}; + +#[path = "heuristic_detectors.rs"] +mod detectors; + +#[derive(Debug, Default)] +pub struct HeuristicProvider; + +impl AnalysisProvider for HeuristicProvider { + fn provider_spec(&self) -> ProviderSpec { + ProviderSpec { + name: "heuristic".to_string(), + model: None, + notes: "Deterministic local heuristic provider. No external AI calls are performed." + .to_string(), + } + } + + fn analyze_skill( + &self, + skill: &VulnerabilitySkill, + _prompt: &MiniPrompt, + source_references: &[String], + validator_context: &ValidatorContextMap, + project_root: &Path, + _permission_prompt: &PermissionPromptSpec, + ) -> Result { + let findings = match detectors::collect_findings_for_skill( + skill, + source_references, + validator_context, + project_root, + )? 
{ + Some(findings) => findings, + None => { + return Ok(SkillIterationResult { + skill_id: skill.id.clone(), + status: "unsupported-skill".to_string(), + findings: vec![], + next_prompt: None, + }); + } + }; + + Ok(SkillIterationResult { + skill_id: skill.id.clone(), + status: "completed".to_string(), + findings, + next_prompt: None, + }) + } +} diff --git a/src/commands/audit/providers/heuristic_detectors.rs b/src/commands/audit/providers/heuristic_detectors.rs new file mode 100644 index 0000000..d2f8f22 --- /dev/null +++ b/src/commands/audit/providers/heuristic_detectors.rs @@ -0,0 +1,1919 @@ +use aiken_lang::{ + ast::{BinOp, ModuleKind, UntypedDefinition, UntypedPattern}, + expr::UntypedExpr, + parser, +}; +use miette::{IntoDiagnostic, Result}; +use std::collections::{BTreeSet, HashSet}; +use std::path::Path; + +use crate::commands::audit::model::{ + ValidatorContextMap, VulnerabilityFinding, VulnerabilitySkill, +}; + +pub(super) fn collect_findings_for_skill( + skill: &VulnerabilitySkill, + source_references: &[String], + validator_context: &ValidatorContextMap, + project_root: &Path, +) -> Result>> { + let sources = load_sources(project_root, source_references, validator_context)?; + + let findings = match skill.id.as_str() { + "strict-value-equality-001" => Some(detect_strict_value_equality(skill, &sources)), + "missing-address-validation-002" => { + Some(detect_missing_address_validation(skill, &sources)) + } + "unvalidated-datum-003" => Some(detect_unvalidated_datum(skill, &sources)), + _ => None, + }; + + Ok(findings) +} + +#[derive(Debug, Clone)] +struct SourceDoc { + file: String, + content: String, + lines: Vec, + module: Option, +} + +fn load_sources( + project_root: &Path, + source_references: &[String], + validator_context: &ValidatorContextMap, +) -> Result> { + let mut candidates = BTreeSet::new(); + + for path in source_references { + if path.ends_with(".ak") { + candidates.insert(path.clone()); + } + } + + for validator in &validator_context.validators { + if validator.source_file.ends_with(".ak") { + candidates.insert(validator.source_file.clone()); + } + } + + let mut sources = Vec::new(); + + for relative in candidates { + let full_path = project_root.join(&relative); + if !full_path.exists() { + continue; + } + + let content = std::fs::read_to_string(&full_path).into_diagnostic()?; + let lines = content + .lines() + .map(|line| line.to_string()) + .collect::>(); + let module = parse_module(&content); + + sources.push(SourceDoc { + file: relative, + content, + lines, + module, + }); + } + + Ok(sources) +} + +fn parse_module(content: &str) -> Option { + parser::module(content, ModuleKind::Validator) + .ok() + .map(|(module, _)| module) +} + +fn detect_strict_value_equality( + skill: &VulnerabilitySkill, + sources: &[SourceDoc], +) -> Vec { + let mut findings = Vec::new(); + + for source in sources { + let Some(module) = &source.module else { + findings.extend(detect_strict_value_equality_text_fallback(skill, source)); + continue; + }; + + let mut eq_locations = Vec::new(); + + for definition in &module.definitions { + let UntypedDefinition::Validator(validator) = definition else { + continue; + }; + + for handler in validator + .handlers + .iter() + .chain(std::iter::once(&validator.fallback)) + { + collect_strict_value_equalities(&handler.body, &mut eq_locations); + } + } + + for (byte_offset, snippet) in eq_locations { + let line = module.lines.line_number(byte_offset).unwrap_or(1); + findings.push(VulnerabilityFinding { + title: "Strict value equality in validator 
logic".to_string(), + severity: skill.severity.clone(), + summary: "Detected strict equality over value/ADA-related expression; this can make validators unsatisfiable under ledger changes.".to_string(), + evidence: vec![format!("{}:{} -> {}", source.file, line, snippet)], + recommendation: + "Prefer minimum/value-shape checks and avoid exact ADA/value equality unless a strong invariant requires it." + .to_string(), + file: Some(source.file.clone()), + line: Some(line), + }); + } + } + + findings +} + +fn collect_strict_value_equalities(expr: &UntypedExpr, out: &mut Vec<(usize, String)>) { + match expr { + UntypedExpr::BinOp { + name, + left, + right, + location, + } => { + if *name == BinOp::Eq + && (is_value_or_lovelace_expr(left) || is_value_or_lovelace_expr(right)) + && !contains_without_lovelace(left) + && !contains_without_lovelace(right) + { + out.push(( + location.start, + "strict equality on value/lovelace expression".to_string(), + )); + } + + collect_strict_value_equalities(left, out); + collect_strict_value_equalities(right, out); + } + UntypedExpr::Sequence { expressions, .. } + | UntypedExpr::LogicalOpChain { expressions, .. } => { + for inner in expressions { + collect_strict_value_equalities(inner, out); + } + } + UntypedExpr::PipeLine { expressions, .. } => { + for inner in expressions { + collect_strict_value_equalities(inner, out); + } + } + UntypedExpr::Call { fun, arguments, .. } => { + collect_strict_value_equalities(fun, out); + for arg in arguments { + collect_strict_value_equalities(&arg.value, out); + } + } + UntypedExpr::Assignment { + value, patterns, .. + } => { + collect_strict_value_equalities(value, out); + for pattern in patterns { + collect_pattern_exprs_for_scan( + &pattern.pattern, + out, + collect_strict_value_equalities, + ); + } + } + UntypedExpr::When { + subject, clauses, .. + } => { + collect_strict_value_equalities(subject, out); + for clause in clauses { + collect_strict_value_equalities(&clause.then, out); + } + } + UntypedExpr::If { + branches, + final_else, + .. + } => { + for branch in branches { + collect_strict_value_equalities(&branch.condition, out); + collect_strict_value_equalities(&branch.body, out); + } + collect_strict_value_equalities(final_else, out); + } + UntypedExpr::Fn { body, .. } => collect_strict_value_equalities(body, out), + UntypedExpr::Trace { + then, + label, + arguments, + .. + } => { + collect_strict_value_equalities(then, out); + collect_strict_value_equalities(label, out); + for arg in arguments { + collect_strict_value_equalities(arg, out); + } + } + UntypedExpr::TraceIfFalse { value, .. } + | UntypedExpr::FieldAccess { + container: value, .. + } + | UntypedExpr::TupleIndex { tuple: value, .. } + | UntypedExpr::UnOp { value, .. } => collect_strict_value_equalities(value, out), + UntypedExpr::Tuple { elems, .. } + | UntypedExpr::List { + elements: elems, .. + } => { + for element in elems { + collect_strict_value_equalities(element, out); + } + } + UntypedExpr::Pair { fst, snd, .. } => { + collect_strict_value_equalities(fst, out); + collect_strict_value_equalities(snd, out); + } + UntypedExpr::RecordUpdate { + constructor, + arguments, + .. 
+ } => { + collect_strict_value_equalities(constructor, out); + for arg in arguments { + collect_strict_value_equalities(&arg.value, out); + } + } + _ => {} + } +} + +fn detect_missing_address_validation( + skill: &VulnerabilitySkill, + sources: &[SourceDoc], +) -> Vec { + let mut findings = Vec::new(); + + for source in sources { + let Some(module) = &source.module else { + findings.extend(detect_missing_address_validation_text_fallback( + skill, source, + )); + continue; + }; + + for definition in &module.definitions { + let UntypedDefinition::Validator(validator) = definition else { + continue; + }; + + for handler in validator + .handlers + .iter() + .chain(std::iter::once(&validator.fallback)) + { + let mut script_hash_vars = Vec::new(); + let mut validated_vars = HashSet::new(); + + collect_script_hash_bindings(&handler.body, &mut script_hash_vars); + collect_equality_validated_vars(&handler.body, &mut validated_vars); + + for (var_name, byte_offset) in script_hash_vars { + let line = module.lines.line_number(byte_offset).unwrap_or(1); + if var_name != "_" && !validated_vars.contains(&var_name) { + findings.push(VulnerabilityFinding { + title: "Script credential extracted but not validated".to_string(), + severity: skill.severity.clone(), + summary: format!( + "Output address script credential is extracted as '{}' but never compared against an expected value.", + var_name + ), + evidence: vec![format!( + "{}:{} -> extracted script credential '{}' without validation", + source.file, line, var_name + )], + recommendation: + "Add explicit validation that extracted script credential matches expected value (e.g. policy_id or known script hash)." + .to_string(), + file: Some(source.file.clone()), + line: Some(line), + }); + } + } + } + } + } + + findings +} + +fn collect_script_hash_bindings(expr: &UntypedExpr, out: &mut Vec<(String, usize)>) { + match expr { + UntypedExpr::Assignment { + value, + patterns, + kind, + location, + .. + } => { + if kind.is_expect() && has_script_constructor_pattern(patterns) { + for pattern in patterns { + collect_bound_vars_from_script_pattern(&pattern.pattern, out, location.start); + } + } + + collect_script_hash_bindings(value, out); + } + UntypedExpr::Sequence { expressions, .. } + | UntypedExpr::LogicalOpChain { expressions, .. } => { + for inner in expressions { + collect_script_hash_bindings(inner, out); + } + } + UntypedExpr::PipeLine { expressions, .. } => { + for inner in expressions { + collect_script_hash_bindings(inner, out); + } + } + UntypedExpr::Call { fun, arguments, .. } => { + collect_script_hash_bindings(fun, out); + for arg in arguments { + collect_script_hash_bindings(&arg.value, out); + } + } + UntypedExpr::When { + subject, clauses, .. + } => { + collect_script_hash_bindings(subject, out); + for clause in clauses { + collect_script_hash_bindings(&clause.then, out); + } + } + UntypedExpr::If { + branches, + final_else, + .. + } => { + for branch in branches { + collect_script_hash_bindings(&branch.condition, out); + collect_script_hash_bindings(&branch.body, out); + } + collect_script_hash_bindings(final_else, out); + } + UntypedExpr::Fn { body, .. } => collect_script_hash_bindings(body, out), + UntypedExpr::Trace { + then, + label, + arguments, + .. + } => { + collect_script_hash_bindings(then, out); + collect_script_hash_bindings(label, out); + for arg in arguments { + collect_script_hash_bindings(arg, out); + } + } + UntypedExpr::TraceIfFalse { value, .. } + | UntypedExpr::FieldAccess { + container: value, .. 
+ } + | UntypedExpr::TupleIndex { tuple: value, .. } + | UntypedExpr::UnOp { value, .. } => collect_script_hash_bindings(value, out), + UntypedExpr::Tuple { elems, .. } + | UntypedExpr::List { + elements: elems, .. + } => { + for element in elems { + collect_script_hash_bindings(element, out); + } + } + UntypedExpr::Pair { fst, snd, .. } => { + collect_script_hash_bindings(fst, out); + collect_script_hash_bindings(snd, out); + } + UntypedExpr::RecordUpdate { + constructor, + arguments, + .. + } => { + collect_script_hash_bindings(constructor, out); + for arg in arguments { + collect_script_hash_bindings(&arg.value, out); + } + } + _ => {} + } +} + +fn collect_equality_validated_vars(expr: &UntypedExpr, out: &mut HashSet) { + match expr { + UntypedExpr::BinOp { + name, left, right, .. + } => { + if matches!(name, BinOp::Eq | BinOp::NotEq) { + collect_var_names(left, out); + collect_var_names(right, out); + } + + collect_equality_validated_vars(left, out); + collect_equality_validated_vars(right, out); + } + UntypedExpr::Sequence { expressions, .. } + | UntypedExpr::LogicalOpChain { expressions, .. } => { + for inner in expressions { + collect_equality_validated_vars(inner, out); + } + } + UntypedExpr::PipeLine { expressions, .. } => { + for inner in expressions { + collect_equality_validated_vars(inner, out); + } + } + UntypedExpr::Call { fun, arguments, .. } => { + collect_equality_validated_vars(fun, out); + for arg in arguments { + collect_equality_validated_vars(&arg.value, out); + } + } + UntypedExpr::Assignment { value, .. } + | UntypedExpr::TraceIfFalse { value, .. } + | UntypedExpr::FieldAccess { + container: value, .. + } + | UntypedExpr::TupleIndex { tuple: value, .. } + | UntypedExpr::UnOp { value, .. } => collect_equality_validated_vars(value, out), + UntypedExpr::When { + subject, clauses, .. + } => { + collect_equality_validated_vars(subject, out); + for clause in clauses { + collect_equality_validated_vars(&clause.then, out); + } + } + UntypedExpr::If { + branches, + final_else, + .. + } => { + for branch in branches { + collect_equality_validated_vars(&branch.condition, out); + collect_equality_validated_vars(&branch.body, out); + } + collect_equality_validated_vars(final_else, out); + } + UntypedExpr::Fn { body, .. } => collect_equality_validated_vars(body, out), + UntypedExpr::Trace { + then, + label, + arguments, + .. + } => { + collect_equality_validated_vars(then, out); + collect_equality_validated_vars(label, out); + for arg in arguments { + collect_equality_validated_vars(arg, out); + } + } + UntypedExpr::Tuple { elems, .. } + | UntypedExpr::List { + elements: elems, .. + } => { + for element in elems { + collect_equality_validated_vars(element, out); + } + } + UntypedExpr::Pair { fst, snd, .. } => { + collect_equality_validated_vars(fst, out); + collect_equality_validated_vars(snd, out); + } + UntypedExpr::RecordUpdate { + constructor, + arguments, + .. 
+ } => { + collect_equality_validated_vars(constructor, out); + for arg in arguments { + collect_equality_validated_vars(&arg.value, out); + } + } + _ => {} + } +} + +fn detect_unvalidated_datum( + skill: &VulnerabilitySkill, + sources: &[SourceDoc], +) -> Vec { + let mut findings = Vec::new(); + + for source in sources { + let Some(module) = &source.module else { + findings.extend(detect_unvalidated_datum_text_fallback(skill, source)); + continue; + }; + + for definition in &module.definitions { + let UntypedDefinition::Validator(validator) = definition else { + continue; + }; + + for handler in validator + .handlers + .iter() + .chain(std::iter::once(&validator.fallback)) + { + let mut inline_datum_vars = Vec::new(); + collect_inline_datum_bindings(&handler.body, &mut inline_datum_vars); + + for (var_name, byte_offset) in inline_datum_vars { + let line = module.lines.line_number(byte_offset).unwrap_or(1); + if var_name == "_" { + continue; + } + + let tracked_vars = collect_aliases_for_var(&handler.body, &var_name); + let has_partial_validation = + has_partial_datum_validation_for_vars(&handler.body, &tracked_vars); + let has_semantic_validation = + has_semantic_validation_for_vars(&handler.body, &tracked_vars); + + if has_partial_validation || !has_semantic_validation { + let summary = if has_partial_validation { + format!( + "Inline datum '{}' is validated only partially (spread pattern like `Datum {{ ..., .. }}`), which may leave fields unchecked.", + var_name + ) + } else { + format!( + "Inline datum '{}' is extracted from output but not validated by type or field constraints.", + var_name + ) + }; + + findings.push(VulnerabilityFinding { + title: "Datum extracted but not validated".to_string(), + severity: skill.severity.clone(), + summary, + evidence: vec![format!( + "{}:{} -> extracted inline datum '{}' without validation", + source.file, line, var_name + )], + recommendation: + "Add explicit datum type validation (`expect : Datum = ...`) and field-level checks or invariant comparisons." + .to_string(), + file: Some(source.file.clone()), + line: Some(line), + }); + } + } + } + } + } + + findings +} + +fn collect_inline_datum_bindings(expr: &UntypedExpr, out: &mut Vec<(String, usize)>) { + match expr { + UntypedExpr::Assignment { + value, + patterns, + kind, + location, + .. + } => { + if kind.is_expect() && has_inline_datum_constructor_pattern(patterns) { + for pattern in patterns { + collect_bound_vars_from_inline_datum_pattern( + &pattern.pattern, + out, + location.start, + ); + } + } + + collect_inline_datum_bindings(value, out); + } + UntypedExpr::Sequence { expressions, .. } + | UntypedExpr::LogicalOpChain { expressions, .. } => { + for inner in expressions { + collect_inline_datum_bindings(inner, out); + } + } + UntypedExpr::PipeLine { expressions, .. } => { + for inner in expressions { + collect_inline_datum_bindings(inner, out); + } + } + UntypedExpr::Call { fun, arguments, .. } => { + collect_inline_datum_bindings(fun, out); + for arg in arguments { + collect_inline_datum_bindings(&arg.value, out); + } + } + UntypedExpr::When { + subject, clauses, .. + } => { + collect_inline_datum_bindings(subject, out); + for clause in clauses { + collect_inline_datum_bindings(&clause.then, out); + } + } + UntypedExpr::If { + branches, + final_else, + .. + } => { + for branch in branches { + collect_inline_datum_bindings(&branch.condition, out); + collect_inline_datum_bindings(&branch.body, out); + } + collect_inline_datum_bindings(final_else, out); + } + UntypedExpr::Fn { body, .. 
} => collect_inline_datum_bindings(body, out), + UntypedExpr::Trace { + then, + label, + arguments, + .. + } => { + collect_inline_datum_bindings(then, out); + collect_inline_datum_bindings(label, out); + for arg in arguments { + collect_inline_datum_bindings(arg, out); + } + } + UntypedExpr::TraceIfFalse { value, .. } + | UntypedExpr::FieldAccess { + container: value, .. + } + | UntypedExpr::TupleIndex { tuple: value, .. } + | UntypedExpr::UnOp { value, .. } => collect_inline_datum_bindings(value, out), + UntypedExpr::Tuple { elems, .. } + | UntypedExpr::List { + elements: elems, .. + } => { + for element in elems { + collect_inline_datum_bindings(element, out); + } + } + UntypedExpr::Pair { fst, snd, .. } => { + collect_inline_datum_bindings(fst, out); + collect_inline_datum_bindings(snd, out); + } + UntypedExpr::RecordUpdate { + constructor, + arguments, + .. + } => { + collect_inline_datum_bindings(constructor, out); + for arg in arguments { + collect_inline_datum_bindings(&arg.value, out); + } + } + _ => {} + } +} + +#[allow(dead_code)] +fn collect_validated_datum_vars(expr: &UntypedExpr, out: &mut HashSet) { + match expr { + UntypedExpr::Assignment { + patterns, + kind, + value, + .. + } => { + if kind.is_expect() { + for pattern in patterns { + collect_all_pattern_var_names(&pattern.pattern, out); + } + } + + collect_validated_datum_vars(value, out); + } + UntypedExpr::BinOp { left, right, .. } => { + collect_var_names(left, out); + collect_var_names(right, out); + collect_validated_datum_vars(left, out); + collect_validated_datum_vars(right, out); + } + UntypedExpr::Sequence { expressions, .. } + | UntypedExpr::LogicalOpChain { expressions, .. } => { + for inner in expressions { + collect_validated_datum_vars(inner, out); + } + } + UntypedExpr::PipeLine { expressions, .. } => { + for inner in expressions { + collect_validated_datum_vars(inner, out); + } + } + UntypedExpr::Call { fun, arguments, .. } => { + collect_validated_datum_vars(fun, out); + for arg in arguments { + collect_validated_datum_vars(&arg.value, out); + } + } + UntypedExpr::When { + subject, clauses, .. + } => { + collect_validated_datum_vars(subject, out); + for clause in clauses { + collect_validated_datum_vars(&clause.then, out); + } + } + UntypedExpr::If { + branches, + final_else, + .. + } => { + for branch in branches { + collect_validated_datum_vars(&branch.condition, out); + collect_validated_datum_vars(&branch.body, out); + } + collect_validated_datum_vars(final_else, out); + } + UntypedExpr::Fn { body, .. } => collect_validated_datum_vars(body, out), + UntypedExpr::Trace { + then, + label, + arguments, + .. + } => { + collect_validated_datum_vars(then, out); + collect_validated_datum_vars(label, out); + for arg in arguments { + collect_validated_datum_vars(arg, out); + } + } + UntypedExpr::TraceIfFalse { value, .. } + | UntypedExpr::FieldAccess { + container: value, .. + } + | UntypedExpr::TupleIndex { tuple: value, .. } + | UntypedExpr::UnOp { value, .. } => collect_validated_datum_vars(value, out), + UntypedExpr::Tuple { elems, .. } + | UntypedExpr::List { + elements: elems, .. + } => { + for element in elems { + collect_validated_datum_vars(element, out); + } + } + UntypedExpr::Pair { fst, snd, .. } => { + collect_validated_datum_vars(fst, out); + collect_validated_datum_vars(snd, out); + } + UntypedExpr::RecordUpdate { + constructor, + arguments, + .. 
+ } => { + collect_validated_datum_vars(constructor, out); + for arg in arguments { + collect_validated_datum_vars(&arg.value, out); + } + } + _ => {} + } +} + +fn collect_aliases_for_var(expr: &UntypedExpr, root_var: &str) -> HashSet { + let mut tracked = HashSet::new(); + tracked.insert(root_var.to_string()); + + loop { + let before = tracked.len(); + collect_aliases_for_vars_once(expr, &mut tracked); + if tracked.len() == before { + break; + } + } + + tracked +} + +fn collect_aliases_for_vars_once(expr: &UntypedExpr, tracked: &mut HashSet) { + match expr { + UntypedExpr::Assignment { + value, + patterns, + kind, + .. + } => { + if kind.is_expect() && expr_references_any_var(value, tracked) { + for assignment in patterns { + collect_all_pattern_var_names(&assignment.pattern, tracked); + } + } + + collect_aliases_for_vars_once(value, tracked); + } + UntypedExpr::Sequence { expressions, .. } + | UntypedExpr::LogicalOpChain { expressions, .. } => { + for inner in expressions { + collect_aliases_for_vars_once(inner, tracked); + } + } + UntypedExpr::PipeLine { expressions, .. } => { + for inner in expressions { + collect_aliases_for_vars_once(inner, tracked); + } + } + UntypedExpr::Call { fun, arguments, .. } => { + collect_aliases_for_vars_once(fun, tracked); + for arg in arguments { + collect_aliases_for_vars_once(&arg.value, tracked); + } + } + UntypedExpr::When { + subject, clauses, .. + } => { + collect_aliases_for_vars_once(subject, tracked); + for clause in clauses { + collect_aliases_for_vars_once(&clause.then, tracked); + } + } + UntypedExpr::If { + branches, + final_else, + .. + } => { + for branch in branches { + collect_aliases_for_vars_once(&branch.condition, tracked); + collect_aliases_for_vars_once(&branch.body, tracked); + } + collect_aliases_for_vars_once(final_else, tracked); + } + UntypedExpr::Fn { body, .. } => collect_aliases_for_vars_once(body, tracked), + UntypedExpr::Trace { + then, + label, + arguments, + .. + } => { + collect_aliases_for_vars_once(then, tracked); + collect_aliases_for_vars_once(label, tracked); + for arg in arguments { + collect_aliases_for_vars_once(arg, tracked); + } + } + UntypedExpr::TraceIfFalse { value, .. } + | UntypedExpr::FieldAccess { + container: value, .. + } + | UntypedExpr::TupleIndex { tuple: value, .. } + | UntypedExpr::UnOp { value, .. } => collect_aliases_for_vars_once(value, tracked), + UntypedExpr::Tuple { elems, .. } + | UntypedExpr::List { + elements: elems, .. + } => { + for element in elems { + collect_aliases_for_vars_once(element, tracked); + } + } + UntypedExpr::Pair { fst, snd, .. } => { + collect_aliases_for_vars_once(fst, tracked); + collect_aliases_for_vars_once(snd, tracked); + } + UntypedExpr::RecordUpdate { + constructor, + arguments, + .. + } => { + collect_aliases_for_vars_once(constructor, tracked); + for arg in arguments { + collect_aliases_for_vars_once(&arg.value, tracked); + } + } + _ => {} + } +} + +fn has_partial_datum_validation_for_vars(expr: &UntypedExpr, tracked: &HashSet) -> bool { + match expr { + UntypedExpr::Assignment { + value, + patterns, + kind, + .. + } => { + let this_has = kind.is_expect() + && expr_references_any_var(value, tracked) + && patterns + .iter() + .any(|assignment| pattern_has_spread_constructor(&assignment.pattern)); + + this_has || has_partial_datum_validation_for_vars(value, tracked) + } + UntypedExpr::Sequence { expressions, .. } + | UntypedExpr::LogicalOpChain { expressions, .. 
} => expressions + .iter() + .any(|inner| has_partial_datum_validation_for_vars(inner, tracked)), + UntypedExpr::PipeLine { expressions, .. } => expressions + .iter() + .any(|inner| has_partial_datum_validation_for_vars(inner, tracked)), + UntypedExpr::Call { fun, arguments, .. } => { + has_partial_datum_validation_for_vars(fun, tracked) + || arguments + .iter() + .any(|arg| has_partial_datum_validation_for_vars(&arg.value, tracked)) + } + UntypedExpr::When { + subject, clauses, .. + } => { + has_partial_datum_validation_for_vars(subject, tracked) + || clauses + .iter() + .any(|clause| has_partial_datum_validation_for_vars(&clause.then, tracked)) + } + UntypedExpr::If { + branches, + final_else, + .. + } => { + branches.iter().any(|branch| { + has_partial_datum_validation_for_vars(&branch.condition, tracked) + || has_partial_datum_validation_for_vars(&branch.body, tracked) + }) || has_partial_datum_validation_for_vars(final_else, tracked) + } + UntypedExpr::Fn { body, .. } => has_partial_datum_validation_for_vars(body, tracked), + UntypedExpr::Trace { + then, + label, + arguments, + .. + } => { + has_partial_datum_validation_for_vars(then, tracked) + || has_partial_datum_validation_for_vars(label, tracked) + || arguments + .iter() + .any(|arg| has_partial_datum_validation_for_vars(arg, tracked)) + } + UntypedExpr::TraceIfFalse { value, .. } + | UntypedExpr::FieldAccess { + container: value, .. + } + | UntypedExpr::TupleIndex { tuple: value, .. } + | UntypedExpr::UnOp { value, .. } => has_partial_datum_validation_for_vars(value, tracked), + UntypedExpr::Tuple { elems, .. } + | UntypedExpr::List { + elements: elems, .. + } => elems + .iter() + .any(|element| has_partial_datum_validation_for_vars(element, tracked)), + UntypedExpr::Pair { fst, snd, .. } => { + has_partial_datum_validation_for_vars(fst, tracked) + || has_partial_datum_validation_for_vars(snd, tracked) + } + UntypedExpr::RecordUpdate { + constructor, + arguments, + .. + } => { + has_partial_datum_validation_for_vars(constructor, tracked) + || arguments + .iter() + .any(|arg| has_partial_datum_validation_for_vars(&arg.value, tracked)) + } + _ => false, + } +} + +fn has_semantic_validation_for_vars(expr: &UntypedExpr, tracked: &HashSet) -> bool { + match expr { + UntypedExpr::BinOp { + left, right, name, .. + } => { + let this_has = matches!(name, BinOp::Eq | BinOp::NotEq) + && (expr_references_any_var(left, tracked) + || expr_references_any_var(right, tracked)); + this_has + || has_semantic_validation_for_vars(left, tracked) + || has_semantic_validation_for_vars(right, tracked) + } + UntypedExpr::Call { fun, arguments, .. } => { + let this_has = arguments + .iter() + .any(|arg| expr_references_any_var(&arg.value, tracked)); + this_has + || has_semantic_validation_for_vars(fun, tracked) + || arguments + .iter() + .any(|arg| has_semantic_validation_for_vars(&arg.value, tracked)) + } + UntypedExpr::FieldAccess { container, .. } => expr_references_any_var(container, tracked), + UntypedExpr::Assignment { value, .. } + | UntypedExpr::TraceIfFalse { value, .. } + | UntypedExpr::TupleIndex { tuple: value, .. } + | UntypedExpr::UnOp { value, .. } => has_semantic_validation_for_vars(value, tracked), + UntypedExpr::Sequence { expressions, .. } + | UntypedExpr::LogicalOpChain { expressions, .. } => expressions + .iter() + .any(|inner| has_semantic_validation_for_vars(inner, tracked)), + UntypedExpr::PipeLine { expressions, .. 
} => expressions + .iter() + .any(|inner| has_semantic_validation_for_vars(inner, tracked)), + UntypedExpr::When { + subject, clauses, .. + } => { + has_semantic_validation_for_vars(subject, tracked) + || clauses + .iter() + .any(|clause| has_semantic_validation_for_vars(&clause.then, tracked)) + } + UntypedExpr::If { + branches, + final_else, + .. + } => { + branches.iter().any(|branch| { + has_semantic_validation_for_vars(&branch.condition, tracked) + || has_semantic_validation_for_vars(&branch.body, tracked) + }) || has_semantic_validation_for_vars(final_else, tracked) + } + UntypedExpr::Fn { body, .. } => has_semantic_validation_for_vars(body, tracked), + UntypedExpr::Trace { + then, + label, + arguments, + .. + } => { + has_semantic_validation_for_vars(then, tracked) + || has_semantic_validation_for_vars(label, tracked) + || arguments + .iter() + .any(|arg| has_semantic_validation_for_vars(arg, tracked)) + } + UntypedExpr::Tuple { elems, .. } + | UntypedExpr::List { + elements: elems, .. + } => elems + .iter() + .any(|element| has_semantic_validation_for_vars(element, tracked)), + UntypedExpr::Pair { fst, snd, .. } => { + has_semantic_validation_for_vars(fst, tracked) + || has_semantic_validation_for_vars(snd, tracked) + } + UntypedExpr::RecordUpdate { + constructor, + arguments, + .. + } => { + has_semantic_validation_for_vars(constructor, tracked) + || arguments + .iter() + .any(|arg| has_semantic_validation_for_vars(&arg.value, tracked)) + } + _ => false, + } +} + +fn expr_references_any_var(expr: &UntypedExpr, tracked: &HashSet) -> bool { + match expr { + UntypedExpr::Var { name, .. } => tracked.contains(name), + UntypedExpr::FieldAccess { container, .. } + | UntypedExpr::TraceIfFalse { + value: container, .. + } + | UntypedExpr::TupleIndex { + tuple: container, .. + } + | UntypedExpr::UnOp { + value: container, .. + } => expr_references_any_var(container, tracked), + UntypedExpr::Call { fun, arguments, .. } => { + expr_references_any_var(fun, tracked) + || arguments + .iter() + .any(|arg| expr_references_any_var(&arg.value, tracked)) + } + UntypedExpr::BinOp { left, right, .. } => { + expr_references_any_var(left, tracked) || expr_references_any_var(right, tracked) + } + UntypedExpr::Assignment { value, .. } => expr_references_any_var(value, tracked), + UntypedExpr::Sequence { expressions, .. } + | UntypedExpr::LogicalOpChain { expressions, .. } => expressions + .iter() + .any(|inner| expr_references_any_var(inner, tracked)), + UntypedExpr::PipeLine { expressions, .. } => expressions + .iter() + .any(|inner| expr_references_any_var(inner, tracked)), + UntypedExpr::When { + subject, clauses, .. + } => { + expr_references_any_var(subject, tracked) + || clauses + .iter() + .any(|clause| expr_references_any_var(&clause.then, tracked)) + } + UntypedExpr::If { + branches, + final_else, + .. + } => { + branches.iter().any(|branch| { + expr_references_any_var(&branch.condition, tracked) + || expr_references_any_var(&branch.body, tracked) + }) || expr_references_any_var(final_else, tracked) + } + UntypedExpr::Fn { body, .. } => expr_references_any_var(body, tracked), + UntypedExpr::Trace { + then, + label, + arguments, + .. + } => { + expr_references_any_var(then, tracked) + || expr_references_any_var(label, tracked) + || arguments + .iter() + .any(|arg| expr_references_any_var(arg, tracked)) + } + UntypedExpr::Tuple { elems, .. } + | UntypedExpr::List { + elements: elems, .. 
+ } => elems + .iter() + .any(|element| expr_references_any_var(element, tracked)), + UntypedExpr::Pair { fst, snd, .. } => { + expr_references_any_var(fst, tracked) || expr_references_any_var(snd, tracked) + } + UntypedExpr::RecordUpdate { + constructor, + arguments, + .. + } => { + expr_references_any_var(constructor, tracked) + || arguments + .iter() + .any(|arg| expr_references_any_var(&arg.value, tracked)) + } + _ => false, + } +} + +fn pattern_has_spread_constructor(pattern: &UntypedPattern) -> bool { + match pattern { + UntypedPattern::Constructor { + spread_location, + arguments, + .. + } => { + spread_location.is_some() + || arguments + .iter() + .any(|arg| pattern_has_spread_constructor(&arg.value)) + } + UntypedPattern::List { elements, tail, .. } => { + elements.iter().any(pattern_has_spread_constructor) + || tail + .as_deref() + .map(pattern_has_spread_constructor) + .unwrap_or(false) + } + UntypedPattern::Pair { fst, snd, .. } => { + pattern_has_spread_constructor(fst) || pattern_has_spread_constructor(snd) + } + UntypedPattern::Tuple { elems, .. } => elems.iter().any(pattern_has_spread_constructor), + UntypedPattern::Assign { pattern, .. } => pattern_has_spread_constructor(pattern), + _ => false, + } +} + +fn has_script_constructor_pattern( + patterns: impl IntoIterator>, +) -> bool { + patterns + .into_iter() + .any(|assignment| pattern_contains_constructor_name(&assignment.borrow().pattern, "Script")) +} + +fn has_inline_datum_constructor_pattern( + patterns: impl IntoIterator>, +) -> bool { + patterns.into_iter().any(|assignment| { + pattern_contains_constructor_name(&assignment.borrow().pattern, "InlineDatum") + }) +} + +fn pattern_contains_constructor_name(pattern: &UntypedPattern, constructor_name: &str) -> bool { + match pattern { + UntypedPattern::Constructor { + name, arguments, .. + } => { + if name == constructor_name { + return true; + } + + arguments + .iter() + .any(|arg| pattern_contains_constructor_name(&arg.value, constructor_name)) + } + UntypedPattern::List { elements, tail, .. } => { + elements + .iter() + .any(|element| pattern_contains_constructor_name(element, constructor_name)) + || tail + .as_deref() + .map(|inner| pattern_contains_constructor_name(inner, constructor_name)) + .unwrap_or(false) + } + UntypedPattern::Pair { fst, snd, .. } => { + pattern_contains_constructor_name(fst, constructor_name) + || pattern_contains_constructor_name(snd, constructor_name) + } + UntypedPattern::Tuple { elems, .. } => elems + .iter() + .any(|element| pattern_contains_constructor_name(element, constructor_name)), + UntypedPattern::Assign { pattern, .. } => { + pattern_contains_constructor_name(pattern, constructor_name) + } + _ => false, + } +} + +fn collect_bound_vars_from_script_pattern( + pattern: &UntypedPattern, + out: &mut Vec<(String, usize)>, + line: usize, +) { + match pattern { + UntypedPattern::Constructor { + name, arguments, .. + } => { + if name == "Script" { + for arg in arguments { + collect_all_pattern_var_names_with_line(&arg.value, out, line); + } + } + + for arg in arguments { + collect_bound_vars_from_script_pattern(&arg.value, out, line); + } + } + UntypedPattern::List { elements, tail, .. } => { + for element in elements { + collect_bound_vars_from_script_pattern(element, out, line); + } + + if let Some(inner) = tail { + collect_bound_vars_from_script_pattern(inner, out, line); + } + } + UntypedPattern::Pair { fst, snd, .. 
} => { + collect_bound_vars_from_script_pattern(fst, out, line); + collect_bound_vars_from_script_pattern(snd, out, line); + } + UntypedPattern::Tuple { elems, .. } => { + for element in elems { + collect_bound_vars_from_script_pattern(element, out, line); + } + } + UntypedPattern::Assign { pattern, .. } => { + collect_bound_vars_from_script_pattern(pattern, out, line) + } + _ => {} + } +} + +fn collect_bound_vars_from_inline_datum_pattern( + pattern: &UntypedPattern, + out: &mut Vec<(String, usize)>, + line: usize, +) { + match pattern { + UntypedPattern::Constructor { + name, arguments, .. + } => { + if name == "InlineDatum" { + for arg in arguments { + collect_all_pattern_var_names_with_line(&arg.value, out, line); + } + } + + for arg in arguments { + collect_bound_vars_from_inline_datum_pattern(&arg.value, out, line); + } + } + UntypedPattern::List { elements, tail, .. } => { + for element in elements { + collect_bound_vars_from_inline_datum_pattern(element, out, line); + } + + if let Some(inner) = tail { + collect_bound_vars_from_inline_datum_pattern(inner, out, line); + } + } + UntypedPattern::Pair { fst, snd, .. } => { + collect_bound_vars_from_inline_datum_pattern(fst, out, line); + collect_bound_vars_from_inline_datum_pattern(snd, out, line); + } + UntypedPattern::Tuple { elems, .. } => { + for element in elems { + collect_bound_vars_from_inline_datum_pattern(element, out, line); + } + } + UntypedPattern::Assign { pattern, .. } => { + collect_bound_vars_from_inline_datum_pattern(pattern, out, line) + } + _ => {} + } +} + +fn collect_all_pattern_var_names_with_line( + pattern: &UntypedPattern, + out: &mut Vec<(String, usize)>, + line: usize, +) { + match pattern { + UntypedPattern::Var { name, .. } => out.push((name.clone(), line)), + UntypedPattern::Assign { name, pattern, .. } => { + out.push((name.clone(), line)); + collect_all_pattern_var_names_with_line(pattern, out, line); + } + UntypedPattern::List { elements, tail, .. } => { + for element in elements { + collect_all_pattern_var_names_with_line(element, out, line); + } + if let Some(inner) = tail { + collect_all_pattern_var_names_with_line(inner, out, line); + } + } + UntypedPattern::Pair { fst, snd, .. } => { + collect_all_pattern_var_names_with_line(fst, out, line); + collect_all_pattern_var_names_with_line(snd, out, line); + } + UntypedPattern::Tuple { elems, .. } => { + for element in elems { + collect_all_pattern_var_names_with_line(element, out, line); + } + } + UntypedPattern::Constructor { arguments, .. } => { + for arg in arguments { + collect_all_pattern_var_names_with_line(&arg.value, out, line); + } + } + _ => {} + } +} + +fn collect_all_pattern_var_names(pattern: &UntypedPattern, out: &mut HashSet) { + match pattern { + UntypedPattern::Var { name, .. } => { + out.insert(name.clone()); + } + UntypedPattern::Assign { name, pattern, .. } => { + out.insert(name.clone()); + collect_all_pattern_var_names(pattern, out); + } + UntypedPattern::List { elements, tail, .. } => { + for element in elements { + collect_all_pattern_var_names(element, out); + } + if let Some(inner) = tail { + collect_all_pattern_var_names(inner, out); + } + } + UntypedPattern::Pair { fst, snd, .. } => { + collect_all_pattern_var_names(fst, out); + collect_all_pattern_var_names(snd, out); + } + UntypedPattern::Tuple { elems, .. } => { + for element in elems { + collect_all_pattern_var_names(element, out); + } + } + UntypedPattern::Constructor { arguments, .. 
} => {
+            for arg in arguments {
+                collect_all_pattern_var_names(&arg.value, out);
+            }
+        }
+        _ => {}
+    }
+}
+
+fn collect_var_names(expr: &UntypedExpr, out: &mut HashSet<String>) {
+    match expr {
+        UntypedExpr::Var { name, .. } => {
+            out.insert(name.clone());
+        }
+        UntypedExpr::FieldAccess { container, .. }
+        | UntypedExpr::TraceIfFalse {
+            value: container, ..
+        }
+        | UntypedExpr::UnOp {
+            value: container, ..
+        }
+        | UntypedExpr::TupleIndex {
+            tuple: container, ..
+        } => collect_var_names(container, out),
+        UntypedExpr::Call { fun, arguments, .. } => {
+            collect_var_names(fun, out);
+            for arg in arguments {
+                collect_var_names(&arg.value, out);
+            }
+        }
+        UntypedExpr::BinOp { left, right, .. } => {
+            collect_var_names(left, out);
+            collect_var_names(right, out);
+        }
+        _ => {}
+    }
+}
+
+fn is_value_or_lovelace_expr(expr: &UntypedExpr) -> bool {
+    match expr {
+        UntypedExpr::Var { name, .. } => name.contains("value") || name.contains("lovelace"),
+        UntypedExpr::FieldAccess {
+            label, container, ..
+        } => {
+            label.contains("value")
+                || label.contains("lovelace")
+                || is_value_or_lovelace_expr(container)
+        }
+        UntypedExpr::Call { fun, arguments, .. } => {
+            let fn_name_has_signal = matches!(&**fun, UntypedExpr::Var { name, .. } if name.contains("value") || name.contains("lovelace") || name.contains("from_lovelace") || name.contains("lovelace_of"));
+            fn_name_has_signal
+                || arguments
+                    .iter()
+                    .any(|arg| is_value_or_lovelace_expr(&arg.value))
+        }
+        UntypedExpr::BinOp { left, right, .. } => {
+            is_value_or_lovelace_expr(left) || is_value_or_lovelace_expr(right)
+        }
+        UntypedExpr::Tuple { elems, .. }
+        | UntypedExpr::List {
+            elements: elems, ..
+        } => elems.iter().any(is_value_or_lovelace_expr),
+        UntypedExpr::Pair { fst, snd, .. } => {
+            is_value_or_lovelace_expr(fst) || is_value_or_lovelace_expr(snd)
+        }
+        _ => false,
+    }
+}
+
+fn contains_without_lovelace(expr: &UntypedExpr) -> bool {
+    match expr {
+        UntypedExpr::Call { fun, arguments, .. } => {
+            let this_has = matches!(&**fun, UntypedExpr::Var { name, .. } if name.contains("without_lovelace"))
+                || matches!(&**fun, UntypedExpr::FieldAccess { label, .. } if label.contains("without_lovelace"));
+
+            this_has
+                || contains_without_lovelace(fun)
+                || arguments
+                    .iter()
+                    .any(|arg| contains_without_lovelace(&arg.value))
+        }
+        UntypedExpr::FieldAccess { container, .. }
+        | UntypedExpr::TraceIfFalse {
+            value: container, ..
+        }
+        | UntypedExpr::UnOp {
+            value: container, ..
+        }
+        | UntypedExpr::TupleIndex {
+            tuple: container, ..
+        } => contains_without_lovelace(container),
+        UntypedExpr::BinOp { left, right, .. } => {
+            contains_without_lovelace(left) || contains_without_lovelace(right)
+        }
+        UntypedExpr::Tuple { elems, .. }
+        | UntypedExpr::List {
+            elements: elems, ..
+        } => elems.iter().any(contains_without_lovelace),
+        UntypedExpr::Pair { fst, snd, .. } => {
+            contains_without_lovelace(fst) || contains_without_lovelace(snd)
+        }
+        _ => false,
+    }
+}
+
+fn collect_pattern_exprs_for_scan<F>(
+    pattern: &UntypedPattern,
+    out: &mut Vec<(usize, String)>,
+    scan: F,
+) where
+    F: Fn(&UntypedExpr, &mut Vec<(usize, String)>),
+{
+    let _ = pattern;
+    let _ = out;
+    let _ = scan;
+}
+
+fn detect_strict_value_equality_text_fallback(
+    skill: &VulnerabilitySkill,
+    source: &SourceDoc,
+) -> Vec<VulnerabilityFinding> {
+    source
+        .lines
+        .iter()
+        .enumerate()
+        .filter_map(|(idx, line)| {
+            let normalized = line.trim();
+            if normalized.starts_with("//") {
+                return None;
+            }
+
+            let has_strict_eq = normalized.contains("==");
+            let has_value_signal = contains_any(normalized, &["value", "lovelace", "from_lovelace"]);
+            let safe_pattern = contains_any(normalized, &["without_lovelace", ">="]);
+
+            (has_strict_eq && has_value_signal && !safe_pattern).then_some(VulnerabilityFinding {
+                title: "Strict value equality in validator logic".to_string(),
+                severity: skill.severity.clone(),
+                summary: "Detected strict equality over a value/ADA-related expression; this can make validators unsatisfiable under ledger changes.".to_string(),
+                evidence: vec![format!("{}:{} -> {}", source.file, idx + 1, normalized)],
+                recommendation:
+                    "Prefer minimum/value-shape checks and avoid exact ADA/value equality unless a strong invariant requires it."
+                        .to_string(),
+                file: Some(source.file.clone()),
+                line: Some(idx + 1),
+            })
+        })
+        .collect()
+}
+
+fn detect_missing_address_validation_text_fallback(
+    skill: &VulnerabilitySkill,
+    source: &SourceDoc,
+) -> Vec<VulnerabilityFinding> {
+    let content = source.content.as_str();
+    let lines = source
+        .lines
+        .iter()
+        .map(|line| line.as_str())
+        .collect::<Vec<_>>();
+
+    for (idx, line) in lines.iter().enumerate() {
+        let normalized = line.trim();
+
+        if normalized.contains("Script(") && normalized.contains("payment_credential") {
+            let hash_var = if let Some(start) = normalized.find("Script(") {
+                let after = &normalized[start + 7..];
+                after.find(')').map(|end| after[..end].trim().to_string())
+            } else {
+                None
+            };
+
+            if let Some(var_name) = hash_var {
+                if var_name == "_" {
+                    continue;
+                }
+
+                let search_end = (idx + 30).min(lines.len());
+                let found_validation = lines[idx + 1..search_end]
+                    .iter()
+                    .map(|value| value.trim())
+                    .any(|check| {
+                        (check.contains("==") || check.contains("!=")) && check.contains(&var_name)
+                    });
+
+                if !found_validation {
+                    return vec![VulnerabilityFinding {
+                        title: "Script credential extracted but not validated".to_string(),
+                        severity: skill.severity.clone(),
+                        summary: format!(
+                            "Output address script credential is extracted as '{}' but never compared or validated against an expected value.",
+                            var_name
+                        ),
+                        evidence: vec![format!(
+                            "{}:{} -> Script credential extracted: {}",
+                            source.file,
+                            idx + 1,
+                            normalized
+                        )],
+                        recommendation:
+                            "Add explicit validation that the extracted script credential matches the expected value (e.g., compare with policy_id or known script hash)."
+ .to_string(), + file: Some(source.file.clone()), + line: Some(idx + 1), + }]; + } + } + } + } + + let _ = content; + vec![] +} + +fn detect_unvalidated_datum_text_fallback( + skill: &VulnerabilitySkill, + source: &SourceDoc, +) -> Vec { + let lines = source + .lines + .iter() + .map(|line| line.as_str()) + .collect::>(); + + for (idx, line) in lines.iter().enumerate() { + let normalized = line.trim(); + + if normalized.contains("InlineDatum(") && normalized.contains("datum:") { + let datum_var = if let Some(start) = normalized.find("InlineDatum(") { + let after = &normalized[start + 12..]; + after.find(')').map(|end| after[..end].trim().to_string()) + } else { + None + }; + + if let Some(var_name) = datum_var { + if var_name == "_" { + continue; + } + + let search_end = (idx + 40).min(lines.len()); + let found_validation = lines[idx + 1..search_end] + .iter() + .map(|value| value.trim()) + .any(|check| { + (check.contains("expect") && check.contains(&var_name)) + || (check.contains(&format!("{}.", var_name))) + || (check.contains("Datum {") && check.contains(&var_name)) + }); + + if !found_validation { + return vec![VulnerabilityFinding { + title: "Datum extracted but not validated".to_string(), + severity: skill.severity.clone(), + summary: + "InlineDatum is extracted from output but its type and fields are never validated." + .to_string(), + evidence: vec![format!( + "{}:{} -> Datum extracted as '{}' but not validated", + source.file, + idx + 1, + var_name + )], + recommendation: + "Add explicit datum type validation (expect Type = datum) and validate all relevant fields." + .to_string(), + file: Some(source.file.clone()), + line: Some(idx + 1), + }]; + } + } + } + } + + vec![] +} + +fn contains_any(content: &str, needles: &[&str]) -> bool { + needles.iter().any(|needle| content.contains(needle)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::commands::audit::model::ValidatorContextMap; + use tempfile::tempdir; + + fn skill(id: &str) -> VulnerabilitySkill { + VulnerabilitySkill { + id: id.to_string(), + name: id.to_string(), + severity: "high".to_string(), + description: "desc".to_string(), + prompt_fragment: "prompt".to_string(), + examples: vec![], + false_positives: vec![], + references: vec![], + tags: vec![], + confidence_hint: None, + guidance_markdown: String::new(), + } + } + + #[test] + fn returns_unsupported_skill_for_unknown_id() { + let tmp = tempdir().expect("tempdir"); + + let findings = collect_findings_for_skill( + &skill("custom-skill-999"), + &[], + &ValidatorContextMap::default(), + tmp.path(), + ) + .expect("analysis should succeed"); + + assert!(findings.is_none()); + } + + #[test] + fn detects_strict_value_equality() { + let tmp = tempdir().expect("tempdir"); + std::fs::write( + tmp.path().join("validator.ak"), + r#"validator test { + spend(_datum, _redeemer, _utxo, transaction) { + expect [output, ..] = transaction.outputs + lovelace_of(output.value) == 2_000_000 + } +}"#, + ) + .expect("write source"); + + let findings = collect_findings_for_skill( + &skill("strict-value-equality-001"), + &["validator.ak".to_string()], + &ValidatorContextMap::default(), + tmp.path(), + ) + .expect("analysis should succeed"); + + assert!(findings.is_some()); + assert!(!findings.unwrap_or_default().is_empty()); + } + + #[test] + fn ignores_safe_value_pattern() { + let tmp = tempdir().expect("tempdir"); + std::fs::write( + tmp.path().join("validator.ak"), + r#"validator test { + spend(_datum, _redeemer, _utxo, transaction) { + expect [output, ..] 
= transaction.outputs + output.value.without_lovelace() == expected_value + } +}"#, + ) + .expect("write source"); + + let findings = collect_findings_for_skill( + &skill("strict-value-equality-001"), + &["validator.ak".to_string()], + &ValidatorContextMap::default(), + tmp.path(), + ) + .expect("analysis should succeed"); + + assert!(findings.unwrap_or_default().is_empty()); + } + + #[test] + fn detects_missing_address_validation() { + let tmp = tempdir().expect("tempdir"); + std::fs::write( + tmp.path().join("validator.ak"), + r#"use cardano/address.{Address, Script} + +validator test { + mint(_redeemer: Data, policy_id: PolicyId, self: Transaction) { + expect [first_output, ..] = self.outputs + expect Output { + address: Address { + payment_credential: Script(some_hash), + stake_credential: None, + }, + value: val, + datum: NoDatum, + reference_script: None, + } = first_output + + quantity_of(val, policy_id, "token") == 1 + } +}"#, + ) + .expect("write source"); + + let findings = collect_findings_for_skill( + &skill("missing-address-validation-002"), + &["validator.ak".to_string()], + &ValidatorContextMap::default(), + tmp.path(), + ) + .expect("analysis should succeed"); + + assert!(!findings.unwrap_or_default().is_empty()); + } + + #[test] + fn ignores_when_address_validation_exists() { + let tmp = tempdir().expect("tempdir"); + std::fs::write( + tmp.path().join("validator.ak"), + r#"use cardano/address.{Address, Script} + +validator test { + mint(_redeemer: Data, policy_id: PolicyId, self: Transaction) { + expect [first_output, ..] = self.outputs + expect Output { + address: Address { + payment_credential: Script(some_hash), + stake_credential: None, + }, + value: val, + datum: NoDatum, + reference_script: None, + } = first_output + + and { + quantity_of(val, policy_id, "token") == 1, + some_hash == policy_id + } + } +}"#, + ) + .expect("write source"); + + let findings = collect_findings_for_skill( + &skill("missing-address-validation-002"), + &["validator.ak".to_string()], + &ValidatorContextMap::default(), + tmp.path(), + ) + .expect("analysis should succeed"); + + assert!(findings.unwrap_or_default().is_empty()); + } + + #[test] + fn detects_unvalidated_datum() { + let tmp = tempdir().expect("tempdir"); + std::fs::write( + tmp.path().join("validator.ak"), + r#"use cardano/transaction.{InlineDatum, Output} + +validator test { + spend(_datum, _redeemer, _utxo, transaction) { + expect [script_output] = list.filter( + transaction.outputs, + fn(output) { output.address == script_address } + ) + + expect Output { + address: o_address, + value: _value, + datum: InlineDatum(script_datum), + reference_script: None, + } = script_output + + o_address == script_address + } +}"#, + ) + .expect("write source"); + + let findings = collect_findings_for_skill( + &skill("unvalidated-datum-003"), + &["validator.ak".to_string()], + &ValidatorContextMap::default(), + tmp.path(), + ) + .expect("analysis should succeed"); + + assert!(!findings.unwrap_or_default().is_empty()); + } + + #[test] + fn ignores_when_datum_is_validated() { + let tmp = tempdir().expect("tempdir"); + std::fs::write( + tmp.path().join("validator.ak"), + r#"use cardano/transaction.{InlineDatum, Output} + +pub type Datum { + owner: ByteArray, +} + +validator test { + spend(_datum, _redeemer, _utxo, transaction) { + expect [script_output] = list.filter( + transaction.outputs, + fn(output) { output.address == script_address } + ) + + expect Output { + address: o_address, + value: _value, + datum: InlineDatum(script_datum), + 
reference_script: None, + } = script_output + + expect expected_datum: Datum = script_datum + expected_datum.owner == some_owner + o_address == script_address + } +}"#, + ) + .expect("write source"); + + let findings = collect_findings_for_skill( + &skill("unvalidated-datum-003"), + &["validator.ak".to_string()], + &ValidatorContextMap::default(), + tmp.path(), + ) + .expect("analysis should succeed"); + + assert!(findings.unwrap_or_default().is_empty()); + } +} diff --git a/src/commands/audit/providers/mod.rs b/src/commands/audit/providers/mod.rs index 1a05dad..98c2e89 100644 --- a/src/commands/audit/providers/mod.rs +++ b/src/commands/audit/providers/mod.rs @@ -1,4 +1,5 @@ mod anthropic; +mod heuristic; mod openai; mod scaffold; mod shared; @@ -6,13 +7,14 @@ mod shared; use miette::{Context, IntoDiagnostic, Result}; use std::path::Path; +use super::Args; use super::model::{ MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, ValidatorContextMap, VulnerabilitySkill, }; -use super::Args; use self::anthropic::AnthropicProvider; +use self::heuristic::HeuristicProvider; use self::openai::OpenAiProvider; use self::scaffold::ScaffoldProvider; @@ -52,6 +54,7 @@ fn load_api_key(api_key_env: &str, provider_name: &str) -> Result { pub fn build_provider(args: &Args) -> Result> { match args.provider.to_ascii_lowercase().as_str() { "scaffold" => Ok(Box::new(ScaffoldProvider)), + "heuristic" => Ok(Box::new(HeuristicProvider)), "openai" => { let endpoint = args .endpoint @@ -114,7 +117,7 @@ pub fn build_provider(args: &Args) -> Result> { ollama_compat: true, })), value => Err(miette::miette!( - "Unsupported provider '{}'. Expected one of: scaffold, openai, anthropic, ollama", + "Unsupported provider '{}'. Expected one of: scaffold, heuristic, openai, anthropic, ollama", value )), } diff --git a/src/commands/audit/providers/openai.rs b/src/commands/audit/providers/openai.rs index a8a4fdc..8bc9047 100644 --- a/src/commands/audit/providers/openai.rs +++ b/src/commands/audit/providers/openai.rs @@ -1,15 +1,14 @@ use miette::{Context, IntoDiagnostic, Result}; -use serde_json::{json, Value}; +use serde_json::{Value, json}; use std::path::Path; +use super::AnalysisProvider; use super::shared::{ - block_on_runtime_aware, build_agent_system_prompt, build_initial_user_prompt, - emit_reasoning_double_line_break, emit_reasoning_line_break, finalize_content_stdout, - finalize_reasoning_stdout, log_agent_progress, run_agent_loop, + ContentStreamState, ReasoningStreamState, block_on_runtime_aware, build_agent_system_prompt, + build_initial_user_prompt, emit_reasoning_double_line_break, emit_reasoning_line_break, + finalize_content_stdout, finalize_reasoning_stdout, log_agent_progress, run_agent_loop, stream_content_delta_to_stdout, stream_reasoning_delta_to_stdout, - ContentStreamState, ReasoningStreamState, }; -use super::AnalysisProvider; use crate::commands::audit::model::{ MiniPrompt, PermissionPromptSpec, ProviderSpec, SkillIterationResult, ValidatorContextMap, VulnerabilitySkill, @@ -227,10 +226,10 @@ fn maybe_emit_reasoning_line_break_on_summary_change( return; }; - if let Some(previous_index) = state.last_summary_index { - if previous_index != current_index { - emit_reasoning_double_line_break(enabled, state); - } + if let Some(previous_index) = state.last_summary_index + && previous_index != current_index + { + emit_reasoning_double_line_break(enabled, state); } state.last_summary_index = Some(current_index); @@ -310,28 +309,25 @@ fn extract_responses_content_delta(event: &Value) -> Option { 
} fn extract_responses_output_text(response_json: &Value) -> Option { - if let Some(output_text) = response_json.get("output_text").and_then(Value::as_str) { - if !output_text.trim().is_empty() { - return Some(output_text.to_string()); - } + if let Some(output_text) = response_json.get("output_text").and_then(Value::as_str) + && !output_text.trim().is_empty() + { + return Some(output_text.to_string()); } let mut chunks = Vec::new(); if let Some(outputs) = response_json.get("output").and_then(Value::as_array) { for item in outputs { - let item_type = item - .get("type") - .and_then(Value::as_str) - .unwrap_or_default(); + let item_type = item.get("type").and_then(Value::as_str).unwrap_or_default(); if (item_type == "output_text" || item_type == "text") && item.get("text").and_then(Value::as_str).is_some() { - if let Some(text) = item.get("text").and_then(Value::as_str) { - if !text.trim().is_empty() { - chunks.push(text.to_string()); - } + if let Some(text) = item.get("text").and_then(Value::as_str) + && !text.trim().is_empty() + { + chunks.push(text.to_string()); } continue; @@ -345,12 +341,10 @@ fn extract_responses_output_text(response_json: &Value) -> Option { .unwrap_or_default(); if (block_type == "output_text" || block_type == "text") && block.get("text").and_then(Value::as_str).is_some() + && let Some(text) = block.get("text").and_then(Value::as_str) + && !text.trim().is_empty() { - if let Some(text) = block.get("text").and_then(Value::as_str) { - if !text.trim().is_empty() { - chunks.push(text.to_string()); - } - } + chunks.push(text.to_string()); } } } @@ -369,27 +363,24 @@ fn extract_responses_reasoning_summary(response_json: &Value) -> Option if let Some(outputs) = response_json.get("output").and_then(Value::as_array) { for item in outputs { - let item_type = item - .get("type") - .and_then(Value::as_str) - .unwrap_or_default(); + let item_type = item.get("type").and_then(Value::as_str).unwrap_or_default(); if item_type != "reasoning" { continue; } - if let Some(summary_text) = item.get("summary").and_then(Value::as_str) { - if !summary_text.trim().is_empty() { - chunks.push(summary_text.to_string()); - } + if let Some(summary_text) = item.get("summary").and_then(Value::as_str) + && !summary_text.trim().is_empty() + { + chunks.push(summary_text.to_string()); } if let Some(summary_items) = item.get("summary").and_then(Value::as_array) { for entry in summary_items { - if let Some(text) = entry.get("text").and_then(Value::as_str) { - if !text.trim().is_empty() { - chunks.push(text.to_string()); - } + if let Some(text) = entry.get("text").and_then(Value::as_str) + && !text.trim().is_empty() + { + chunks.push(text.to_string()); } } } @@ -607,10 +598,7 @@ async fn non_stream_chat_attempt( { log_agent_progress( ai_logs, - format!( - "🧠 Model reasoning output:\n{}", - &reasoning_text - ), + format!("🧠 Model reasoning output:\n{}", &reasoning_text), ); } @@ -641,10 +629,7 @@ async fn non_stream_responses_attempt( if let Some(reasoning_summary) = extract_responses_reasoning_summary(&response_json) { log_agent_progress( ai_logs, - format!( - "🧠 Model reasoning summary:\n{}", - &reasoning_summary - ), + format!("🧠 Model reasoning summary:\n{}", &reasoning_summary), ); } @@ -685,12 +670,15 @@ impl AnalysisProvider for OpenAiProvider { project_root: &Path, permission_prompt: &PermissionPromptSpec, ) -> Result { - let canonical_root = project_root.canonicalize().into_diagnostic().with_context(|| { - format!( - "Failed to canonicalize project root {}", - project_root.display() - ) - })?; + let 
canonical_root = project_root + .canonicalize() + .into_diagnostic() + .with_context(|| { + format!( + "Failed to canonicalize project root {}", + project_root.display() + ) + })?; let api_family = detect_api_family(&self.endpoint, self.ollama_compat); @@ -715,12 +703,14 @@ impl AnalysisProvider for OpenAiProvider { run_agent_loop( skill, - &self.endpoint, - self.ai_logs, - &canonical_root, - permission_prompt, + super::shared::AgentLoopContext { + endpoint: &self.endpoint, + ai_logs: self.ai_logs, + project_root: &canonical_root, + permission_prompt, + provider_label: "AI provider", + }, &mut messages, - "AI provider", |messages| { block_on_runtime_aware(async { let client = reqwest::Client::new(); diff --git a/src/commands/audit/providers/shared.rs b/src/commands/audit/providers/shared.rs index dc1f2bc..e556e07 100644 --- a/src/commands/audit/providers/shared.rs +++ b/src/commands/audit/providers/shared.rs @@ -1,6 +1,6 @@ use miette::{Context, IntoDiagnostic, Result}; use serde::Deserialize; -use serde_json::{json, Value}; +use serde_json::{Value, json}; use std::io::{self, Write}; use std::path::{Path, PathBuf}; use std::process::Command; @@ -9,8 +9,7 @@ use tokio::runtime::Handle; use crate::commands::audit::model::{ MiniPrompt, PermissionPromptSpec, SkillIterationResult, ValidatorContextMap, - VulnerabilityFinding, - VulnerabilitySkill, + VulnerabilityFinding, VulnerabilitySkill, }; pub(super) const MAX_AGENT_STEPS: usize = 25; @@ -175,7 +174,10 @@ pub(super) fn build_initial_user_prompt( ) -> String { INITIAL_USER_PROMPT_TEMPLATE .replace("{{SKILL}}", &prompt.text) - .replace("{{SOURCE_REFERENCES}}", &render_source_references(source_references)) + .replace( + "{{SOURCE_REFERENCES}}", + &render_source_references(source_references), + ) .replace( "{{VALIDATOR_CONTEXT_MAP}}", &render_validator_context_map(validator_context), @@ -192,19 +194,31 @@ pub(super) fn build_tool_result_user_prompt(request: &ReadRequest, output: &str) .replace("{{OUTPUT}}", output) } +pub(super) struct AgentLoopContext<'a> { + pub(super) endpoint: &'a str, + pub(super) ai_logs: bool, + pub(super) project_root: &'a Path, + pub(super) permission_prompt: &'a PermissionPromptSpec, + pub(super) provider_label: &'a str, +} + pub(super) fn run_agent_loop( skill: &VulnerabilitySkill, - endpoint: &str, - ai_logs: bool, - project_root: &Path, - permission_prompt: &PermissionPromptSpec, + context: AgentLoopContext<'_>, messages: &mut Vec, - provider_label: &str, mut request_model: F, ) -> Result where F: FnMut(&[Value]) -> Result, { + let AgentLoopContext { + endpoint, + ai_logs, + project_root, + permission_prompt, + provider_label, + } = context; + for step_idx in 0..MAX_AGENT_STEPS { log_agent_progress( ai_logs, @@ -293,7 +307,10 @@ where AgentAction::ReadRequest(request) => { log_agent_progress( ai_logs, - format!("Model requested: {}", describe_read_request_friendly(&request)), + format!( + "Model requested: {}", + describe_read_request_friendly(&request) + ), ); log_agent_progress( @@ -337,7 +354,10 @@ fn render_permission_prompt(permission_prompt: &PermissionPromptSpec) -> String "{{ allowed_commands }}", &permission_prompt.allowed_commands.join(", "), ) - .replace("{{ scope_rules }}", &permission_prompt.scope_rules.join("\n- ")) + .replace( + "{{ scope_rules }}", + &permission_prompt.scope_rules.join("\n- "), + ) } fn render_source_references(source_references: &[String]) -> String { @@ -500,7 +520,10 @@ fn enforce_read_scope( return Ok(()); } - if matches!(request, ReadRequest::ListDir { .. 
} | ReadRequest::FindFiles { .. }) { + if matches!( + request, + ReadRequest::ListDir { .. } | ReadRequest::FindFiles { .. } + ) { return Err(miette::miette!( "Request denied by strict read scope: directory listing and file discovery are not allowed" )); @@ -708,7 +731,11 @@ pub(super) fn summarize_read_request(request: &ReadRequest) -> String { ), ReadRequest::ListDir { path } => format!("list_dir {}", path), ReadRequest::FindFiles { path, glob } => { - format!("find_files path={} glob={}", path, glob.as_deref().unwrap_or("*")) + format!( + "find_files path={} glob={}", + path, + glob.as_deref().unwrap_or("*") + ) } } } @@ -737,10 +764,7 @@ pub(super) fn describe_read_request_friendly(request: &ReadRequest) -> String { } } -pub(super) fn render_tool_output_for_log( - request: &ReadRequest, - output: &str, -) -> String { +pub(super) fn render_tool_output_for_log(request: &ReadRequest, output: &str) -> String { match request { ReadRequest::ReadFile { path } => { format!( diff --git a/tests/e2e/happy_path.rs b/tests/e2e/happy_path.rs index e292409..a7de269 100644 --- a/tests/e2e/happy_path.rs +++ b/tests/e2e/happy_path.rs @@ -178,3 +178,29 @@ fn aiken_audit_runs_in_initialized_project() { "expected at least one analysis iteration" ); } + +#[test] +#[cfg(feature = "unstable")] +fn aiken_audit_runs_with_heuristic_provider() { + let ctx = TestContext::new(); + let init_result = ctx.run_trix(&["init", "--yes"]); + assert_success(&init_result); + + let result = ctx.run_trix(&["audit", "--provider", "heuristic"]); + + assert_success(&result); + assert_output_contains(&result, "EXPERIMENTAL"); + + ctx.assert_file_exists(".tx3/audit/state.json"); + ctx.assert_file_exists(".tx3/audit/vulnerabilities.md"); + + let state_content = ctx.read_file(".tx3/audit/state.json"); + let state: AnalysisStateJson = + serde_json::from_str(&state_content).expect("state.json should be valid AnalysisStateJson"); + + assert_eq!(state.provider.name, "heuristic"); + assert!( + !state.iterations.is_empty(), + "expected at least one analysis iteration" + ); +}
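+
+// Local usage sketch (not asserted by this test): assuming a project
+// initialized via `trix init --yes` and a binary built with the `unstable`
+// feature enabled, the offline heuristic provider can be exercised with:
+//
+//   trix audit --provider heuristic
+//
+// which should write `.tx3/audit/state.json` (with `provider.name` set to
+// "heuristic") and `.tx3/audit/vulnerabilities.md`, mirroring the assertions
+// in the test above.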