From 39282bb16aac3474c15101af07be4ef2cbdb04b6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 2 Apr 2026 08:18:18 +0000 Subject: [PATCH 1/4] chore(deps): bump github.com/mcpchecker/mcpchecker in /e2e-tests/tools Bumps [github.com/mcpchecker/mcpchecker](https://github.com/mcpchecker/mcpchecker) from 0.0.12 to 0.0.14. - [Release notes](https://github.com/mcpchecker/mcpchecker/releases) - [Changelog](https://github.com/mcpchecker/mcpchecker/blob/main/CHANGELOG.md) - [Commits](https://github.com/mcpchecker/mcpchecker/compare/v0.0.12...v0.0.14) --- updated-dependencies: - dependency-name: github.com/mcpchecker/mcpchecker dependency-version: 0.0.14 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- e2e-tests/tools/go.mod | 18 +++++++++--------- e2e-tests/tools/go.sum | 40 ++++++++++++++++++++-------------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/e2e-tests/tools/go.mod b/e2e-tests/tools/go.mod index 9e9e72d..efacb84 100644 --- a/e2e-tests/tools/go.mod +++ b/e2e-tests/tools/go.mod @@ -4,20 +4,20 @@ go 1.26.1 require ( github.com/fullstorydev/grpcurl v1.9.3 - github.com/mcpchecker/mcpchecker v0.0.12 + github.com/mcpchecker/mcpchecker v0.0.14 github.com/rhysd/actionlint v1.7.12 ) require ( cel.dev/expr v0.25.1 // indirect - charm.land/fantasy v0.16.0 // indirect + charm.land/fantasy v0.17.1 // indirect cloud.google.com/go v0.123.0 // indirect cloud.google.com/go/auth v0.18.2 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect cloud.google.com/go/compute/metadata v0.9.0 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/aws/aws-sdk-go-v2 v1.41.4 // indirect - github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.6 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.7 // indirect github.com/aws/aws-sdk-go-v2/config v1.32.12 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.19.12 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.20 // indirect @@ -89,7 +89,7 @@ require ( github.com/google/s2a-go v0.1.9 // indirect github.com/google/uuid v1.6.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.14 // indirect - github.com/googleapis/gax-go/v2 v2.17.0 // indirect + github.com/googleapis/gax-go/v2 v2.18.0 // indirect github.com/gorilla/websocket v1.5.3 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect github.com/in-toto/attestation v1.1.2 // indirect @@ -99,7 +99,7 @@ require ( github.com/jhump/protoreflect v1.17.0 // indirect github.com/kaptinlin/go-i18n v0.2.12 // indirect github.com/kaptinlin/jsonpointer v0.4.17 // indirect - github.com/kaptinlin/jsonschema v0.7.5 // indirect + github.com/kaptinlin/jsonschema v0.7.6 // indirect github.com/kaptinlin/messageformat-go v0.4.18 // indirect github.com/mailru/easyjson v0.9.1 // indirect github.com/mattn/go-colorable v0.1.14 // indirect @@ -157,10 +157,10 @@ require ( golang.org/x/text v0.35.0 // indirect golang.org/x/time v0.15.0 // indirect golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect - google.golang.org/api v0.270.0 // indirect - google.golang.org/genai v1.50.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20260226221140-a57be14db171 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20260226221140-a57be14db171 // indirect + google.golang.org/api v0.271.0 // indirect + google.golang.org/genai v1.51.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20260311181403-84a4fc48630c // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260311181403-84a4fc48630c // indirect google.golang.org/grpc v1.79.2 // indirect google.golang.org/protobuf v1.36.11 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/e2e-tests/tools/go.sum b/e2e-tests/tools/go.sum index bec6241..4e51a69 100644 --- a/e2e-tests/tools/go.sum +++ b/e2e-tests/tools/go.sum @@ -1,7 +1,7 @@ cel.dev/expr v0.25.1 h1:1KrZg61W6TWSxuNZ37Xy49ps13NUovb66QLprthtwi4= cel.dev/expr v0.25.1/go.mod h1:hrXvqGP6G6gyx8UAHSHJ5RGk//1Oj5nXQ2NI02Nrsg4= -charm.land/fantasy v0.16.0 h1:vE/6sR9nPcSD8qXJXX6wR8NXjtWlBVAzwQmTh5pHVrs= -charm.land/fantasy v0.16.0/go.mod h1:VZjpXVh7IgeiIzGQybEnKzd68ofDsRj94+kzH1ZCAfQ= +charm.land/fantasy v0.17.1 h1:SQzfnyJPDuQWt6e//KKmQmEEXdqHMC0IZz10XwkLcEM= +charm.land/fantasy v0.17.1/go.mod h1:FF5ALCCHETacHJPBqU42CtwMInYQ0ul52fdzIHQMbQk= cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE= cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU= cloud.google.com/go/auth v0.18.2 h1:+Nbt5Ev0xEqxlNjd6c+yYUeosQ5TtEUaNcN/3FozlaM= @@ -40,8 +40,8 @@ github.com/aws/aws-sdk-go v1.55.7 h1:UJrkFq7es5CShfBwlWAC8DA077vp8PyVbQd3lqLiztE github.com/aws/aws-sdk-go v1.55.7/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= github.com/aws/aws-sdk-go-v2 v1.41.4 h1:10f50G7WyU02T56ox1wWXq+zTX9I1zxG46HYuG1hH/k= github.com/aws/aws-sdk-go-v2 v1.41.4/go.mod h1:mwsPRE8ceUUpiTgF7QmQIJ7lgsKUPQOUl3o72QBrE1o= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.6 h1:N4lRUXZpZ1KVEUn6hxtco/1d2lgYhNn1fHkkl8WhlyQ= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.6/go.mod h1:lyw7GFp3qENLh7kwzf7iMzAxDn+NzjXEAGjKS2UOKqI= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.7 h1:3kGOqnh1pPeddVa/E37XNTaWJ8W6vrbYV9lJEkCnhuY= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.7/go.mod h1:lyw7GFp3qENLh7kwzf7iMzAxDn+NzjXEAGjKS2UOKqI= github.com/aws/aws-sdk-go-v2/config v1.32.12 h1:O3csC7HUGn2895eNrLytOJQdoL2xyJy0iYXhoZ1OmP0= github.com/aws/aws-sdk-go-v2/config v1.32.12/go.mod h1:96zTvoOFR4FURjI+/5wY1vc1ABceROO4lWgWJuxgy0g= github.com/aws/aws-sdk-go-v2/credentials v1.19.12 h1:oqtA6v+y5fZg//tcTWahyN9PEn5eDU/Wpvc2+kJ4aY8= @@ -224,8 +224,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.3.14 h1:yh8ncqsbUY4shRD5dA6RlzjJaT4hi3kII+zYw8wmLb8= github.com/googleapis/enterprise-certificate-proxy v0.3.14/go.mod h1:vqVt9yG9480NtzREnTlmGSBmFrA+bzb0yl0TxoBQXOg= -github.com/googleapis/gax-go/v2 v2.17.0 h1:RksgfBpxqff0EZkDWYuz9q/uWsTVz+kf43LsZ1J6SMc= -github.com/googleapis/gax-go/v2 v2.17.0/go.mod h1:mzaqghpQp4JDh3HvADwrat+6M3MOIDp5YKHhb9PAgDY= +github.com/googleapis/gax-go/v2 v2.18.0 h1:jxP5Uuo3bxm3M6gGtV94P4lliVetoCB4Wk2x8QA86LI= +github.com/googleapis/gax-go/v2 v2.18.0/go.mod h1:uSzZN4a356eRG985CzJ3WfbFSpqkLTjsnhWGJR6EwrE= github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDaL56wXCB/5+wF6uHfaI= @@ -284,8 +284,8 @@ github.com/kaptinlin/go-i18n v0.2.12 h1:ywDsvb4KDFddMC2dpI/rrIzGU2mWUSvHmWUm9BMs github.com/kaptinlin/go-i18n v0.2.12/go.mod h1:pVcu9qsW5pOIOoZFJXesRYmLos1vMQrby70JPAoWmJU= github.com/kaptinlin/jsonpointer v0.4.17 h1:mY9k8ciWncxbsECyaxKnR0MdmxamNdp2tLQkAKVrtSk= github.com/kaptinlin/jsonpointer v0.4.17/go.mod h1:SsfsjqnHG5zuKo1DTBzk1VknaHlL4osHw+X9kZKukpU= -github.com/kaptinlin/jsonschema v0.7.5 h1:jkK4a3NyzNoGlvu12CsL3IcqNMVa5sL51HPVa0nWcPY= -github.com/kaptinlin/jsonschema v0.7.5/go.mod h1:3gIWnptl+SWMyfMR2r4TXXd0xsQZ1m50AKrwmcUONSg= +github.com/kaptinlin/jsonschema v0.7.6 h1:UUMqZGFAk7nOzQsYAxvgygm4wpDp/nwXxA4VP9mCPCs= +github.com/kaptinlin/jsonschema v0.7.6/go.mod h1:GGk/oE+F1lWUfYrzKaCf4QWZmMdytt0LL4XdFEFB0LE= github.com/kaptinlin/messageformat-go v0.4.18 h1:RBlHVWgZyoxTcUgGWBsl2AcyScq/urqbLZvzgryTmSI= github.com/kaptinlin/messageformat-go v0.4.18/go.mod h1:ntI3154RnqJgr7GaC+vZBnIExl2V3sv9selvRNNEM24= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= @@ -306,8 +306,8 @@ github.com/mattn/go-runewidth v0.0.21 h1:jJKAZiQH+2mIinzCJIaIG9Be1+0NR+5sz/lYEEj github.com/mattn/go-runewidth v0.0.21/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= github.com/mattn/go-shellwords v1.0.12 h1:M2zGm7EW6UQJvDeQxo4T51eKPurbeFbe8WtebGE2xrk= github.com/mattn/go-shellwords v1.0.12/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y= -github.com/mcpchecker/mcpchecker v0.0.12 h1:nr5kZwxwHMM+kc7dL0hgtYDpVxaTcC803ITLzVnzJXo= -github.com/mcpchecker/mcpchecker v0.0.12/go.mod h1:97gE2mxQZy7XJzJZsd5oM6I64T0Ax7TKdXe/XgNdlcY= +github.com/mcpchecker/mcpchecker v0.0.14 h1:2JVHqY31Jun+fTsrigeNsoV91dwL6BbxIL5DwdedNTQ= +github.com/mcpchecker/mcpchecker v0.0.14/go.mod h1:3PrhvuqDcBhNJaYzXE/TfSQP7X9Ba0JfpOZAGcYZ+Hw= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= @@ -480,16 +480,16 @@ golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhS golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= -google.golang.org/api v0.270.0 h1:4rJZbIuWSTohczG9mG2ukSDdt9qKx4sSSHIydTN26L4= -google.golang.org/api v0.270.0/go.mod h1:5+H3/8DlXpQWrSz4RjGGwz5HfJAQSEI8Bc6JqQNH77U= -google.golang.org/genai v1.50.0 h1:yHKV/vjoeN9PJ3iF0ur4cBZco4N3Kl7j09rMq7XSoWk= -google.golang.org/genai v1.50.0/go.mod h1:A3kkl0nyBjyFlNjgxIwKq70julKbIxpSxqKO5gw/gmk= -google.golang.org/genproto v0.0.0-20260226221140-a57be14db171 h1:RxhCsti413yL0IjU9dVvuTbCISo8gs3RW1jPMStck+4= -google.golang.org/genproto v0.0.0-20260226221140-a57be14db171/go.mod h1:uhvzakVEqAuXU3TC2JCsxIRe5f77l+JySE3EqPoMyqM= -google.golang.org/genproto/googleapis/api v0.0.0-20260226221140-a57be14db171 h1:tu/dtnW1o3wfaxCOjSLn5IRX4YDcJrtlpzYkhHhGaC4= -google.golang.org/genproto/googleapis/api v0.0.0-20260226221140-a57be14db171/go.mod h1:M5krXqk4GhBKvB596udGL3UyjL4I1+cTbK0orROM9ng= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260226221140-a57be14db171 h1:ggcbiqK8WWh6l1dnltU4BgWGIGo+EVYxCaAPih/zQXQ= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260226221140-a57be14db171/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= +google.golang.org/api v0.271.0 h1:cIPN4qcUc61jlh7oXu6pwOQqbJW2GqYh5PS6rB2C/JY= +google.golang.org/api v0.271.0/go.mod h1:CGT29bhwkbF+i11qkRUJb2KMKqcJ1hdFceEIRd9u64Q= +google.golang.org/genai v1.51.0 h1:IZGuUqgfx40INv3hLFGCbOSGp0qFqm7LVmDghzNIYqg= +google.golang.org/genai v1.51.0/go.mod h1:A3kkl0nyBjyFlNjgxIwKq70julKbIxpSxqKO5gw/gmk= +google.golang.org/genproto v0.0.0-20260311181403-84a4fc48630c h1:ZhFDeBMmFc/4g8/GwxnJ4rzB3O4GwQVNr+8Mh7Y5z4g= +google.golang.org/genproto v0.0.0-20260311181403-84a4fc48630c/go.mod h1:hf4r/rBuzaTkLUWRO03771Xvcs6P5hwdQK3UUEJjqo0= +google.golang.org/genproto/googleapis/api v0.0.0-20260311181403-84a4fc48630c h1:OyQPd6I3pN/9gDxz6L13kYGJgqkpdrAohJRBeXyxlgI= +google.golang.org/genproto/googleapis/api v0.0.0-20260311181403-84a4fc48630c/go.mod h1:X2gu9Qwng7Nn009s/r3RUxqkzQNqOrAy79bluY7ojIg= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260311181403-84a4fc48630c h1:xgCzyF2LFIO/0X2UAoVRiXKU5Xg6VjToG4i2/ecSswk= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260311181403-84a4fc48630c/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= google.golang.org/grpc v1.79.2 h1:fRMD94s2tITpyJGtBBn7MkMseNpOZU8ZxgC3MMBaXRU= google.golang.org/grpc v1.79.2/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= From 9ecf9ed310b6119e5c37c8f65a5fc5ea5a38978f Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Thu, 2 Apr 2026 14:02:17 +0200 Subject: [PATCH 2/4] fix(e2e): migrate tasks to mcpchecker v1alpha2 format mcpchecker v0.0.14 requires tasks to use the v1alpha2 format for proper llmJudge step execution. The v1alpha1 format's verify step with contains was causing "cannot run llmJudge step before agent" errors due to the new validation in the agent output extraction. Changes: - Add apiVersion: mcpchecker/v1alpha2 to all task files - Migrate steps to spec with structured verify phase - Wrap contains verification in llmJudge step declaration Co-Authored-By: Claude Sonnet 4.5 --- e2e-tests/mcpchecker/tasks/cve-cluster-does-exist.yaml | 8 +++++--- .../mcpchecker/tasks/cve-cluster-does-not-exist.yaml | 8 +++++--- e2e-tests/mcpchecker/tasks/cve-cluster-list.yaml | 8 +++++--- e2e-tests/mcpchecker/tasks/cve-clusters-general.yaml | 8 +++++--- e2e-tests/mcpchecker/tasks/cve-detected-clusters.yaml | 8 +++++--- e2e-tests/mcpchecker/tasks/cve-detected-workloads.yaml | 8 +++++--- e2e-tests/mcpchecker/tasks/cve-log4shell.yaml | 8 +++++--- e2e-tests/mcpchecker/tasks/cve-multiple.yaml | 8 +++++--- e2e-tests/mcpchecker/tasks/cve-nonexistent.yaml | 8 +++++--- e2e-tests/mcpchecker/tasks/list-clusters.yaml | 8 +++++--- e2e-tests/mcpchecker/tasks/rhsa-not-supported.yaml | 8 +++++--- 11 files changed, 55 insertions(+), 33 deletions(-) diff --git a/e2e-tests/mcpchecker/tasks/cve-cluster-does-exist.yaml b/e2e-tests/mcpchecker/tasks/cve-cluster-does-exist.yaml index dfe6ad8..f1729c1 100644 --- a/e2e-tests/mcpchecker/tasks/cve-cluster-does-exist.yaml +++ b/e2e-tests/mcpchecker/tasks/cve-cluster-does-exist.yaml @@ -1,9 +1,11 @@ kind: Task +apiVersion: mcpchecker/v1alpha2 metadata: name: "cve-cluster-does-exist" difficulty: easy -steps: +spec: + verify: + - llmJudge: + contains: "A response indicating whether CVE-2016-1000031 is detected in staging-central-cluster" prompt: inline: "Is this CVE-2016-1000031 detected in cluster name staging-central-cluster?" - verify: - contains: "A response indicating whether CVE-2016-1000031 is detected in staging-central-cluster" diff --git a/e2e-tests/mcpchecker/tasks/cve-cluster-does-not-exist.yaml b/e2e-tests/mcpchecker/tasks/cve-cluster-does-not-exist.yaml index 69ed883..14fec64 100644 --- a/e2e-tests/mcpchecker/tasks/cve-cluster-does-not-exist.yaml +++ b/e2e-tests/mcpchecker/tasks/cve-cluster-does-not-exist.yaml @@ -1,9 +1,11 @@ kind: Task +apiVersion: mcpchecker/v1alpha2 metadata: name: "cve-cluster-does-not-exist" difficulty: easy -steps: +spec: + verify: + - llmJudge: + contains: "A response indicating that CVE-2024-52577 is not detected in prod-hal-9000 cluster, or that the cluster does not exist" prompt: inline: "Is this CVE-2024-52577 detected in cluster name prod-hal-9000?" - verify: - contains: "A response indicating that CVE-2024-52577 is not detected in prod-hal-9000 cluster, or that the cluster does not exist" diff --git a/e2e-tests/mcpchecker/tasks/cve-cluster-list.yaml b/e2e-tests/mcpchecker/tasks/cve-cluster-list.yaml index 0401155..f368ea8 100644 --- a/e2e-tests/mcpchecker/tasks/cve-cluster-list.yaml +++ b/e2e-tests/mcpchecker/tasks/cve-cluster-list.yaml @@ -1,9 +1,11 @@ kind: Task +apiVersion: mcpchecker/v1alpha2 metadata: name: "cve-cluster-list" difficulty: easy -steps: +spec: + verify: + - llmJudge: + contains: "A response indicating whether CVE-2024-52577 is detected in clusters" prompt: inline: "Is this CVE-2024-52577 detected in any of my clusters?" - verify: - contains: "A response indicating whether CVE-2024-52577 is detected in clusters" diff --git a/e2e-tests/mcpchecker/tasks/cve-clusters-general.yaml b/e2e-tests/mcpchecker/tasks/cve-clusters-general.yaml index 44050b3..d964c70 100644 --- a/e2e-tests/mcpchecker/tasks/cve-clusters-general.yaml +++ b/e2e-tests/mcpchecker/tasks/cve-clusters-general.yaml @@ -1,9 +1,11 @@ kind: Task +apiVersion: mcpchecker/v1alpha2 metadata: name: "cve-clusters-general" difficulty: easy -steps: +spec: + verify: + - llmJudge: + contains: "A response indicating whether CVE-2021-31805 is detected in any clusters, with specific cluster names if detected, or a statement that no clusters have this CVE." prompt: inline: "Is this CVE-2021-31805 detected in my clusters?" - verify: - contains: "A response indicating whether CVE-2021-31805 is detected in any clusters, with specific cluster names if detected, or a statement that no clusters have this CVE." diff --git a/e2e-tests/mcpchecker/tasks/cve-detected-clusters.yaml b/e2e-tests/mcpchecker/tasks/cve-detected-clusters.yaml index 04511ed..10f2bc0 100644 --- a/e2e-tests/mcpchecker/tasks/cve-detected-clusters.yaml +++ b/e2e-tests/mcpchecker/tasks/cve-detected-clusters.yaml @@ -1,9 +1,11 @@ kind: Task +apiVersion: mcpchecker/v1alpha2 metadata: name: "cve-detected-clusters" difficulty: easy -steps: +spec: + verify: + - llmJudge: + contains: "A response indicating whether CVE-2016-1000031 is detected in clusters, nodes, or deployments" prompt: inline: "Is this CVE-2016-1000031 detected in my clusters?" - verify: - contains: "A response indicating whether CVE-2016-1000031 is detected in clusters, nodes, or deployments" diff --git a/e2e-tests/mcpchecker/tasks/cve-detected-workloads.yaml b/e2e-tests/mcpchecker/tasks/cve-detected-workloads.yaml index bc5bac6..09c62ff 100644 --- a/e2e-tests/mcpchecker/tasks/cve-detected-workloads.yaml +++ b/e2e-tests/mcpchecker/tasks/cve-detected-workloads.yaml @@ -1,9 +1,11 @@ kind: Task +apiVersion: mcpchecker/v1alpha2 metadata: name: "cve-detected-workloads" difficulty: easy -steps: +spec: + verify: + - llmJudge: + contains: "CVE-2021-31805 is detected in deployments: struts-app (staging-cluster/applications) and legacy-portal (development-cluster/production)." prompt: inline: "Is this CVE-2021-31805 detected in my workloads?" - verify: - contains: "CVE-2021-31805 is detected in deployments: struts-app (staging-cluster/applications) and legacy-portal (development-cluster/production)." diff --git a/e2e-tests/mcpchecker/tasks/cve-log4shell.yaml b/e2e-tests/mcpchecker/tasks/cve-log4shell.yaml index aa76c12..34daf04 100644 --- a/e2e-tests/mcpchecker/tasks/cve-log4shell.yaml +++ b/e2e-tests/mcpchecker/tasks/cve-log4shell.yaml @@ -1,9 +1,11 @@ kind: Task +apiVersion: mcpchecker/v1alpha2 metadata: name: "cve-log4shell" difficulty: easy -steps: +spec: + verify: + - llmJudge: + contains: The commonly used name "log4shell" is resolved into the exact CVE number (CVE-2021-44228). And it's detected in a few deployments. prompt: inline: "Is log4shell CVE detected in my cluster fleet?" - verify: - contains: The commonly used name "log4shell" is resolved into the exact CVE number (CVE-2021-44228). And it's detected in a few deployments. diff --git a/e2e-tests/mcpchecker/tasks/cve-multiple.yaml b/e2e-tests/mcpchecker/tasks/cve-multiple.yaml index ef726ea..039fe94 100644 --- a/e2e-tests/mcpchecker/tasks/cve-multiple.yaml +++ b/e2e-tests/mcpchecker/tasks/cve-multiple.yaml @@ -1,9 +1,11 @@ kind: Task +apiVersion: mcpchecker/v1alpha2 metadata: name: "cve-multiple" difficulty: medium -steps: +spec: + verify: + - llmJudge: + contains: "Both CVEs (CVE-2021-31805 and CVE-2016-1000031), along with the cluster names, deployments, and namespaces where they are detected." prompt: inline: "Are CVE-2021-31805 and CVE-2016-1000031 detected in my cluster fleet?" - verify: - contains: "Both CVEs (CVE-2021-31805 and CVE-2016-1000031), along with the cluster names, deployments, and namespaces where they are detected." diff --git a/e2e-tests/mcpchecker/tasks/cve-nonexistent.yaml b/e2e-tests/mcpchecker/tasks/cve-nonexistent.yaml index 166c901..1527b78 100644 --- a/e2e-tests/mcpchecker/tasks/cve-nonexistent.yaml +++ b/e2e-tests/mcpchecker/tasks/cve-nonexistent.yaml @@ -1,9 +1,11 @@ kind: Task +apiVersion: mcpchecker/v1alpha2 metadata: name: "cve-nonexistent" difficulty: easy -steps: +spec: + verify: + - llmJudge: + contains: "A response indicating that CVE-2099-00001 is not found or not detected in any clusters" prompt: inline: "Is CVE-2099-00001 detected in my clusters?" - verify: - contains: "A response indicating that CVE-2099-00001 is not found or not detected in any clusters" diff --git a/e2e-tests/mcpchecker/tasks/list-clusters.yaml b/e2e-tests/mcpchecker/tasks/list-clusters.yaml index 4f0b396..8c637cf 100644 --- a/e2e-tests/mcpchecker/tasks/list-clusters.yaml +++ b/e2e-tests/mcpchecker/tasks/list-clusters.yaml @@ -1,9 +1,11 @@ kind: Task +apiVersion: mcpchecker/v1alpha2 metadata: name: "list-clusters" difficulty: easy -steps: +spec: + verify: + - llmJudge: + contains: "A response containing a list of cluster names" prompt: inline: "List my clusters" - verify: - contains: "A response containing a list of cluster names" diff --git a/e2e-tests/mcpchecker/tasks/rhsa-not-supported.yaml b/e2e-tests/mcpchecker/tasks/rhsa-not-supported.yaml index de48984..5a9315a 100644 --- a/e2e-tests/mcpchecker/tasks/rhsa-not-supported.yaml +++ b/e2e-tests/mcpchecker/tasks/rhsa-not-supported.yaml @@ -1,9 +1,11 @@ kind: Task +apiVersion: mcpchecker/v1alpha2 metadata: name: "rhsa-not-supported" difficulty: easy -steps: +spec: + verify: + - llmJudge: + contains: "Information that RHSA-2024:5137 has been detected, along with the cluster where it is detected." prompt: inline: "Is RHSA-2024:5137 affecting my clusters?" - verify: - contains: "Information that RHSA-2024:5137 has been detected, along with the cluster where it is detected." From 53a518ea3213b4756591553af3677f4dfa05a264 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Thu, 2 Apr 2026 14:38:31 +0200 Subject: [PATCH 3/4] fix(wiremock): correct ExportNodeResponse fixture format The node fixtures were using plural "nodes" field which doesn't match the ExportNodeResponse protobuf definition that expects singular "node". This was causing wiremock errors: "Cannot find field: nodes in message v1.ExportNodeResponse" For streaming gRPC responses, the fixture should be an array of response messages, each matching the proto message structure. Changes: - affected_nodes.json: wrap nodes in array of {node: {...}} objects - empty.json: return empty object instead of {nodes: []} Co-Authored-By: Claude Sonnet 4.5 --- wiremock/fixtures/nodes/affected_nodes.json | 16 +++++++++------- wiremock/fixtures/nodes/empty.json | 4 +--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/wiremock/fixtures/nodes/affected_nodes.json b/wiremock/fixtures/nodes/affected_nodes.json index 276354e..eb6cdea 100644 --- a/wiremock/fixtures/nodes/affected_nodes.json +++ b/wiremock/fixtures/nodes/affected_nodes.json @@ -1,6 +1,6 @@ -{ - "nodes": [ - { +[ + { + "node": { "id": "node-001", "name": "worker-node-1", "clusterId": "cluster-prod-01", @@ -24,8 +24,10 @@ } ] } - }, - { + } + }, + { + "node": { "id": "node-002", "name": "worker-node-2", "clusterId": "cluster-prod-01", @@ -50,5 +52,5 @@ ] } } - ] -} + } +] diff --git a/wiremock/fixtures/nodes/empty.json b/wiremock/fixtures/nodes/empty.json index 27d035a..0967ef4 100644 --- a/wiremock/fixtures/nodes/empty.json +++ b/wiremock/fixtures/nodes/empty.json @@ -1,3 +1 @@ -{ - "nodes": [] -} +{} From 0ade07f88342e64b7f3f23b52ff8767130abff38 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Thu, 2 Apr 2026 14:51:12 +0200 Subject: [PATCH 4/4] docs: analyze mcpchecker v0.0.14 agent output bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Investigated E2E test failures after upgrading to mcpchecker v0.0.14. Found that some tests fail because the OpenAI mock agent makes tool calls but doesn't send a final AgentMessageChunk update, causing llmJudge to fail with "cannot run llmJudge step before agent". Created reproduction test that demonstrates the issue: - Agent makes tool call and gets result (ToolCall + ToolCallUpdate) - But no AgentMessageChunk is sent afterward - ExtractOutputSteps produces only "tool_call" type steps - FinalMessageFromSteps returns empty string - llmJudge validation fails on empty Agent.Output The root cause appears to be in llmagent/acp_agent.go where the OnStepFinish callback may not be called in all scenarios, or step.Response.Content.Text() returns empty after tool calls. This may be related to the fantasy library update (v0.16.0 → v0.17.1) in mcpchecker v0.0.13. Test added to mcpchecker's pkg/agent/extract_test.go at: /tmp/mcpchecker/pkg/agent/extract_test.go Run with: cd /tmp/mcpchecker && go test -v -run TestAgentWithOnlyToolCallsNoFinalMessage ./pkg/agent/ See docs/mcpchecker-v0.0.14-bug-analysis.md for full analysis. Co-Authored-By: Claude Sonnet 4.5 --- docs/mcpchecker-v0.0.14-bug-analysis.md | 163 ++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 docs/mcpchecker-v0.0.14-bug-analysis.md diff --git a/docs/mcpchecker-v0.0.14-bug-analysis.md b/docs/mcpchecker-v0.0.14-bug-analysis.md new file mode 100644 index 0000000..c9d3396 --- /dev/null +++ b/docs/mcpchecker-v0.0.14-bug-analysis.md @@ -0,0 +1,163 @@ +# mcpchecker v0.0.14 Bug Analysis + +## Summary + +E2E tests are failing after upgrading from mcpchecker v0.0.12 to v0.0.14 due to an issue where the OpenAI mock agent makes tool calls but doesn't send a final AgentMessageChunk update. This causes llmJudge verification to fail with: + +``` +cannot run llmJudge step before agent (must be in verification) +``` + +## Affected Tests + +- `cve-cluster-does-exist` - ❌ Makes 1 tool call, no final message +- `cve-cluster-does-not-exist` - ❌ Makes 1 tool call, no final message +- `cve-log4shell` - ❌ Makes 3 tool calls (including failing node call), no final message +- `cve-nonexistent` - ⚠️ Makes 4 tool calls, has final message but judge fails for other reasons + +Working tests like `cve-detected-clusters`, `cve-multiple`, etc. all have a final message step. + +## Root Cause + +### Normal Flow (Working) +1. Agent calls tool via ACP ToolCall update +2. Tool executes and returns result via ToolCallUpdate +3. **OpenAI mock sends follow-up chat completion with "Evaluation complete."** +4. llmagent converts response to AgentMessageChunk via OnStepFinish callback +5. ExtractOutputSteps produces steps including final "message" type +6. llmJudge can evaluate the final message + +### Broken Flow (Failing Tests) +1. Agent calls tool via ACP ToolCall update +2. Tool executes and returns result via ToolCallUpdate +3. **No follow-up message is sent** (or fantasy doesn't call OnStepFinish) +4. ExtractOutputSteps produces only "tool_call" type steps +5. FinalMessageFromSteps returns empty string +6. llmJudge fails because `input.Agent.Output == ""` + +## Technical Details + +### OpenAI Mock Server Behavior + +The mock in `functional/servers/openai/server.go` is supposed to send a follow-up message: + +```go +// If request contains tool result messages, this is a follow-up after a tool call. +// Return a simple text response to end the agentic loop. +for _, msg := range req.Messages { + if msg.Role == "tool" { + followUp := &ChatCompletionResponse{ + // ... + Message: Message{ + Role: "assistant", + Content: "Evaluation complete.", + }, + FinishReason: "stop", + } + // ... + } +} +``` + +### llmagent ACP Agent + +The `acp_agent.go` processes OpenAI responses via fantasy's OnStepFinish: + +```go +OnStepFinish: func(step fantasy.StepResult) error { + text := step.Response.Content.Text() + if text == "" { + return nil // ← Early return if no text! + } + + return a.conn.SessionUpdate(promptCtx, acp.SessionNotification{ + SessionId: params.SessionId, + Update: acp.UpdateAgentMessageText(text), + }) +}, +``` + +If `step.Response.Content.Text()` is empty, no AgentMessageText update is sent. + +### llmJudge Validation + +The llmJudge step validates agent output in `pkg/steps/llm_judge.go:88-90`: + +```go +if input.Agent == nil || input.Agent.Prompt == "" || input.Agent.Output == "" { + return nil, fmt.Errorf("cannot run llmJudge step before agent (must be in verification)") +} +``` + +## Reproduction Test + +Added test in mcpchecker repo: + +```go +// TestAgentWithOnlyToolCallsNoFinalMessage reproduces issue #268 +func TestAgentWithOnlyToolCallsNoFinalMessage(t *testing.T) { + updates := []acp.SessionUpdate{ + { + ToolCall: &acp.SessionUpdateToolCall{ + ToolCallId: "call-1", + Title: "get_clusters_with_orchestrator_cve", + // ... + }, + }, + { + ToolCallUpdate: &acp.SessionToolCallUpdate{ + ToolCallId: "call-1", + Status: ptr(acp.ToolCallStatusCompleted), + // ... + }, + }, + // BUG: No AgentMessageChunk update here! + } + + steps := agent.ExtractOutputSteps(updates) + assert.Len(t, steps, 1, "Only has tool_call, no message") + + finalMessage := agent.FinalMessageFromSteps(steps) + assert.Empty(t, finalMessage) // ← Fails llmJudge validation +} +``` + +To run: `cd /tmp/mcpchecker && go test -v -run TestAgentWithOnlyToolCallsNoFinalMessage ./pkg/agent/` + +## Hypothesis + +The issue may be related to: + +1. **fantasy library behavior change** - The charm.land/fantasy package was updated in v0.0.13 (bump from 0.16.0 to 0.17.1). The OnStepFinish callback might not be called in all scenarios. + +2. **OpenAI streaming response handling** - The mock server's streaming implementation might not be properly triggering OnStepFinish for the follow-up message after tool results. + +3. **ACP protocol handling** - The conversion from OpenAI chat completion responses to ACP SessionUpdate messages might have edge cases. + +## Investigation Steps + +1. ✅ Reproduced issue with unit test +2. ✅ Identified that FinalMessageFromSteps returns empty for failing tests +3. ✅ Traced to missing AgentMessageChunk updates in SessionUpdate stream +4. ⏭️ **TODO**: Check if fantasy v0.17.1 has breaking changes in OnStepFinish behavior +5. ⏭️ **TODO**: Add debug logging to llmagent acp_agent.go to see if OnStepFinish is called +6. ⏭️ **TODO**: Test with real OpenAI API instead of mock to confirm it's a mock issue +7. ⏭️ **TODO**: Review PR #268 discussion on mcpchecker repo for context + +## Workaround + +For now, we've: +1. Migrated all tasks to v1alpha2 format (required by v0.0.14) +2. Fixed wiremock ExportNodeResponse fixture format +3. Waiting to see if these fixes resolve the remaining failures + +If failures persist, we may need to: +- Downgrade to mcpchecker v0.0.12 temporarily +- Report bug upstream to mcpchecker with reproduction test +- Investigate fantasy library update as potential cause + +## Related Links + +- mcpchecker PR #268: https://github.com/mcpchecker/mcpchecker/pull/268 +- Our PR #102: https://github.com/stackrox/stackrox-mcp/pull/102 +- Test run: https://github.com/stackrox/stackrox-mcp/actions/runs/23899405760