From 1cbded21f99d94f9e87281ef066ff73adf892fd6 Mon Sep 17 00:00:00 2001 From: skudasov Date: Fri, 27 Feb 2026 12:51:32 +0100 Subject: [PATCH 1/5] replace Pumba with docker-tc, rework old chaos tests --- .github/workflows/devenv-ocr2-chaos.yml | 83 +++++++ .../{devenv-ocr2.yml => devenv-ocr2-soak.yml} | 2 +- devenv/cmd/cl/completion.go | 6 +- devenv/env.toml | 2 +- devenv/go.mod | 2 +- devenv/go.sum | 4 +- devenv/tests/ocr2/chaos_test.go | 171 ++++++++++++++ .../tests/ocr2/{load_test.go => soak_test.go} | 25 +- devenv/tests/ocr2/test_helpers.go | 16 +- integration-tests/chaos/ocr_chaos_test.go | 222 ------------------ 10 files changed, 265 insertions(+), 268 deletions(-) create mode 100644 .github/workflows/devenv-ocr2-chaos.yml rename .github/workflows/{devenv-ocr2.yml => devenv-ocr2-soak.yml} (97%) create mode 100644 devenv/tests/ocr2/chaos_test.go rename devenv/tests/ocr2/{load_test.go => soak_test.go} (83%) delete mode 100644 integration-tests/chaos/ocr_chaos_test.go diff --git a/.github/workflows/devenv-ocr2-chaos.yml b/.github/workflows/devenv-ocr2-chaos.yml new file mode 100644 index 00000000000..c5b2e8f7391 --- /dev/null +++ b/.github/workflows/devenv-ocr2-chaos.yml @@ -0,0 +1,83 @@ +name: OCR2 Chaos Test + +on: + schedule: + - cron: "0 6 * * *" # Run daily at 6 AM + workflow_dispatch: + +defaults: + run: + working-directory: devenv + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.sha }} + cancel-in-progress: true + +jobs: + chaos: + permissions: + id-token: write + contents: read + pull-requests: write + runs-on: ubuntu24.04-16cores-64GB # ghv-ignore! + steps: + - name: Checkout code + uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1 + + - name: Install Just + uses: extractions/setup-just@e33e0265a09d6d736e2ee1e0eb685ef1de4669ff # v3 + with: + just-version: "1.40.0" + + - name: Configure AWS credentials using OIDC + uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2 + with: + role-to-assume: ${{ secrets.AWS_OIDC_IAM_ROLE_SDLC_ECR_READONLY_ARN }} + aws-region: us-west-2 + + - name: Authenticate to ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1 + + - name: Set up Go + uses: actions/setup-go@v6 # v6 + with: + cache: true + go-version-file: devenv/go.mod + cache-dependency-path: devenv/go.sum + + - name: Download Go dependencies + run: | + go mod download + + - name: Set environment variables + id: set-env + run: | + echo "CHAINLINK_IMAGE=${{ secrets.REGISTRY_SDLC }}/chainlink:nightly-$(date +%Y%m%d)-plugins" >> $GITHUB_ENV + + - name: Run OCR2 environment + env: + FAKE_SERVER_IMAGE: ${{ secrets.FAKE_SERVER_IMAGE }} + run: | + cd cmd/cl && go install . && cd - + cl u env.toml,products/ocr2/basic.toml && cl obs up -f + + - name: Run Chaos tests + id: chaos_test + working-directory: devenv/tests/ocr2 + run: | + echo "Running tests for: $CHAINLINK_IMAGE, product: OCR2" + go test -v -timeout 4h -run TestOCR2Chaos + + - name: Upload Logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: container-logs-smoke + path: devenv/tests/ocr2/logs + retention-days: 3 diff --git a/.github/workflows/devenv-ocr2.yml b/.github/workflows/devenv-ocr2-soak.yml similarity index 97% rename from .github/workflows/devenv-ocr2.yml rename to .github/workflows/devenv-ocr2-soak.yml index a71dd411f3c..f6400bfb2ea 100644 --- a/.github/workflows/devenv-ocr2.yml +++ b/.github/workflows/devenv-ocr2-soak.yml @@ -72,7 +72,7 @@ jobs: working-directory: devenv/tests/ocr2 run: | echo "Running tests for: $CHAINLINK_IMAGE, product: OCR2" - go test -v -timeout 4h -run TestOCR2Load/clean + go test -v -timeout 4h -run TestOCR2Soak/clean - name: Upload Logs if: always() diff --git a/devenv/cmd/cl/completion.go b/devenv/cmd/cl/completion.go index 3bb83852c14..aa16992e97a 100644 --- a/devenv/cmd/cl/completion.go +++ b/devenv/cmd/cl/completion.go @@ -31,9 +31,9 @@ func getSubCommands(parent string) []prompt.Suggest { {Text: "directrequest TestSmoke", Description: "Run Direct Request test"}, {Text: "flux TestSmoke", Description: "Run Flux test"}, {Text: "ocr2 TestSmoke/rounds", Description: "Run OCR2 smoke test"}, - {Text: "ocr2 TestOCR2Load/clean", Description: "Run OCR2 soak test"}, - {Text: "ocr2 TestOCR2Load/gas-spikes", Description: "Run OCR2 soak test + simulate gas spikes"}, - {Text: "ocr2 TestOCR2Load/chaos", Description: "Run OCR2 soak test + introduce container kills and latency"}, + {Text: "ocr2 TestOCR2Soak/clean", Description: "Run OCR2 soak test"}, + {Text: "ocr2 TestOCR2Soak/gas-spikes", Description: "Run OCR2 soak test + simulate gas spikes"}, + {Text: "ocr2 TestOCR2Chaos/rpc_latency", Description: "Run OCR2 chaos test + rpc latency"}, } case "bs": return []prompt.Suggest{ diff --git a/devenv/env.toml b/devenv/env.toml index ce3abb532b2..78b1eef4f4a 100644 --- a/devenv/env.toml +++ b/devenv/env.toml @@ -40,4 +40,4 @@ [[nodesets.node_specs]] [nodesets.node_specs.node] - image = "public.ecr.aws/chainlink/chainlink:2.30.0" \ No newline at end of file + image = "public.ecr.aws/chainlink/chainlink:2.30.0" diff --git a/devenv/go.mod b/devenv/go.mod index b2d55df0d7f..0ad9c107eaf 100644 --- a/devenv/go.mod +++ b/devenv/go.mod @@ -25,7 +25,7 @@ require ( github.com/smartcontractkit/chainlink-evm v0.3.3 github.com/smartcontractkit/chainlink-evm/gethwrappers v0.0.0-20251211123524-f0c4fe7cfc0a github.com/smartcontractkit/chainlink-protos/job-distributor v0.12.0 - github.com/smartcontractkit/chainlink-testing-framework/framework v0.14.6 + github.com/smartcontractkit/chainlink-testing-framework/framework v0.14.9 github.com/smartcontractkit/chainlink-testing-framework/framework/components/fake v0.10.1-0.20250711120409-5078050f9db4 github.com/smartcontractkit/chainlink-testing-framework/seth v1.51.5 github.com/smartcontractkit/chainlink-testing-framework/wasp v1.51.2 diff --git a/devenv/go.sum b/devenv/go.sum index 75aeaa7b8cc..cfa82cefafd 100644 --- a/devenv/go.sum +++ b/devenv/go.sum @@ -947,8 +947,8 @@ github.com/smartcontractkit/chainlink-framework/multinode v0.0.0-20251021173435- github.com/smartcontractkit/chainlink-framework/multinode v0.0.0-20251021173435-e86785845942/go.mod h1:2JTBNp3FlRdO/nHc4dsc9bfxxMClMO1Qt8sLJgtreBY= github.com/smartcontractkit/chainlink-protos/job-distributor v0.12.0 h1:/bhoALRzNXZkdzxBkNM505pMofNy0K0eW1nCzXw+AUI= github.com/smartcontractkit/chainlink-protos/job-distributor v0.12.0/go.mod h1:/dVVLXrsp+V0AbcYGJo3XMzKg3CkELsweA/TTopCsKE= -github.com/smartcontractkit/chainlink-testing-framework/framework v0.14.6 h1:5hy0m8Vj5TzSU1lW2AZHbbpEQ71BXjLCseLMDnaK+Zs= -github.com/smartcontractkit/chainlink-testing-framework/framework v0.14.6/go.mod h1:43xdIQuqw/gzfazsqJkBrGdF25TIJDiY/Ak/YrWFTmU= +github.com/smartcontractkit/chainlink-testing-framework/framework v0.14.9 h1:a/e5gwPjpzQwqtmMCop2vKXi55mYk+bQBP/5t3y60CI= +github.com/smartcontractkit/chainlink-testing-framework/framework v0.14.9/go.mod h1:43xdIQuqw/gzfazsqJkBrGdF25TIJDiY/Ak/YrWFTmU= github.com/smartcontractkit/chainlink-testing-framework/framework/components/fake v0.10.1-0.20250711120409-5078050f9db4 h1:6iIj+U1SA19xftdEJwubATHBoGm4yc8q+MwWz6rlBDc= github.com/smartcontractkit/chainlink-testing-framework/framework/components/fake v0.10.1-0.20250711120409-5078050f9db4/go.mod h1:YEQbZRHFojvlQKeuckG/70t0WkAqOBmArSbkacgHSbc= github.com/smartcontractkit/chainlink-testing-framework/lib/grafana v1.50.0 h1:VIxK8u0Jd0Q/VuhmsNm6Bls6Tb31H/sA3A/rbc5hnhg= diff --git a/devenv/tests/ocr2/chaos_test.go b/devenv/tests/ocr2/chaos_test.go new file mode 100644 index 00000000000..9e1195d6b8e --- /dev/null +++ b/devenv/tests/ocr2/chaos_test.go @@ -0,0 +1,171 @@ +package ocr2 + +import ( + "fmt" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + + "github.com/smartcontractkit/libocr/gethwrappers2/ocr2aggregator" + "github.com/stretchr/testify/require" + + "github.com/smartcontractkit/chainlink-testing-framework/framework" + "github.com/smartcontractkit/chainlink-testing-framework/framework/rpc" + de "github.com/smartcontractkit/chainlink/devenv" + "github.com/smartcontractkit/chainlink/devenv/products" + "github.com/smartcontractkit/chainlink/devenv/products/ocr2" + + "github.com/smartcontractkit/chainlink-testing-framework/framework/chaos" +) + +func TestOCR2Chaos(t *testing.T) { + ctx := t.Context() + outputFile := "../../env-out.toml" + in, err := de.LoadOutput[de.Cfg](outputFile) + require.NoError(t, err) + pdConfig, err := products.LoadOutput[ocr2.Configurator](outputFile) + require.NoError(t, err) + + t.Cleanup(func() { + _, cErr := framework.SaveContainerLogs(fmt.Sprintf("%s-%s", framework.DefaultCTFLogsDir, t.Name())) + require.NoError(t, cErr) + }) + c, _, _, err := products.ETHClient(ctx, in.Blockchains[0].Out.Nodes[0].ExternalWSUrl, pdConfig.Config[0].GasSettings.FeeCapMultiplier, pdConfig.Config[0].GasSettings.TipCapMultiplier) + require.NoError(t, err) + + anvilClient := rpc.New(in.Blockchains[0].Out.Nodes[0].ExternalHTTPUrl, nil) + + dtc, err := chaos.NewDockerChaos(t.Context()) + require.NoError(t, err) + + roundCheckInterval := 5 * time.Second + roundTimeout := 2 * time.Minute + chaosActionDuration := 30 * time.Second + eaChaosDuration := 30 * time.Second + defaultTwoRounds := []*roundSettings{{value: 1}, {value: 1e3}} + + testCases := []testcase{ + { + name: "rpc pause", + roundCheckInterval: roundCheckInterval, + roundTimeout: roundTimeout, + roundSettings: defaultTwoRounds, + repeat: 1, + chaos: func() { + err := dtc.Chaos("anvil", chaos.CmdPause, "") + require.NoError(t, err) + time.Sleep(chaosActionDuration) + err = dtc.RemoveAll() + require.NoError(t, err) + }, + }, + { + name: "rpc latency spike", + roundCheckInterval: roundCheckInterval, + roundTimeout: roundTimeout, + roundSettings: defaultTwoRounds, + repeat: 1, + chaos: func() { + err := dtc.Chaos("anvil", chaos.CmdDelay, "3s") + require.NoError(t, err) + time.Sleep(chaosActionDuration) + err = dtc.RemoveAll() + require.NoError(t, err) + }, + }, + { + name: "nodes mixed", + roundCheckInterval: roundCheckInterval, + roundTimeout: roundTimeout, + roundSettings: defaultTwoRounds, + repeat: 1, + chaos: func() { + err := dtc.Chaos("don-node1", chaos.CmdDelay, "1s") + require.NoError(t, err) + err = dtc.Chaos("don-node2", chaos.CmdLoss, "30%") + require.NoError(t, err) + err = dtc.Chaos("don-node3", chaos.CmdCorrupt, "30%") + require.NoError(t, err) + err = dtc.Chaos("don-node4", chaos.CmdDuplicate, "30%") + require.NoError(t, err) + time.Sleep(chaosActionDuration) + err = dtc.RemoveAll() + require.NoError(t, err) + }, + }, + { + name: "nodes pause minority", + roundCheckInterval: roundCheckInterval, + roundTimeout: roundTimeout, + roundSettings: defaultTwoRounds, + repeat: 1, + chaos: func() { + err := dtc.Chaos("don-node1", chaos.CmdPause, "") + require.NoError(t, err) + err = dtc.Chaos("don-node2", chaos.CmdPause, "") + require.NoError(t, err) + time.Sleep(chaosActionDuration) + err = dtc.RemoveAll() + require.NoError(t, err) + }, + }, + { + name: "nodes pause majority", + roundCheckInterval: roundCheckInterval, + roundTimeout: roundTimeout, + roundSettings: defaultTwoRounds, + repeat: 1, + chaos: func() { + err := dtc.Chaos("don-node1", chaos.CmdPause, "") + require.NoError(t, err) + err = dtc.Chaos("don-node2", chaos.CmdPause, "") + require.NoError(t, err) + err = dtc.Chaos("don-node3", chaos.CmdPause, "") + require.NoError(t, err) + time.Sleep(chaosActionDuration) + err = dtc.RemoveAll() + require.NoError(t, err) + }, + }, + + { + name: "pause ea", + roundCheckInterval: roundCheckInterval, + roundTimeout: roundTimeout, + roundSettings: defaultTwoRounds, + repeat: 1, + chaos: func() { + err := dtc.Chaos("fake", chaos.CmdPause, "") + require.NoError(t, err) + time.Sleep(eaChaosDuration) + err = dtc.RemoveAll() + require.NoError(t, err) + }, + }, + { + name: "slow ea", + roundCheckInterval: roundCheckInterval, + roundSettings: defaultTwoRounds, + roundTimeout: roundTimeout, + chaos: func() { + err := dtc.Chaos("fake", chaos.CmdDelay, "5s") + require.NoError(t, err) + time.Sleep(eaChaosDuration) + err = dtc.RemoveAll() + require.NoError(t, err) + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + o2, err := ocr2aggregator.NewOCR2Aggregator(common.HexToAddress(pdConfig.Config[0].DeployedContracts.OCRv2AggregatorAddr), c) + require.NoError(t, err) + tc.chaos() + for range tc.repeat { + verifyRounds(t, in, o2, tc, anvilClient) + } + }) + } +} diff --git a/devenv/tests/ocr2/load_test.go b/devenv/tests/ocr2/soak_test.go similarity index 83% rename from devenv/tests/ocr2/load_test.go rename to devenv/tests/ocr2/soak_test.go index 6323b0e0a69..821714fcd64 100644 --- a/devenv/tests/ocr2/load_test.go +++ b/devenv/tests/ocr2/soak_test.go @@ -20,7 +20,7 @@ import ( "github.com/smartcontractkit/chainlink/devenv/products/ocr2" ) -func TestOCR2Load(t *testing.T) { +func TestOCR2Soak(t *testing.T) { ctx := t.Context() outputFile := "../../env-out.toml" in, err := de.LoadOutput[de.Cfg](outputFile) @@ -85,29 +85,6 @@ func TestOCR2Load(t *testing.T) { }, }, }, - { - name: "chaos", - roundCheckInterval: 5 * time.Second, - roundTimeout: 2 * time.Minute, - repeat: 2, - roundSettings: []*roundSettings{ - // these are just Pumba tool commands, read more here https://github.com/alexei-led/pumba - { - value: 1, - chaos: &chaosSettings{ - command: "stop --duration=10s --restart re2:don-node0", - recoveryWaitTime: 10 * time.Second, - }, - }, - { - value: 1e3, - chaos: &chaosSettings{ - command: "netem --tc-image=gaiadocker/iproute2 --duration=10s delay --time=1000 re2:don-node.*", - recoveryWaitTime: 10 * time.Second, - }, - }, - }, - }, } for _, tc := range testCases { diff --git a/devenv/tests/ocr2/test_helpers.go b/devenv/tests/ocr2/test_helpers.go index 28892540f93..13742d054d0 100644 --- a/devenv/tests/ocr2/test_helpers.go +++ b/devenv/tests/ocr2/test_helpers.go @@ -12,7 +12,6 @@ import ( "github.com/smartcontractkit/libocr/gethwrappers2/ocr2aggregator" "github.com/stretchr/testify/require" - "github.com/smartcontractkit/chainlink-testing-framework/framework/chaos" "github.com/smartcontractkit/chainlink-testing-framework/framework/rpc" de "github.com/smartcontractkit/chainlink/devenv" "github.com/smartcontractkit/chainlink/devenv/products/ocr2" @@ -27,10 +26,7 @@ var ( LatestRoundAnswer = int64(0) ) -type chaosSettings struct { - command string - recoveryWaitTime time.Duration -} +type chaosFunc func() type gasSettings struct { gasPriceStart *big.Int @@ -43,7 +39,6 @@ type gasSettings struct { type roundSettings struct { value int gas *gasSettings - chaos *chaosSettings } type testcase struct { @@ -51,6 +46,7 @@ type testcase struct { roundCheckInterval time.Duration roundTimeout time.Duration repeat int + chaos chaosFunc roundSettings []*roundSettings cfg *ocr2.OCRv2SetConfigOptions } @@ -133,14 +129,6 @@ func verifyRounds(t *testing.T, in *de.Cfg, o2 *ocr2aggregator.OCR2Aggregator, t L.Info().Msg("Creating gas spike") simulateGasSpike(t, c, currentRoundSettings.gas) } - if currentRoundSettings.chaos != nil { - L.Info().Msg("Executing chaos action") - _, err = chaos.ExecPumba( - currentRoundSettings.chaos.command, - currentRoundSettings.chaos.recoveryWaitTime, - ) - require.NoError(t, err) - } require.NoError(t, err) TotalRoundsPerTestCount++ } diff --git a/integration-tests/chaos/ocr_chaos_test.go b/integration-tests/chaos/ocr_chaos_test.go deleted file mode 100644 index 4f405067d7f..00000000000 --- a/integration-tests/chaos/ocr_chaos_test.go +++ /dev/null @@ -1,222 +0,0 @@ -package chaos - -import ( - "math/big" - "strconv" - "testing" - - "github.com/ethereum/go-ethereum/common" - "github.com/onsi/gomega" - "github.com/stretchr/testify/require" - - ctfClient "github.com/smartcontractkit/chainlink-testing-framework/lib/client" - ctf_config "github.com/smartcontractkit/chainlink-testing-framework/lib/config" - "github.com/smartcontractkit/chainlink-testing-framework/lib/k8s/chaos" - "github.com/smartcontractkit/chainlink-testing-framework/lib/k8s/environment" - "github.com/smartcontractkit/chainlink-testing-framework/lib/k8s/pkg/helm/chainlink" - "github.com/smartcontractkit/chainlink-testing-framework/lib/k8s/pkg/helm/ethereum" - "github.com/smartcontractkit/chainlink-testing-framework/lib/k8s/pkg/helm/mockserver" - mockservercfg "github.com/smartcontractkit/chainlink-testing-framework/lib/k8s/pkg/helm/mockserver-cfg" - "github.com/smartcontractkit/chainlink-testing-framework/lib/logging" - "github.com/smartcontractkit/chainlink-testing-framework/lib/networks" - "github.com/smartcontractkit/chainlink-testing-framework/lib/utils/ptr" - seth_utils "github.com/smartcontractkit/chainlink-testing-framework/lib/utils/seth" - "github.com/smartcontractkit/chainlink-testing-framework/lib/utils/testcontext" - - "github.com/smartcontractkit/chainlink/deployment/environment/nodeclient" - "github.com/smartcontractkit/chainlink/integration-tests/actions" - "github.com/smartcontractkit/chainlink/integration-tests/contracts" - tc "github.com/smartcontractkit/chainlink/integration-tests/testconfig" -) - -var ( - defaultOCRSettings = map[string]any{ - "replicas": 6, - "db": map[string]any{ - "stateful": true, - "capacity": "1Gi", - "resources": map[string]any{ - "requests": map[string]any{ - "cpu": "250m", - "memory": "256Mi", - }, - "limits": map[string]any{ - "cpu": "250m", - "memory": "256Mi", - }, - }, - }, - } - chaosStartRound int64 = 1 - chaosEndRound int64 = 4 -) - -func TestOCRChaos(t *testing.T) { - t.Parallel() - l := logging.GetTestLogger(t) - config, err := tc.GetConfig([]string{"Chaos"}, tc.OCR) - require.NoError(t, err, "Error getting config") - - var overrideFn = func(_ any, target any) { - ctf_config.MustConfigOverrideChainlinkVersion(config.GetChainlinkImageConfig(), target) - ctf_config.MightConfigOverridePyroscopeKey(config.GetPyroscopeConfig(), target) - } - - tomlConfig, err := actions.BuildTOMLNodeConfigForK8s(&config, networks.MustGetSelectedNetworkConfig(config.Network)[0]) - require.NoError(t, err, "Error building TOML config") - - defaultOCRSettings["toml"] = tomlConfig - - chainlinkCfg := chainlink.NewWithOverride(0, defaultOCRSettings, config.ChainlinkImage, overrideFn) - - testCases := map[string]struct { - networkChart environment.ConnectedChart - clChart environment.ConnectedChart - chaosFunc chaos.ManifestFunc - chaosProps *chaos.Props - }{ - // network-* and pods-* are split intentionally into 2 parallel groups - // we can't use chaos.NewNetworkPartition and chaos.NewFailPods in parallel - // because of jsii runtime bug, see Makefile and please use those targets to run tests - // - // We are using two chaos experiments to simulate pods/network faults, - // check chaos.NewFailPods method (https://chaos-mesh.org/docs/simulate-pod-chaos-on-kubernetes/) - // and chaos.NewNetworkPartition method (https://chaos-mesh.org/docs/simulate-network-chaos-on-kubernetes/) - // in order to regenerate Go bindings if k8s version will be updated - // you can pull new CRD spec from your current cluster and check README here - // https://github.com/smartcontractkit/chainlink-testing-framework/lib/k8s/blob/master/README.md - NetworkChaosFailMajorityNetwork: { - ethereum.New(nil), - chainlinkCfg, - chaos.NewNetworkPartition, - &chaos.Props{ - FromLabels: &map[string]*string{ChaosGroupMajority: ptr.Ptr("1")}, - ToLabels: &map[string]*string{ChaosGroupMinority: ptr.Ptr("1")}, - DurationStr: "1m", - }, - }, - NetworkChaosFailBlockchainNode: { - ethereum.New(nil), - chainlinkCfg, - chaos.NewNetworkPartition, - &chaos.Props{ - FromLabels: &map[string]*string{"app": ptr.Ptr("geth")}, - ToLabels: &map[string]*string{ChaosGroupMajorityPlus: ptr.Ptr("1")}, - DurationStr: "1m", - }, - }, - PodChaosFailMinorityNodes: { - ethereum.New(nil), - chainlinkCfg, - chaos.NewFailPods, - &chaos.Props{ - LabelsSelector: &map[string]*string{ChaosGroupMinority: ptr.Ptr("1")}, - DurationStr: "1m", - }, - }, - PodChaosFailMajorityNodes: { - ethereum.New(nil), - chainlinkCfg, - chaos.NewFailPods, - &chaos.Props{ - LabelsSelector: &map[string]*string{ChaosGroupMajority: ptr.Ptr("1")}, - DurationStr: "1m", - }, - }, - PodChaosFailMajorityDB: { - ethereum.New(nil), - chainlinkCfg, - chaos.NewFailPods, - &chaos.Props{ - LabelsSelector: &map[string]*string{ChaosGroupMajority: ptr.Ptr("1")}, - DurationStr: "1m", - ContainerNames: &[]*string{ptr.Ptr("chainlink-db")}, - }, - }, - } - - for n, tst := range testCases { - name := n - testCase := tst - t.Run("OCR_"+name, func(t *testing.T) { - t.Parallel() - - nsLabels, err := environment.GetRequiredChainLinkNamespaceLabels("data-feedsv1.0", "chaos") - require.NoError(t, err, "Error creating required chain.link labels for namespace") - - workloadPodLabels, err := environment.GetRequiredChainLinkWorkloadAndPodLabels("data-feedsv1.0", "chaos") - require.NoError(t, err, "Error creating required chain.link labels for workloads and pods") - - testEnvironment := environment.New(&environment.Config{ - NamespacePrefix: "chaos-ocr-" + name, - Test: t, - Labels: nsLabels, - WorkloadLabels: workloadPodLabels, - PodLabels: workloadPodLabels, - }). - AddHelm(mockservercfg.New(nil)). - AddHelm(mockserver.New(nil)). - AddHelm(testCase.networkChart). - AddHelm(testCase.clChart) - err = testEnvironment.Run() - require.NoError(t, err) - if testEnvironment.WillUseRemoteRunner() { - return - } - - err = testEnvironment.Client.LabelChaosGroup(testEnvironment.Cfg.Namespace, "instance=node-", 1, 2, ChaosGroupMinority) - require.NoError(t, err) - err = testEnvironment.Client.LabelChaosGroup(testEnvironment.Cfg.Namespace, "instance=node-", 3, 5, ChaosGroupMajority) - require.NoError(t, err) - err = testEnvironment.Client.LabelChaosGroup(testEnvironment.Cfg.Namespace, "instance=node-", 2, 5, ChaosGroupMajorityPlus) - require.NoError(t, err) - - cfg := config.MustCopy().(tc.TestConfig) - - network := networks.MustGetSelectedNetworkConfig(cfg.GetNetworkConfig())[0] - network = seth_utils.MustReplaceSimulatedNetworkUrlWithK8(l, network, *testEnvironment) - - seth, err := seth_utils.GetChainClient(&cfg, network) - require.NoError(t, err, "Error creating seth client") - - chainlinkNodes, err := nodeclient.ConnectChainlinkNodes(testEnvironment) - require.NoError(t, err, "Connecting to chainlink nodes shouldn't fail") - bootstrapNode, workerNodes := chainlinkNodes[0], chainlinkNodes[1:] - t.Cleanup(func() { - err := actions.TeardownRemoteSuite(t, seth, testEnvironment.Cfg.Namespace, chainlinkNodes, nil, &cfg) - require.NoError(t, err, "Error tearing down environment") - }) - - ms := ctfClient.ConnectMockServer(testEnvironment) - linkContract, err := actions.LinkTokenContract(l, seth, config.OCR) - require.NoError(t, err, "Error deploying link token contract") - - err = actions.FundChainlinkNodesFromRootAddress(l, seth, contracts.ChainlinkK8sClientToChainlinkNodeWithKeysAndAddress(chainlinkNodes), big.NewFloat(10)) - require.NoError(t, err) - - ocrInstances, err := actions.SetupOCRv1Contracts(l, seth, config.OCR, common.HexToAddress(linkContract.Address()), contracts.ChainlinkK8sClientToChainlinkNodeWithKeysAndAddress(workerNodes)) - require.NoError(t, err) - err = actions.CreateOCRJobs(ocrInstances, bootstrapNode, workerNodes, 5, ms, strconv.FormatInt(seth.ChainID, 10)) - require.NoError(t, err) - - chaosApplied := false - - gom := gomega.NewGomegaWithT(t) - gom.Eventually(func(g gomega.Gomega) { - for _, ocr := range ocrInstances { - err := ocr.RequestNewRound() - require.NoError(t, err, "Error requesting new round") - } - round, err := ocrInstances[0].GetLatestRound(testcontext.Get(t)) - g.Expect(err).ShouldNot(gomega.HaveOccurred()) - l.Info().Int64("RoundID", round.RoundId.Int64()).Msg("Latest OCR Round") - if round.RoundId.Int64() == chaosStartRound && !chaosApplied { - chaosApplied = true - _, err = testEnvironment.Chaos.Run(testCase.chaosFunc(testEnvironment.Cfg.Namespace, testCase.chaosProps)) - require.NoError(t, err) - } - g.Expect(round.RoundId.Int64()).Should(gomega.BeNumerically(">=", chaosEndRound)) - }, "6m", "3s").Should(gomega.Succeed()) - }) - } -} From c857d643b8e6a4bcdb511fee3436ce85be26ed22 Mon Sep 17 00:00:00 2001 From: skudasov Date: Fri, 27 Feb 2026 12:53:26 +0100 Subject: [PATCH 2/5] trigger --- .github/workflows/devenv-ocr2-chaos.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/devenv-ocr2-chaos.yml b/.github/workflows/devenv-ocr2-chaos.yml index c5b2e8f7391..7c4e179aa50 100644 --- a/.github/workflows/devenv-ocr2-chaos.yml +++ b/.github/workflows/devenv-ocr2-chaos.yml @@ -1,6 +1,7 @@ name: OCR2 Chaos Test on: + push: schedule: - cron: "0 6 * * *" # Run daily at 6 AM workflow_dispatch: From 2c5485f4a5c13b5911d86e5e5f1f162dbf4556e8 Mon Sep 17 00:00:00 2001 From: skudasov Date: Fri, 27 Feb 2026 14:02:03 +0100 Subject: [PATCH 3/5] upgrade fake --- devenv/go.mod | 2 +- devenv/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/devenv/go.mod b/devenv/go.mod index 0ad9c107eaf..44e334c70ec 100644 --- a/devenv/go.mod +++ b/devenv/go.mod @@ -26,7 +26,7 @@ require ( github.com/smartcontractkit/chainlink-evm/gethwrappers v0.0.0-20251211123524-f0c4fe7cfc0a github.com/smartcontractkit/chainlink-protos/job-distributor v0.12.0 github.com/smartcontractkit/chainlink-testing-framework/framework v0.14.9 - github.com/smartcontractkit/chainlink-testing-framework/framework/components/fake v0.10.1-0.20250711120409-5078050f9db4 + github.com/smartcontractkit/chainlink-testing-framework/framework/components/fake v0.14.9 github.com/smartcontractkit/chainlink-testing-framework/seth v1.51.5 github.com/smartcontractkit/chainlink-testing-framework/wasp v1.51.2 github.com/smartcontractkit/libocr v0.0.0-20251212213002-0a5e2f907dda diff --git a/devenv/go.sum b/devenv/go.sum index cfa82cefafd..c5b7b73979a 100644 --- a/devenv/go.sum +++ b/devenv/go.sum @@ -949,8 +949,8 @@ github.com/smartcontractkit/chainlink-protos/job-distributor v0.12.0 h1:/bhoALRz github.com/smartcontractkit/chainlink-protos/job-distributor v0.12.0/go.mod h1:/dVVLXrsp+V0AbcYGJo3XMzKg3CkELsweA/TTopCsKE= github.com/smartcontractkit/chainlink-testing-framework/framework v0.14.9 h1:a/e5gwPjpzQwqtmMCop2vKXi55mYk+bQBP/5t3y60CI= github.com/smartcontractkit/chainlink-testing-framework/framework v0.14.9/go.mod h1:43xdIQuqw/gzfazsqJkBrGdF25TIJDiY/Ak/YrWFTmU= -github.com/smartcontractkit/chainlink-testing-framework/framework/components/fake v0.10.1-0.20250711120409-5078050f9db4 h1:6iIj+U1SA19xftdEJwubATHBoGm4yc8q+MwWz6rlBDc= -github.com/smartcontractkit/chainlink-testing-framework/framework/components/fake v0.10.1-0.20250711120409-5078050f9db4/go.mod h1:YEQbZRHFojvlQKeuckG/70t0WkAqOBmArSbkacgHSbc= +github.com/smartcontractkit/chainlink-testing-framework/framework/components/fake v0.14.9 h1:MVDx/Zl7qhikAx5vQgpTiyCkXw6sXgerUAU3WyJgWVY= +github.com/smartcontractkit/chainlink-testing-framework/framework/components/fake v0.14.9/go.mod h1:1ZKcfw6mNKvM5GNy8AjeviL0tJVZoqhLZbmskcSG68k= github.com/smartcontractkit/chainlink-testing-framework/lib/grafana v1.50.0 h1:VIxK8u0Jd0Q/VuhmsNm6Bls6Tb31H/sA3A/rbc5hnhg= github.com/smartcontractkit/chainlink-testing-framework/lib/grafana v1.50.0/go.mod h1:lyAu+oMXdNUzEDScj2DXB2IueY+SDXPPfyl/kb63tMM= github.com/smartcontractkit/chainlink-testing-framework/seth v1.51.5 h1:RwZXxdIAOyjp6cwc9Quxgr38k8r7ACz+Lxh9o/A6oH0= From 9b1c7432661f0120804fd4df2f01853ebe88664e Mon Sep 17 00:00:00 2001 From: skudasov Date: Mon, 2 Mar 2026 18:29:48 +0100 Subject: [PATCH 4/5] fix and enable logpoller chaos tests --- devenv/tests/logpoller/logpoller_test.go | 18 ----- devenv/tests/logpoller/test_helpers.go | 92 +++++++++--------------- 2 files changed, 33 insertions(+), 77 deletions(-) diff --git a/devenv/tests/logpoller/logpoller_test.go b/devenv/tests/logpoller/logpoller_test.go index bfde52d5111..6238299b6df 100644 --- a/devenv/tests/logpoller/logpoller_test.go +++ b/devenv/tests/logpoller/logpoller_test.go @@ -238,17 +238,8 @@ func XTestLogPollerHeavyLoad(t *testing.T) { // with approximate emission of 520-550 logs per second for ~110 seconds // 6 filters are registered -/* - Chaos runs require Pumba, but that container still speaks Docker API 1.42. Newer Docker daemons reject it unless you pin a - minimum API version. If this test never pauses anything, add the entry below to your Docker Engine config (e.g. ~/.docker/daemon.json) - and restart Docker: - - "min-api-version": "1.42" -*/ - // Execute both on environment with finalityTagEnabled and with finalityDepth func TestLogPollerChaosChainlinkNodes(t *testing.T) { - t.Skip("We need to find replacement for Pumba, which doesn't work with Docker API > 1.42") cfg := &Config{ General: General{ Generator: "looped", @@ -274,17 +265,8 @@ func TestLogPollerChaosChainlinkNodes(t *testing.T) { // with approximate emission of 520-550 logs per second for ~110 seconds // 6 filters are registered -/* - Chaos runs require Pumba, but that container still speaks Docker API 1.42. Newer Docker daemons reject it unless you pin a - minimum API version. If this test never pauses anything, add the entry below to your Docker Engine config (e.g. ~/.docker/daemon.json) - and restart Docker: - - "min-api-version": "1.42" -*/ - // Execute both on environment with finalityTagEnabled and with finalityDepth func TestLogPollerChaosPostgres(t *testing.T) { - t.Skip("We need to find replacement for Pumba, which doesn't work with Docker API > 1.42") cfg := &Config{ General: General{ Generator: "looped", diff --git a/devenv/tests/logpoller/test_helpers.go b/devenv/tests/logpoller/test_helpers.go index 014739aae7f..afa5b4d88c7 100644 --- a/devenv/tests/logpoller/test_helpers.go +++ b/devenv/tests/logpoller/test_helpers.go @@ -10,7 +10,6 @@ import ( "math" "math/big" "math/rand" - "os/exec" "sort" "strings" "sync" @@ -33,6 +32,7 @@ import ( "github.com/smartcontractkit/chainlink-evm/pkg/logpoller" cltypes "github.com/smartcontractkit/chainlink-evm/pkg/types" "github.com/smartcontractkit/chainlink-testing-framework/framework" + "github.com/smartcontractkit/chainlink-testing-framework/framework/chaos" "github.com/smartcontractkit/chainlink-testing-framework/framework/components/clnode" "github.com/smartcontractkit/chainlink-testing-framework/framework/components/postgres" nodeset "github.com/smartcontractkit/chainlink-testing-framework/framework/components/simple_node_set" @@ -624,7 +624,7 @@ func missingLogs( // printMissingLogsInfo prints various useful information about the missing logs func printMissingLogsInfo(missingLogs map[string][]geth_types.Log, l zerolog.Logger, cfg *Config) { - var findHumanName = func(topic common.Hash) string { + findHumanName := func(topic common.Hash) string { for _, event := range cfg.General.EventsToEmit { if event.ID == topic { return event.Name @@ -763,7 +763,6 @@ func runWaspGenerator(t *testing.T, cfg *Config, logEmitters []contracts.LogEmit } _, err := p.Run(true) - if err != nil { return 0, err } @@ -807,9 +806,9 @@ func runLoopedGenerator(cfg *Config, client *seth.Client, logEmitters []contract return 0, err } - var atomicCounter = atomic.Int32{} + atomicCounter := atomic.Int32{} - var emitAllEventsFn = func(resultCh chan emittedLogsData, errorCh chan error, _ int, task logEmissionTask) { + emitAllEventsFn := func(resultCh chan emittedLogsData, errorCh chan error, _ int, task logEmissionTask) { current := atomicCounter.Add(1) address := task.emitter.Address().String() @@ -848,7 +847,6 @@ func runLoopedGenerator(cfg *Config, client *seth.Client, logEmitters []contract executor := concurrency.NewConcurrentExecutor[emittedLogsData, emittedLogsData, logEmissionTask](l) r, err := executor.Execute(len(client.Cfg.Network.PrivateKeys)-1, tasks, emitAllEventsFn) - if err != nil { return 0, err } @@ -883,7 +881,7 @@ type PauseData struct { var ChaosPauses = []PauseData{} // chaosPauseSyncFn pauses ranom container of the provided type for a random amount of time between 5 and 20 seconds -func chaosPauseSyncFn(ctx context.Context, l zerolog.Logger, client *seth.Client, nodes *nodeset.Input, targetComponent string) ChaosPauseData { +func chaosPauseSyncFn(ctx context.Context, dtc *chaos.DockerChaos, l zerolog.Logger, client *seth.Client, nodes *nodeset.Input, targetComponent string) ChaosPauseData { // var component ctf_test_env.EnvComponent var containerName string @@ -910,8 +908,13 @@ func chaosPauseSyncFn(ctx context.Context, l zerolog.Logger, client *seth.Client l.Info().Str("Container", containerName).Int("Pause time", pauseTimeSec).Msg("Pausing component") pauseTimeDur := time.Duration(pauseTimeSec) * time.Second - if err := pauseContainer(ctx, l, containerName, pauseTimeDur); err != nil { - return ChaosPauseData{Err: err} + err = dtc.Chaos(containerName, chaos.CmdPause, "") + if err != nil { + return ChaosPauseData{Err: fmt.Errorf("failed to pause docker container: %s, %w", containerName, err)} + } + time.Sleep(pauseTimeDur) + if err := dtc.RemoveAll(); err != nil { + return ChaosPauseData{Err: fmt.Errorf("failed to unpause docker container %s: %w", containerName, err)} } l.Info().Str("Container", containerName).Msg("Component unpaused") @@ -930,39 +933,6 @@ func chaosPauseSyncFn(ctx context.Context, l zerolog.Logger, client *seth.Client }} } -func pauseContainer(ctx context.Context, l zerolog.Logger, containerName string, pauseTimeDur time.Duration) error { - command := fmt.Sprintf(`docker run -i --rm -v /var/run/docker.sock:/var/run/docker.sock --network %s gaiaadm/pumba --log-level=info pause --duration=%s %s`, framework.DefaultNetworkName, pauseTimeDur.String(), containerName) - - fmt.Println("command: ", command) - - c := strings.Split(command, " ") - l.Info().Interface("Command", c).Msg("Executing command") - cmd := exec.CommandContext(ctx, c[0], c[1:]...) // #nosec: G204 - stderr, err := cmd.StderrPipe() - if err != nil { - return err - } - stdout, err := cmd.StdoutPipe() - if err != nil { - return err - } - if err := cmd.Start(); err != nil { - return err - } - outputFunction := func(m string) { - l.Debug().Str("Text", m).Msg("Std Pipe") - } - go readStdPipe(stderr, outputFunction) - go readStdPipe(stdout, outputFunction) - - err = cmd.Wait() - if err != nil { - return err - } - - return nil -} - // readStdPipe continuously read a pipe from the command func readStdPipe(pipe io.ReadCloser, outputFunction func(string)) { scanner := bufio.NewScanner(pipe) @@ -987,6 +957,12 @@ func executeChaosExperiment(ctx context.Context, l zerolog.Logger, nodes *nodese return } + dtc, err := chaos.NewDockerChaos(ctx) + if err != nil { + errorCh <- fmt.Errorf("failed to created docker-tc container: %w", err) + return + } + chaosChan := make(chan ChaosPauseData, config.ChaosConfig.ExperimentCount) wg := &sync.WaitGroup{} @@ -1005,7 +981,7 @@ func executeChaosExperiment(ctx context.Context, l zerolog.Logger, nodes *nodese current := i + 1 l.Info().Str("Current/Total", fmt.Sprintf("%d/%d", current, config.ChaosConfig.ExperimentCount)).Msg("Done with experiment") }() - chaosChan <- chaosPauseSyncFn(ctx, l, sethClient, nodes, config.ChaosConfig.TargetComponent) + chaosChan <- chaosPauseSyncFn(ctx, dtc, l, sethClient, nodes, config.ChaosConfig.TargetComponent) time.Sleep(10 * time.Second) }() } @@ -1049,22 +1025,20 @@ const ( defaultAmountOfUpkeeps = 2 ) -var ( - DefaultOCRRegistryConfig = contracts.KeeperRegistrySettings{ - PaymentPremiumPPB: uint32(200000000), - FlatFeeMicroLINK: uint32(0), - BlockCountPerTurn: big.NewInt(10), - CheckGasLimit: uint32(2500000), - StalenessSeconds: big.NewInt(90000), - GasCeilingMultiplier: uint16(1), - MinUpkeepSpend: big.NewInt(0), - MaxPerformGas: uint32(5000000), - FallbackGasPrice: big.NewInt(2e11), - FallbackLinkPrice: big.NewInt(2e18), - MaxCheckDataSize: uint32(5000), - MaxPerformDataSize: uint32(5000), - } -) +var DefaultOCRRegistryConfig = contracts.KeeperRegistrySettings{ + PaymentPremiumPPB: uint32(200000000), + FlatFeeMicroLINK: uint32(0), + BlockCountPerTurn: big.NewInt(10), + CheckGasLimit: uint32(2500000), + StalenessSeconds: big.NewInt(90000), + GasCeilingMultiplier: uint16(1), + MinUpkeepSpend: big.NewInt(0), + MaxPerformGas: uint32(5000000), + FallbackGasPrice: big.NewInt(2e11), + FallbackLinkPrice: big.NewInt(2e18), + MaxCheckDataSize: uint32(5000), + MaxPerformDataSize: uint32(5000), +} // uploadLogEmitterContracts uploads the configured number of log emitter contracts func uploadLogEmitterContracts(l zerolog.Logger, t *testing.T, client *seth.Client, config *Config) []contracts.LogEmitter { From 9bcfb409c6e5e2f64118fb95cfba9007722f06ab Mon Sep 17 00:00:00 2001 From: skudasov Date: Mon, 2 Mar 2026 18:31:57 +0100 Subject: [PATCH 5/5] disable chaos on push --- .github/workflows/devenv-ocr2-chaos.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/devenv-ocr2-chaos.yml b/.github/workflows/devenv-ocr2-chaos.yml index 7c4e179aa50..c5b2e8f7391 100644 --- a/.github/workflows/devenv-ocr2-chaos.yml +++ b/.github/workflows/devenv-ocr2-chaos.yml @@ -1,7 +1,6 @@ name: OCR2 Chaos Test on: - push: schedule: - cron: "0 6 * * *" # Run daily at 6 AM workflow_dispatch: