From 2977d9e9342da264127025990bf06346b29e7e7f Mon Sep 17 00:00:00 2001 From: mcamou Date: Thu, 23 Oct 2025 19:56:44 +0200 Subject: [PATCH 01/15] feat: Add dry run mode and clarify API URL This commit introduces a DRY_RUN mode to prevent actual retagging operations, which is useful for testing. It also renames the RPC_URL environment variable to API_URL to more accurately reflect that it connects to the Cosmos SDK REST API, not the Tendermint RPC endpoint. The type has also been changed to *url.URL for better validation. Additionally, this commit includes: - A fix for an off-by-one error in the upgrade height calculation. - Updates to the README to document the new and changed environment variables. --- README.md | 10 +++++----- cmd/updater/main.go | 2 +- config/config.go | 4 +++- cosmos/client.go | 9 +++++---- cosmos/client_integration_test.go | 5 ++++- updater/updater.go | 8 +++++++- updater/updater_test.go | 30 ++++++++++++++++++++++++++++-- 7 files changed, 53 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 121d708..2b87521 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ All configuration is done by means of environment variables: ### Connectivity -`RPC_URL` - URL to connect to the Cosmos chain REST API. Default is `http://localhost:1317`. +`API_URL` - URL to connect to the Cosmos chain REST API. Default is `http://localhost:1317`. ### Docker parameters @@ -32,12 +32,13 @@ All configuration is done by means of environment variables: `POLL_INTERVAL` - How long to wait between Cosmos chain polls, in Golang Duration format. The default is `1m`. +`DRY_RUN` - If set to `true`, the application will not perform any retagging operations on DockerHub. Instead, it will log the actions it would have taken. This is useful for testing and validation. Default is `false`. + `HTTP_PORT` - The port on which to expose health, metrics, and profiling endpoints. Default is `8080`. ## Observability The service exposes several endpoints for monitoring and debugging: - * `GET /healthz`: A liveness probe that returns `200 OK` if the service is running. * `GET /readyz`: A readiness probe that returns `200 OK` if the service can connect to both the Cosmos chain and DockerHub. Otherwise, it returns `503 Service Unavailable`. * `GET /metrics`: Exposes Prometheus metrics for monitoring. @@ -53,7 +54,7 @@ docker run \ -e TARGET_PREFIX="mainnet-" \ -e DOCKERHUB_USER="myuser" \ -e DOCKERHUB_PASSWORD="mypassword" \ - -e RPC_URL="http://my-cosmos-node:1317" \ + -e API_URL="http://my-cosmos-node:1317" \ -p 8080:8080 \ gopher-updater:latest ``` @@ -96,7 +97,7 @@ spec: secretKeyRef: name: dockerhub key: password - - name: RPC_URL + - name: API_URL value: "http://my-cosmos-node:1317" - name: HTTP_PORT value: "8080" @@ -109,7 +110,6 @@ spec: path: /readyz port: http ``` - ## Development ```bash diff --git a/cmd/updater/main.go b/cmd/updater/main.go index 8a62370..4c993c6 100644 --- a/cmd/updater/main.go +++ b/cmd/updater/main.go @@ -48,7 +48,7 @@ func main() { }, } - cosmosClient := cosmos.NewClient(cfg.RPCURL, httpClient) + cosmosClient := cosmos.NewClient(cfg.APIURL, httpClient) dockerhubClient := dockerhub.NewClient(cfg.DockerHubUser, cfg.DockerHubPassword, httpClient) checker := health.NewChecker(cosmosClient, dockerhubClient, cfg.RepoPath) diff --git a/config/config.go b/config/config.go index e8b0694..56c7c1d 100644 --- a/config/config.go +++ b/config/config.go @@ -2,6 +2,7 @@ package config import ( "context" + "net/url" "time" "github.com/sethvargo/go-envconfig" @@ -9,13 +10,14 @@ import ( // Config holds the application configuration. type Config struct { - RPCURL string `env:"RPC_URL,default=http://localhost:1317"` + APIURL *url.URL `env:"API_URL,default=http://localhost:1317"` DockerHubUser string `env:"DOCKERHUB_USER,required"` DockerHubPassword string `env:"DOCKERHUB_PASSWORD,required"` RepoPath string `env:"REPO_PATH,required"` SourcePrefix string `env:"SOURCE_PREFIX,default=release-"` TargetPrefix string `env:"TARGET_PREFIX,required"` PollInterval time.Duration `env:"POLL_INTERVAL,default=1m"` + DryRun bool `env:"DRY_RUN,default=false"` HTTPMaxIdleConns int `env:"HTTP_MAX_IDLE_CONNS,default=100"` HTTPMaxIdleConnsPerHost int `env:"HTTP_MAX_IDLE_CONNS_PER_HOST,default=10"` diff --git a/cosmos/client.go b/cosmos/client.go index 8eb6858..d1cfcde 100644 --- a/cosmos/client.go +++ b/cosmos/client.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "net/http" + "net/url" "strconv" ) @@ -16,12 +17,12 @@ type ClientInterface interface { // Client for interacting with the Cosmos REST API. type Client struct { - rpcURL string + rpcURL *url.URL httpClient *http.Client } // NewClient creates a new Cosmos client. -func NewClient(rpcURL string, httpClient *http.Client) *Client { +func NewClient(rpcURL *url.URL, httpClient *http.Client) *Client { return &Client{ rpcURL: rpcURL, httpClient: httpClient, @@ -47,7 +48,7 @@ type LatestBlockResponse struct { // GetLatestBlockHeight returns the latest block height of the chain. func (c *Client) GetLatestBlockHeight(ctx context.Context) (int64, error) { - req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.rpcURL+"/blocks/latest", nil) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.rpcURL.JoinPath("/blocks/latest").String(), nil) if err != nil { return 0, fmt.Errorf("failed to create request: %w", err) } @@ -95,7 +96,7 @@ type ProposalsResponse struct { // GetUpgradePlans finds all passed software upgrade proposals and returns their plans. func (c *Client) GetUpgradePlans(ctx context.Context) ([]Plan, error) { - req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.rpcURL+"/cosmos/gov/v1beta1/proposals", nil) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.rpcURL.JoinPath("/cosmos/gov/v1beta1/proposals").String(), nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } diff --git a/cosmos/client_integration_test.go b/cosmos/client_integration_test.go index 99b5d3a..41e54b3 100644 --- a/cosmos/client_integration_test.go +++ b/cosmos/client_integration_test.go @@ -5,6 +5,7 @@ import ( "fmt" "net/http" "net/http/httptest" + "net/url" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -24,7 +25,9 @@ var _ = Describe("Client Integration", func() { ctx = context.Background() mux = http.NewServeMux() server = httptest.NewServer(mux) - client = cosmos.NewClient(server.URL, server.Client()) + serverURL, err := url.Parse(server.URL) + Expect(err).NotTo(HaveOccurred()) + client = cosmos.NewClient(serverURL, server.Client()) }) AfterEach(func() { diff --git a/updater/updater.go b/updater/updater.go index 891a581..32ee006 100644 --- a/updater/updater.go +++ b/updater/updater.go @@ -75,11 +75,12 @@ func (u *Updater) CheckAndProcessUpgrade(ctx context.Context) error { var pendingPlans []cosmos.Plan for _, plan := range plans { - upgradeHeight, err := strconv.ParseInt(plan.Height, 10, 64) + proposalHeight, err := strconv.ParseInt(plan.Height, 10, 64) if err != nil { xlog.Error("failed to parse upgrade height, skipping plan", "plan", plan.Name, "height", plan.Height, "err", err) continue } + upgradeHeight := proposalHeight - 1 if currentHeight >= upgradeHeight { targetTag := u.cfg.TargetPrefix + plan.Name @@ -117,6 +118,11 @@ func (u *Updater) processUpgrade(ctx context.Context, plan *cosmos.Plan) error { xlog.Info("retagging image", "repo", u.cfg.RepoPath, "source", sourceTag, "target", targetTag) + if u.cfg.DryRun { + xlog.Info("dry run enabled, skipping retag") + return nil + } + err := u.dockerhubClient.RetagImage(ctx, u.cfg.RepoPath, sourceTag, targetTag) if err != nil { return fmt.Errorf("failed to retag image: %w", err) diff --git a/updater/updater_test.go b/updater/updater_test.go index 075bd67..92e1f4b 100644 --- a/updater/updater_test.go +++ b/updater/updater_test.go @@ -42,7 +42,7 @@ var _ = Describe("Updater", func() { return plans, nil } mockCosmosClient.getLatestBlockHeightFunc = func(ctx context.Context) (int64, error) { - return 101, nil + return 100, nil } mockDockerHubClient.tagExistsFunc = func(ctx context.Context, repoPath, tag string) (bool, error) { Expect(tag).To(Equal("mainnet-v1.2.3")) @@ -115,7 +115,7 @@ var _ = Describe("Updater", func() { return plans, nil } mockCosmosClient.getLatestBlockHeightFunc = func(ctx context.Context) (int64, error) { - return 99, nil + return 98, nil } err := up.CheckAndProcessUpgrade(ctx) @@ -159,6 +159,32 @@ var _ = Describe("Updater", func() { Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("cosmos boom")) }) + + Context("with dry run enabled", func() { + BeforeEach(func() { + cfg.DryRun = true + }) + + It("should not retag the image if a single upgrade height has been reached and the tag does not exist", func() { + plans := []cosmos.Plan{{Name: "v1.2.3", Height: "100"}} + mockCosmosClient.getUpgradePlansFunc = func(ctx context.Context) ([]cosmos.Plan, error) { + return plans, nil + } + mockCosmosClient.getLatestBlockHeightFunc = func(ctx context.Context) (int64, error) { + return 100, nil + } + mockDockerHubClient.tagExistsFunc = func(ctx context.Context, repoPath, tag string) (bool, error) { + Expect(tag).To(Equal("mainnet-v1.2.3")) + return false, nil + } + + err := up.CheckAndProcessUpgrade(ctx) + Expect(err).ToNot(HaveOccurred()) + + retagCalls := mockDockerHubClient.RetagCalls() + Expect(retagCalls).To(HaveLen(0)) + }) + }) }) }) From bffdac0b5d53df92153a4044124a63e5208e7b2e Mon Sep 17 00:00:00 2001 From: mcamou Date: Fri, 24 Oct 2025 17:04:06 +0200 Subject: [PATCH 02/15] feat: Add dry-run config validation and upgrade proposal - Make DockerHub credentials optional when in dry-run mode. - Add unit tests for the new configuration validation. - Create a sample Cosmos upgrade proposal file. - Update Dockerfile builder WORKDIR to /src. --- .gitignore | 1 + Dockerfile | 2 +- cmd/updater/main.go | 2 ++ config/config.go | 15 +++++++++-- config/config_test.go | 61 +++++++++++++++++++++++++++++++++++++++++++ mise.local.toml | 0 6 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 config/config_test.go create mode 100644 mise.local.toml diff --git a/.gitignore b/.gitignore index cf0fcce..77e2aaa 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ localai-models/ CLAUDE.md GEMINI.md /gopher-updater +docker-compose.yml diff --git a/Dockerfile b/Dockerfile index 589dfbe..b8d8e1e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ # --- Builder --- ARG GO_VERSION=1.25.1 FROM golang:${GO_VERSION}-alpine AS builder -WORKDIR /app +WORKDIR /src COPY go.mod go.sum ./ RUN go mod download COPY . . diff --git a/cmd/updater/main.go b/cmd/updater/main.go index 4c993c6..b705aa6 100644 --- a/cmd/updater/main.go +++ b/cmd/updater/main.go @@ -3,6 +3,7 @@ package main import ( "context" "fmt" + "io" "net/http" "net/http/pprof" "os" @@ -81,6 +82,7 @@ func main() { func startHTTPServer(cfg *config.Config, checker *health.Checker, cancel context.CancelFunc) *echo.Echo { e := echo.New() e.HideBanner = true + e.Logger.SetOutput(io.Discard) // --- Routes --- e.GET("/healthz", func(c echo.Context) error { diff --git a/config/config.go b/config/config.go index 56c7c1d..515929f 100644 --- a/config/config.go +++ b/config/config.go @@ -2,6 +2,7 @@ package config import ( "context" + "fmt" "net/url" "time" @@ -11,8 +12,8 @@ import ( // Config holds the application configuration. type Config struct { APIURL *url.URL `env:"API_URL,default=http://localhost:1317"` - DockerHubUser string `env:"DOCKERHUB_USER,required"` - DockerHubPassword string `env:"DOCKERHUB_PASSWORD,required"` + DockerHubUser string `env:"DOCKERHUB_USER"` + DockerHubPassword string `env:"DOCKERHUB_PASSWORD"` RepoPath string `env:"REPO_PATH,required"` SourcePrefix string `env:"SOURCE_PREFIX,default=release-"` TargetPrefix string `env:"TARGET_PREFIX,required"` @@ -31,5 +32,15 @@ func New(ctx context.Context) (*Config, error) { if err := envconfig.Process(ctx, &cfg); err != nil { return nil, err } + + if !cfg.DryRun { + if cfg.DockerHubUser == "" { + return nil, fmt.Errorf("DOCKERHUB_USER is required when not in dry-run mode") + } + if cfg.DockerHubPassword == "" { + return nil, fmt.Errorf("DOCKERHUB_PASSWORD is required when not in dry-run mode") + } + } + return &cfg, nil } diff --git a/config/config_test.go b/config/config_test.go new file mode 100644 index 0000000..eaf0013 --- /dev/null +++ b/config/config_test.go @@ -0,0 +1,61 @@ +package config_test + +import ( + "context" + "os" + "testing" + + "github.com/gopher-lab/gopher-updater/config" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestConfig(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Config Suite") +} + +var _ = Describe("Config", func() { + var ctx context.Context + + BeforeEach(func() { + ctx = context.Background() + os.Clearenv() + }) + + Context("when creating a new config", func() { + It("should return an error if dry run is false and dockerhub user is not set", func() { + Expect(os.Setenv("DOCKERHUB_PASSWORD", "password")).ToNot(HaveOccurred()) + Expect(os.Setenv("REPO_PATH", "repo")).ToNot(HaveOccurred()) + Expect(os.Setenv("TARGET_PREFIX", "prefix")).ToNot(HaveOccurred()) + _, err := config.New(ctx) + Expect(err).To(HaveOccurred()) + }) + + It("should return an error if dry run is false and dockerhub password is not set", func() { + Expect(os.Setenv("DOCKERHUB_USER", "user")).ToNot(HaveOccurred()) + Expect(os.Setenv("REPO_PATH", "repo")).ToNot(HaveOccurred()) + Expect(os.Setenv("TARGET_PREFIX", "prefix")).ToNot(HaveOccurred()) + _, err := config.New(ctx) + Expect(err).To(HaveOccurred()) + }) + + It("should not return an error if dry run is true and dockerhub credentials are not set", func() { + Expect(os.Setenv("DRY_RUN", "true")).ToNot(HaveOccurred()) + Expect(os.Setenv("REPO_PATH", "repo")).ToNot(HaveOccurred()) + Expect(os.Setenv("TARGET_PREFIX", "prefix")).ToNot(HaveOccurred()) + _, err := config.New(ctx) + Expect(err).ToNot(HaveOccurred()) + }) + + It("should not return an error if dry run is false and dockerhub credentials are set", func() { + Expect(os.Setenv("DOCKERHUB_USER", "user")).ToNot(HaveOccurred()) + Expect(os.Setenv("DOCKERHUB_PASSWORD", "password")).ToNot(HaveOccurred()) + Expect(os.Setenv("REPO_PATH", "repo")).ToNot(HaveOccurred()) + Expect(os.Setenv("TARGET_PREFIX", "prefix")).ToNot(HaveOccurred()) + _, err := config.New(ctx) + Expect(err).ToNot(HaveOccurred()) + }) + }) +}) diff --git a/mise.local.toml b/mise.local.toml new file mode 100644 index 0000000..e69de29 From 4b91e10985ae121e95039b3f899d18a9c5923a0a Mon Sep 17 00:00:00 2001 From: mcamou Date: Fri, 24 Oct 2025 18:55:50 +0200 Subject: [PATCH 03/15] fix: Improve error logging in updater loop --- updater/updater.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/updater/updater.go b/updater/updater.go index 32ee006..aa60810 100644 --- a/updater/updater.go +++ b/updater/updater.go @@ -40,7 +40,7 @@ func (u *Updater) Run(ctx context.Context) error { xlog.Info("performing initial check for software upgrade proposal") if err := u.CheckAndProcessUpgrade(ctx); err != nil { - xlog.Error("failed to process upgrade on initial check", "err", err) + xlog.Error("error checking for upgrade on initial check", "err", err) } for { @@ -50,7 +50,7 @@ func (u *Updater) Run(ctx context.Context) error { case <-ticker.C: xlog.Info("checking for software upgrade proposal") if err := u.CheckAndProcessUpgrade(ctx); err != nil { - xlog.Error("failed to process upgrade", "err", err) + xlog.Error("error checking for upgrade", "err", err) } } } From 7d66b4cc45f2abaf652e0cd8a3de99917175f447 Mon Sep 17 00:00:00 2001 From: mcamou Date: Fri, 24 Oct 2025 19:08:54 +0200 Subject: [PATCH 04/15] fix: Use v1 gov API endpoint for proposals --- cosmos/client.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cosmos/client.go b/cosmos/client.go index d1cfcde..3a4239d 100644 --- a/cosmos/client.go +++ b/cosmos/client.go @@ -96,7 +96,12 @@ type ProposalsResponse struct { // GetUpgradePlans finds all passed software upgrade proposals and returns their plans. func (c *Client) GetUpgradePlans(ctx context.Context) ([]Plan, error) { - req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.rpcURL.JoinPath("/cosmos/gov/v1beta1/proposals").String(), nil) + reqURL := c.rpcURL.JoinPath("/cosmos/gov/v1/proposals") + q := reqURL.Query() + q.Set("proposal_status", "3") // PROPOSAL_STATUS_PASSED + reqURL.RawQuery = q.Encode() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL.String(), nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } From 0004fb083bc1c2299378ebc80d03446612f8a548 Mon Sep 17 00:00:00 2001 From: mcamou Date: Fri, 24 Oct 2025 19:17:59 +0200 Subject: [PATCH 05/15] fix(cosmos): Update structs and logic for v1 gov API The previous fix to use the v1 API endpoint was incomplete. The JSON response structure for v1 proposals is different from v1beta1, requiring updated Go structs and parsing logic to correctly identify passed software upgrade proposals. --- cosmos/client.go | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/cosmos/client.go b/cosmos/client.go index 3a4239d..98b5436 100644 --- a/cosmos/client.go +++ b/cosmos/client.go @@ -86,8 +86,14 @@ type ProposalContent struct { } type Proposal struct { - Status string `json:"status"` - Content ProposalContent `json:"content"` + Status string `json:"status"` + Messages []Message `json:"messages"` +} + +// Message is a generic message in a proposal. +type Message struct { + Type string `json:"@type"` + Plan Plan `json:"plan"` } type ProposalsResponse struct { @@ -123,8 +129,12 @@ func (c *Client) GetUpgradePlans(ctx context.Context) ([]Plan, error) { var plans []Plan for _, p := range proposalsResp.Proposals { - if p.Status == "PROPOSAL_STATUS_PASSED" && p.Content.Type == "/cosmos.upgrade.v1beta1.SoftwareUpgradeProposal" { - plans = append(plans, p.Content.Plan) + if p.Status == "PROPOSAL_STATUS_PASSED" { + for _, msg := range p.Messages { + if msg.Type == "/cosmos.upgrade.v1beta1.MsgSoftwareUpgrade" { + plans = append(plans, msg.Plan) + } + } } } From a7bb84cd91abc274402ce8ac81fffc363e89332b Mon Sep 17 00:00:00 2001 From: mcamou Date: Fri, 24 Oct 2025 19:29:25 +0200 Subject: [PATCH 06/15] test(cosmos): Update integration tests for v1 gov API --- cosmos/client_integration_test.go | 35 ++++++++++++++++++------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/cosmos/client_integration_test.go b/cosmos/client_integration_test.go index 41e54b3..3c86d22 100644 --- a/cosmos/client_integration_test.go +++ b/cosmos/client_integration_test.go @@ -68,28 +68,35 @@ var _ = Describe("Client Integration", func() { Describe("GetUpgradePlans", func() { It("should correctly parse and filter for passed software upgrade proposals", func() { - mux.HandleFunc("/cosmos/gov/v1beta1/proposals", func(w http.ResponseWriter, r *http.Request) { + mux.HandleFunc("/cosmos/gov/v1/proposals", func(w http.ResponseWriter, r *http.Request) { + Expect(r.URL.Query().Get("proposal_status")).To(Equal("3")) _, err := fmt.Fprint(w, `{ "proposals": [ { "status": "PROPOSAL_STATUS_PASSED", - "content": { - "@type": "/cosmos.upgrade.v1beta1.SoftwareUpgradeProposal", - "plan": { "name": "v1.2.3", "height": "100" } - } + "messages": [ + { + "@type": "/cosmos.upgrade.v1beta1.MsgSoftwareUpgrade", + "plan": { "name": "v1.2.3", "height": "100" } + } + ] }, { "status": "PROPOSAL_STATUS_REJECTED", - "content": { - "@type": "/cosmos.upgrade.v1beta1.SoftwareUpgradeProposal", - "plan": { "name": "v1.2.4", "height": "200" } - } + "messages": [ + { + "@type": "/cosmos.upgrade.v1beta1.MsgSoftwareUpgrade", + "plan": { "name": "v1.2.4", "height": "200" } + } + ] }, { "status": "PROPOSAL_STATUS_PASSED", - "content": { - "@type": "/cosmos.params.v1beta1.ParameterChangeProposal" - } + "messages": [ + { + "@type": "/cosmos.params.v1beta1.ParameterChangeProposal" + } + ] } ] }`) @@ -104,7 +111,7 @@ var _ = Describe("Client Integration", func() { }) It("should return an empty slice when no passed upgrade proposals are found", func() { - mux.HandleFunc("/cosmos/gov/v1beta1/proposals", func(w http.ResponseWriter, r *http.Request) { + mux.HandleFunc("/cosmos/gov/v1/proposals", func(w http.ResponseWriter, r *http.Request) { _, err := fmt.Fprint(w, `{"proposals": []}`) Expect(err).NotTo(HaveOccurred()) }) @@ -115,7 +122,7 @@ var _ = Describe("Client Integration", func() { }) It("should return an error on a non-200 status code", func() { - mux.HandleFunc("/cosmos/gov/v1beta1/proposals", func(w http.ResponseWriter, r *http.Request) { + mux.HandleFunc("/cosmos/gov/v1/proposals", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusInternalServerError) }) From 7162b0bd0b78bfec754d58f92294802c0aaa90ef Mon Sep 17 00:00:00 2001 From: mcamou Date: Fri, 24 Oct 2025 19:29:40 +0200 Subject: [PATCH 07/15] chore(dev): Add definitive Cosmovisor config for GitOps --- docker-compose.yml | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 docker-compose.yml diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..e0e444f --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,43 @@ +services: + gopher-node: + build: + context: ../gopher + container_name: gopher-testnet + restart: unless-stopped + environment: + - DAEMON_NAME=gopherd + - DAEMON_RESTART_AFTER_UPGRADE=true + - DAEMON_ALLOW_DOWNLOAD_BINARIES=false + - UNSAFE_SKIP_BACKUP=true + - ALICE_MNEMONIC=${ALICE_MNEMONIC:-} + - BOB_MNEMONIC=${BOB_MNEMONIC:-} + ports: + - "26656:26656" # P2P + - "26657:26657" # RPC + - "1317:1317" # REST API + - "9090:9090" # gRPC + volumes: + - gopher-data:/home/gopher/.gopher + networks: + - gopher-network + + gopher-updater: + build: . + container_name: gopher-updater + restart: unless-stopped + environment: + DRY_RUN: "true" + API_URL: "http://gopher-node:1317" + REPO_PATH: "test/test" + SOURCE_PREFIX: "release-" + TARGET_PREFIX: "testnet-" + POLL_INTERVAL: "5s" + networks: + - gopher-network + +volumes: + gopher-data: + +networks: + gopher-network: + driver: bridge From 5a8fa1bed4890208f90143cfe46cf65929abe4df Mon Sep 17 00:00:00 2001 From: mcamou Date: Fri, 24 Oct 2025 20:00:43 +0200 Subject: [PATCH 08/15] fix(cosmos): Correctly parse nested v1 gov proposal messages The previous fix for the v1 API was incomplete. The upgrade plan is nested within a 'content' object inside the 'messages' array. This commit updates the structs and parsing logic to correctly handle this nested structure, definitively fixing the 'no passed proposals found' bug. --- cosmos/client.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cosmos/client.go b/cosmos/client.go index 98b5436..693e3e2 100644 --- a/cosmos/client.go +++ b/cosmos/client.go @@ -92,8 +92,8 @@ type Proposal struct { // Message is a generic message in a proposal. type Message struct { - Type string `json:"@type"` - Plan Plan `json:"plan"` + Type string `json:"@type"` + Content ProposalContent `json:"content"` } type ProposalsResponse struct { @@ -131,8 +131,8 @@ func (c *Client) GetUpgradePlans(ctx context.Context) ([]Plan, error) { for _, p := range proposalsResp.Proposals { if p.Status == "PROPOSAL_STATUS_PASSED" { for _, msg := range p.Messages { - if msg.Type == "/cosmos.upgrade.v1beta1.MsgSoftwareUpgrade" { - plans = append(plans, msg.Plan) + if msg.Type == "/cosmos.gov.v1.MsgExecLegacyContent" && msg.Content.Type == "/cosmos.upgrade.v1beta1.SoftwareUpgradeProposal" { + plans = append(plans, msg.Content.Plan) } } } From b964cc020308ad0af3f4c92b738651dc9674523f Mon Sep 17 00:00:00 2001 From: mcamou Date: Fri, 24 Oct 2025 20:07:02 +0200 Subject: [PATCH 09/15] feat(cosmos): Add debug logging for proposal responses To diagnose API parsing issues, this commit introduces debug logging to print the raw response body from the proposals endpoint. The log level is now configurable via the LOG_LEVEL environment variable, which is set to 'debug' in the docker-compose file for development. --- cosmos/client.go | 13 +++++++++++++ docker-compose.yml | 1 + 2 files changed, 14 insertions(+) diff --git a/cosmos/client.go b/cosmos/client.go index 693e3e2..0e59be1 100644 --- a/cosmos/client.go +++ b/cosmos/client.go @@ -1,12 +1,16 @@ package cosmos import ( + "bytes" "context" "encoding/json" "fmt" + "io" "net/http" "net/url" "strconv" + + "github.com/gopher-lab/gopher-updater/pkg/xlog" ) // ClientInterface defines the methods to interact with a Cosmos chain. @@ -122,6 +126,15 @@ func (c *Client) GetUpgradePlans(ctx context.Context) ([]Plan, error) { return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) } + // Read the body for debugging + bodyBytes, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + xlog.Debug("received proposals response", "body", string(bodyBytes)) + // Replace the body so it can be read again by the JSON decoder + resp.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) + var proposalsResp ProposalsResponse if err := json.NewDecoder(resp.Body).Decode(&proposalsResp); err != nil { return nil, fmt.Errorf("failed to decode proposals response: %w", err) diff --git a/docker-compose.yml b/docker-compose.yml index e0e444f..0ebe5d5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -26,6 +26,7 @@ services: container_name: gopher-updater restart: unless-stopped environment: + LOG_LEVEL: "debug" DRY_RUN: "true" API_URL: "http://gopher-node:1317" REPO_PATH: "test/test" From d4186b1b6f13f4a61cb28fb0b272802e5a10fbd1 Mon Sep 17 00:00:00 2001 From: mcamou Date: Fri, 24 Oct 2025 20:08:10 +0200 Subject: [PATCH 10/15] refactor(cosmos): Revert incorrect parsing logic and fix tests The previous parsing logic for v1 proposals was incorrect and broke the tests. This commit reverts the faulty logic and structs, leaving only the debug logger in place. The goal is to capture the raw API response in the next E2E run to inform the correct implementation. --- cosmos/client.go | 12 +++--------- mise.local.toml | 3 +++ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/cosmos/client.go b/cosmos/client.go index 0e59be1..32dcceb 100644 --- a/cosmos/client.go +++ b/cosmos/client.go @@ -84,7 +84,7 @@ type Plan struct { Height string `json:"height"` } -type ProposalContent struct { +type Message struct { Type string `json:"@type"` Plan Plan `json:"plan"` } @@ -94,12 +94,6 @@ type Proposal struct { Messages []Message `json:"messages"` } -// Message is a generic message in a proposal. -type Message struct { - Type string `json:"@type"` - Content ProposalContent `json:"content"` -} - type ProposalsResponse struct { Proposals []Proposal `json:"proposals"` } @@ -144,8 +138,8 @@ func (c *Client) GetUpgradePlans(ctx context.Context) ([]Plan, error) { for _, p := range proposalsResp.Proposals { if p.Status == "PROPOSAL_STATUS_PASSED" { for _, msg := range p.Messages { - if msg.Type == "/cosmos.gov.v1.MsgExecLegacyContent" && msg.Content.Type == "/cosmos.upgrade.v1beta1.SoftwareUpgradeProposal" { - plans = append(plans, msg.Content.Plan) + if msg.Type == "/cosmos.upgrade.v1beta1.MsgSoftwareUpgrade" { + plans = append(plans, msg.Plan) } } } diff --git a/mise.local.toml b/mise.local.toml index e69de29..d4c9bbf 100644 --- a/mise.local.toml +++ b/mise.local.toml @@ -0,0 +1,3 @@ +[env] +ALICE_MNEMONIC = "what close joke frequent rookie afraid lobster raise punch cluster industry talent render game stage ski math success rail elder critic negative there swap" +BOB_MNEMONIC = "hockey clever provide steel bachelor escape deal mansion dirt drift trouble coach setup tape chuckle report kangaroo concert congress first differ dragon hurry stamp" From 37cfc2c242d3c96ef7e975b881bbb612bf74eb69 Mon Sep 17 00:00:00 2001 From: mcamou Date: Fri, 24 Oct 2025 20:15:02 +0200 Subject: [PATCH 11/15] fix(dev): Set correct Cosmovisor config for GitOps restart The DAEMON_RESTART_AFTER_UPGRADE variable was incorrectly set to true. For a GitOps/Kubernetes workflow where the orchestrator handles restarts, this must be set to 'false'. This definitive fix ensures Cosmovisor will stop after an upgrade, allowing the orchestrator to take over as intended. --- docker-compose.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 0ebe5d5..df64a65 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,10 +5,7 @@ services: container_name: gopher-testnet restart: unless-stopped environment: - - DAEMON_NAME=gopherd - - DAEMON_RESTART_AFTER_UPGRADE=true - - DAEMON_ALLOW_DOWNLOAD_BINARIES=false - - UNSAFE_SKIP_BACKUP=true + - DAEMON_RESTART_AFTER_UPGRADE=false - ALICE_MNEMONIC=${ALICE_MNEMONIC:-} - BOB_MNEMONIC=${BOB_MNEMONIC:-} ports: From 04bd207cb9c11e8f3e7a7ee661367715d2b297ef Mon Sep 17 00:00:00 2001 From: mcamou Date: Fri, 24 Oct 2025 20:20:17 +0200 Subject: [PATCH 12/15] fix(updater): Handle race condition during upgrade halt When the chain halts for an upgrade, a race condition can occur where the proposal is found but the subsequent block height query fails. This commit fixes the logic to handle this specific error gracefully. The updater now logs a warning and proceeds to process the upgrade, ensuring the Docker image is retagged as intended. --- updater/updater.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/updater/updater.go b/updater/updater.go index aa60810..ddc3bfb 100644 --- a/updater/updater.go +++ b/updater/updater.go @@ -70,7 +70,11 @@ func (u *Updater) CheckAndProcessUpgrade(ctx context.Context) error { currentHeight, err := u.cosmosClient.GetLatestBlockHeight(ctx) if err != nil { - return fmt.Errorf("failed to get latest block height: %w", err) + // This can happen if the chain halts for the upgrade between the proposal check and this call. + // We'll log a warning and proceed, assuming the upgrade is happening. + xlog.Warn("failed to get latest block height, proceeding with upgrade check", "err", err) + // Set a sentinel height that is guaranteed to be 'after' any valid proposal height + currentHeight = -1 } var pendingPlans []cosmos.Plan @@ -82,7 +86,9 @@ func (u *Updater) CheckAndProcessUpgrade(ctx context.Context) error { } upgradeHeight := proposalHeight - 1 - if currentHeight >= upgradeHeight { + // If currentHeight is our sentinel value, it means the chain is down, + // so we should assume any passed proposal is ready to be processed. + if currentHeight == -1 || currentHeight >= upgradeHeight { targetTag := u.cfg.TargetPrefix + plan.Name exists, err := u.dockerhubClient.TagExists(ctx, u.cfg.RepoPath, targetTag) if err != nil { From 13ee0563515436a45b59cdd441e49381d0e1cc93 Mon Sep 17 00:00:00 2001 From: mcamou Date: Fri, 24 Oct 2025 20:23:40 +0200 Subject: [PATCH 13/15] fix(updater): Implement robust race condition handling The previous race condition fix was too aggressive. This commit implements a much safer logic. The updater now stores the last known block height. If an API call fails, it only proceeds with the upgrade if the last known height is within 5 blocks of a known, passed upgrade proposal. This prevents false positives while correctly handling the chain halt race condition. The poll interval has also been reduced to 1s for faster E2E testing. --- docker-compose.yml | 2 +- updater/updater.go | 35 +++++++++++++++++++++++------------ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index df64a65..0570ba4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,7 +29,7 @@ services: REPO_PATH: "test/test" SOURCE_PREFIX: "release-" TARGET_PREFIX: "testnet-" - POLL_INTERVAL: "5s" + POLL_INTERVAL: "1s" networks: - gopher-network diff --git a/updater/updater.go b/updater/updater.go index ddc3bfb..ec5f7b4 100644 --- a/updater/updater.go +++ b/updater/updater.go @@ -18,6 +18,7 @@ type Updater struct { cosmosClient cosmos.ClientInterface dockerhubClient dockerhub.ClientInterface cfg *config.Config + lastHeight int64 } // New creates a new Updater. @@ -58,6 +59,27 @@ func (u *Updater) Run(ctx context.Context) error { // CheckAndProcessUpgrade fetches all passed upgrade plans and processes the next available one. func (u *Updater) CheckAndProcessUpgrade(ctx context.Context) error { + currentHeight, err := u.cosmosClient.GetLatestBlockHeight(ctx) + if err != nil { + // If we can't get the height, we should check if we are near a known upgrade. + if u.lastHeight > 0 { + plans, pErr := u.cosmosClient.GetUpgradePlans(ctx) + if pErr != nil { + // If we can't get proposals either, we can't do anything. + return fmt.Errorf("failed to get latest block height and proposals: height_err=%w, proposal_err=%v", err, pErr) + } + for _, plan := range plans { + proposalHeight, _ := strconv.ParseInt(plan.Height, 10, 64) + if proposalHeight > 0 && u.lastHeight >= proposalHeight-5 { + xlog.Warn("failed to get latest block height, but we are within 5 blocks of a known upgrade. Assuming chain is halted and proceeding.", "lastKnownHeight", u.lastHeight, "upgradeHeight", proposalHeight) + return u.processUpgrade(ctx, &plan) + } + } + } + return fmt.Errorf("failed to get latest block height: %w", err) + } + u.lastHeight = currentHeight + plans, err := u.cosmosClient.GetUpgradePlans(ctx) if err != nil { return fmt.Errorf("failed to get upgrade plans: %w", err) @@ -68,15 +90,6 @@ func (u *Updater) CheckAndProcessUpgrade(ctx context.Context) error { return nil } - currentHeight, err := u.cosmosClient.GetLatestBlockHeight(ctx) - if err != nil { - // This can happen if the chain halts for the upgrade between the proposal check and this call. - // We'll log a warning and proceed, assuming the upgrade is happening. - xlog.Warn("failed to get latest block height, proceeding with upgrade check", "err", err) - // Set a sentinel height that is guaranteed to be 'after' any valid proposal height - currentHeight = -1 - } - var pendingPlans []cosmos.Plan for _, plan := range plans { proposalHeight, err := strconv.ParseInt(plan.Height, 10, 64) @@ -86,9 +99,7 @@ func (u *Updater) CheckAndProcessUpgrade(ctx context.Context) error { } upgradeHeight := proposalHeight - 1 - // If currentHeight is our sentinel value, it means the chain is down, - // so we should assume any passed proposal is ready to be processed. - if currentHeight == -1 || currentHeight >= upgradeHeight { + if currentHeight >= upgradeHeight { targetTag := u.cfg.TargetPrefix + plan.Name exists, err := u.dockerhubClient.TagExists(ctx, u.cfg.RepoPath, targetTag) if err != nil { From 7b48bb4ae08e77ff985479ec22ca31f7bdce66c6 Mon Sep 17 00:00:00 2001 From: mcamou Date: Fri, 24 Oct 2025 20:29:30 +0200 Subject: [PATCH 14/15] fix(cosmos): Use correct endpoint and structs for v1 API The updater was failing with 501 errors because it was using a legacy endpoint to get the block height. This commit updates the client to use the correct '/cosmos/base/tendermint/v1beta1/blocks/latest' endpoint. It also corrects the associated structs and integration tests to match the modern API response, definitively fixing the recurring API incompatibility. --- cosmos/client.go | 2 +- cosmos/client_integration_test.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cosmos/client.go b/cosmos/client.go index 32dcceb..d632d87 100644 --- a/cosmos/client.go +++ b/cosmos/client.go @@ -52,7 +52,7 @@ type LatestBlockResponse struct { // GetLatestBlockHeight returns the latest block height of the chain. func (c *Client) GetLatestBlockHeight(ctx context.Context) (int64, error) { - req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.rpcURL.JoinPath("/blocks/latest").String(), nil) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.rpcURL.JoinPath("/cosmos/base/tendermint/v1beta1/blocks/latest").String(), nil) if err != nil { return 0, fmt.Errorf("failed to create request: %w", err) } diff --git a/cosmos/client_integration_test.go b/cosmos/client_integration_test.go index 3c86d22..a7128fb 100644 --- a/cosmos/client_integration_test.go +++ b/cosmos/client_integration_test.go @@ -36,7 +36,7 @@ var _ = Describe("Client Integration", func() { Describe("GetLatestBlockHeight", func() { It("should return the correct block height on a valid response", func() { - mux.HandleFunc("/blocks/latest", func(w http.ResponseWriter, r *http.Request) { + mux.HandleFunc("/cosmos/base/tendermint/v1beta1/blocks/latest", func(w http.ResponseWriter, r *http.Request) { _, err := fmt.Fprint(w, `{"block":{"header":{"height":"12345"}}}`) Expect(err).NotTo(HaveOccurred()) }) @@ -47,7 +47,7 @@ var _ = Describe("Client Integration", func() { }) It("should return an error on a non-200 status code", func() { - mux.HandleFunc("/blocks/latest", func(w http.ResponseWriter, r *http.Request) { + mux.HandleFunc("/cosmos/base/tendermint/v1beta1/blocks/latest", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusInternalServerError) }) @@ -56,7 +56,7 @@ var _ = Describe("Client Integration", func() { }) It("should return an error on malformed JSON", func() { - mux.HandleFunc("/blocks/latest", func(w http.ResponseWriter, r *http.Request) { + mux.HandleFunc("/cosmos/base/tendermint/v1beta1/blocks/latest", func(w http.ResponseWriter, r *http.Request) { _, err := fmt.Fprint(w, `{"block":{"header":{"height":malformed}}}`) Expect(err).NotTo(HaveOccurred()) }) From 008a94578c0c7be119435daee9a7f593b52b2aad Mon Sep 17 00:00:00 2001 From: mcamou Date: Fri, 24 Oct 2025 20:36:15 +0200 Subject: [PATCH 15/15] fix(updater): Implement definitive race condition handling The updater logic was flawed, causing it to fail during the chain halt race condition. This commit refactors the check to be more robust. It now only attempts to get the block height *after* finding passed proposals. If the height query fails, it then safely checks if the last known height is within a 5-block threshold of a specific proposal before proceeding. This is the definitive fix for the E2E test. --- updater/updater.go | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/updater/updater.go b/updater/updater.go index ec5f7b4..8a9b935 100644 --- a/updater/updater.go +++ b/updater/updater.go @@ -59,27 +59,6 @@ func (u *Updater) Run(ctx context.Context) error { // CheckAndProcessUpgrade fetches all passed upgrade plans and processes the next available one. func (u *Updater) CheckAndProcessUpgrade(ctx context.Context) error { - currentHeight, err := u.cosmosClient.GetLatestBlockHeight(ctx) - if err != nil { - // If we can't get the height, we should check if we are near a known upgrade. - if u.lastHeight > 0 { - plans, pErr := u.cosmosClient.GetUpgradePlans(ctx) - if pErr != nil { - // If we can't get proposals either, we can't do anything. - return fmt.Errorf("failed to get latest block height and proposals: height_err=%w, proposal_err=%v", err, pErr) - } - for _, plan := range plans { - proposalHeight, _ := strconv.ParseInt(plan.Height, 10, 64) - if proposalHeight > 0 && u.lastHeight >= proposalHeight-5 { - xlog.Warn("failed to get latest block height, but we are within 5 blocks of a known upgrade. Assuming chain is halted and proceeding.", "lastKnownHeight", u.lastHeight, "upgradeHeight", proposalHeight) - return u.processUpgrade(ctx, &plan) - } - } - } - return fmt.Errorf("failed to get latest block height: %w", err) - } - u.lastHeight = currentHeight - plans, err := u.cosmosClient.GetUpgradePlans(ctx) if err != nil { return fmt.Errorf("failed to get upgrade plans: %w", err) @@ -90,6 +69,26 @@ func (u *Updater) CheckAndProcessUpgrade(ctx context.Context) error { return nil } + // We only need to get the height if there are plans to process. + currentHeight, err := u.cosmosClient.GetLatestBlockHeight(ctx) + if err != nil { + // The chain might be halted. Check if we are near an upgrade height. + for _, plan := range plans { + proposalHeight, pErr := strconv.ParseInt(plan.Height, 10, 64) + if pErr != nil { + xlog.Error("failed to parse upgrade height for plan, skipping", "plan", plan.Name, "height", plan.Height, "err", pErr) + continue + } + + if u.lastHeight > 0 && u.lastHeight >= proposalHeight-5 { + xlog.Warn("failed to get latest block height, but last known height is within 5 blocks of a passed proposal. Assuming chain has halted for upgrade.", "lastKnownHeight", u.lastHeight, "upgradeHeight", proposalHeight) + return u.processUpgrade(ctx, &plan) + } + } + return fmt.Errorf("failed to get latest block height: %w", err) + } + u.lastHeight = currentHeight + var pendingPlans []cosmos.Plan for _, plan := range plans { proposalHeight, err := strconv.ParseInt(plan.Height, 10, 64)