From bd05357eee54cdeff5ea9933c0bb5ed4fb59ad86 Mon Sep 17 00:00:00 2001 From: amyangfei Date: Sat, 24 Jan 2026 21:14:30 +0800 Subject: [PATCH] playground: wait PD ready before starting TiDB to avoid cluster ID mismatch --- components/playground/instance/pd.go | 40 ++++++++++++++++++++++++++++ components/playground/playground.go | 14 ++++++++++ 2 files changed, 54 insertions(+) diff --git a/components/playground/instance/pd.go b/components/playground/instance/pd.go index c923fb2ff2..c35e4c86d9 100644 --- a/components/playground/instance/pd.go +++ b/components/playground/instance/pd.go @@ -15,9 +15,12 @@ package instance import ( "context" + "encoding/json" "fmt" + "net/http" "path/filepath" "strings" + "time" "github.com/pingcap/errors" "github.com/pingcap/tiup/pkg/tidbver" @@ -236,3 +239,40 @@ func (inst *PDInstance) LogFile() string { func (inst *PDInstance) Addr() string { return utils.JoinHostPort(AdvertiseHost(inst.Host), inst.StatusPort) } + +// Ready returns nil when PD is ready to serve. 
+// It probes the PD members API, waiting one second between attempts, until a
+// 200 response decodes with a non-zero cluster ID; otherwise it returns ctx.Err().
+func (inst *PDInstance) Ready(ctx context.Context) error {
+	url := fmt.Sprintf("http://%s/pd/api/v1/members", inst.Addr())
+	// ready probes once; binding the request to ctx aborts a stalled connection.
+	ready := func() bool {
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+		if err != nil {
+			return false
+		}
+		resp, err := http.DefaultClient.Do(req)
+		if err != nil {
+			return false
+		}
+		defer resp.Body.Close()
+		if resp.StatusCode != http.StatusOK {
+			return false
+		}
+		var r struct {
+			Header struct {
+				ClusterID uint64 `json:"cluster_id"`
+			} `json:"header"`
+		}
+		return json.NewDecoder(resp.Body).Decode(&r) == nil && r.Header.ClusterID != 0
+	}
+
+	for !ready() {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case <-time.After(time.Second): // retry
+		}
+	}
+	return nil
+}
diff --git a/components/playground/playground.go b/components/playground/playground.go index 4d4a343dbc..5a2cad056c 100644 --- a/components/playground/playground.go +++ b/components/playground/playground.go @@ -1285,6 +1285,7 @@ func (p *Playground) bootCluster(ctx context.Context, env *environment.Environme anyPumpReady := false allDMMasterReady := false + allPDReady := false // Start all instance except tiflash. err := p.WalkInstances(func(cid string, ins instance.Instance) error { if cid == spec.ComponentTiFlash { @@ -1296,6 +1297,19 @@ func (p *Playground) bootCluster(ctx context.Context, env *environment.Environme allDMMasterReady = true } + // wait all PD ready before starting TiDB to avoid cluster ID mismatch + if cid == spec.ComponentTiDB && !allPDReady { + for _, pd := range p.pds { + pdCtx, cancel := context.WithTimeout(ctx, time.Second*120) + err := pd.Ready(pdCtx) + cancel() + if err != nil { + return errors.Annotatef(err, "failed to wait PD %s to be ready", pd.Addr()) + } + } + allPDReady = true + } + err := p.startInstance(ctx, ins) if err != nil { return err