From 9b527396275ff827f036252b78befe15bafc15ea Mon Sep 17 00:00:00 2001 From: Arjan Bal Date: Mon, 27 Oct 2025 12:03:47 +0530 Subject: [PATCH 1/8] build works --- .../endpointsharding_ext_test.go | 8 +- balancer/lazy/lazy_ext_test.go | 12 +- balancer/leastrequest/leastrequest.go | 6 +- .../{pickfirstleaf => }/metrics_test.go | 6 +- balancer/pickfirst/pickfirst.go | 291 ------ balancer/pickfirst/pickfirst_ext_test.go | 13 - balancer/pickfirst/pickfirstleaf.go | 903 ++++++++++++++++++ .../pickfirst/pickfirstleaf/pickfirstleaf.go | 890 +---------------- .../pickfirstleaf_ext_test.go | 32 +- .../{pickfirstleaf => }/pickfirstleaf_test.go | 19 +- balancer/ringhash/ringhash.go | 6 +- balancer/roundrobin/roundrobin.go | 6 +- balancer/weightedroundrobin/balancer.go | 6 +- clientconn_test.go | 6 +- .../customroundrobin/customroundrobin.go | 4 +- internal/envconfig/envconfig.go | 6 - .../outlierdetection/balancer_test.go | 4 +- .../e2e_test/outlierdetection_test.go | 10 - test/clientconn_state_transition_test.go | 15 +- xds/googledirectpath/googlec2p.go | 8 +- xds/googledirectpath/googlec2p_test.go | 2 +- 21 files changed, 959 insertions(+), 1294 deletions(-) rename balancer/pickfirst/{pickfirstleaf => }/metrics_test.go (98%) delete mode 100644 balancer/pickfirst/pickfirst.go create mode 100644 balancer/pickfirst/pickfirstleaf.go rename balancer/pickfirst/{pickfirstleaf => }/pickfirstleaf_ext_test.go (98%) rename balancer/pickfirst/{pickfirstleaf => }/pickfirstleaf_test.go (95%) diff --git a/balancer/endpointsharding/endpointsharding_ext_test.go b/balancer/endpointsharding/endpointsharding_ext_test.go index 3a71503c5c68..db3d1f2a065a 100644 --- a/balancer/endpointsharding/endpointsharding_ext_test.go +++ b/balancer/endpointsharding/endpointsharding_ext_test.go @@ -31,7 +31,7 @@ import ( "google.golang.org/grpc/backoff" "google.golang.org/grpc/balancer" "google.golang.org/grpc/balancer/endpointsharding" - "google.golang.org/grpc/balancer/pickfirst/pickfirstleaf" + "google.golang.org/grpc/balancer/pickfirst" "google.golang.org/grpc/codes" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/credentials/insecure" @@ -84,7 +84,7 @@ func (fakePetioleBuilder) Build(cc balancer.ClientConn, opts balancer.BuildOptio ClientConn: cc, bOpts: opts, } - fp.Balancer = endpointsharding.NewBalancer(fp, opts, balancer.Get(pickfirstleaf.Name).Build, endpointsharding.Options{}) + fp.Balancer = endpointsharding.NewBalancer(fp, opts, balancer.Get(pickfirst.Name).Build, endpointsharding.Options{}) return fp } @@ -222,7 +222,7 @@ func (s) TestEndpointShardingReconnectDisabled(t *testing.T) { bf := stub.BalancerFuncs{ Init: func(bd *stub.BalancerData) { epOpts := endpointsharding.Options{DisableAutoReconnect: true} - bd.ChildBalancer = endpointsharding.NewBalancer(bd.ClientConn, bd.BuildOptions, balancer.Get(pickfirstleaf.Name).Build, epOpts) + bd.ChildBalancer = endpointsharding.NewBalancer(bd.ClientConn, bd.BuildOptions, balancer.Get(pickfirst.Name).Build, epOpts) }, UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error { return bd.ChildBalancer.UpdateClientConnState(ccs) @@ -303,7 +303,7 @@ func (s) TestEndpointShardingExitIdle(t *testing.T) { bf := stub.BalancerFuncs{ Init: func(bd *stub.BalancerData) { epOpts := endpointsharding.Options{DisableAutoReconnect: true} - bd.ChildBalancer = endpointsharding.NewBalancer(bd.ClientConn, bd.BuildOptions, balancer.Get(pickfirstleaf.Name).Build, epOpts) + bd.ChildBalancer = endpointsharding.NewBalancer(bd.ClientConn, bd.BuildOptions, 
balancer.Get(pickfirst.Name).Build, epOpts) }, UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error { return bd.ChildBalancer.UpdateClientConnState(ccs) diff --git a/balancer/lazy/lazy_ext_test.go b/balancer/lazy/lazy_ext_test.go index 53ca6574313a..dee0770cab92 100644 --- a/balancer/lazy/lazy_ext_test.go +++ b/balancer/lazy/lazy_ext_test.go @@ -29,7 +29,7 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/balancer" "google.golang.org/grpc/balancer/lazy" - "google.golang.org/grpc/balancer/pickfirst/pickfirstleaf" + "google.golang.org/grpc/balancer/pickfirst" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/internal/balancer/stub" @@ -79,7 +79,7 @@ func (s) TestExitIdle(t *testing.T) { bf := stub.BalancerFuncs{ Init: func(bd *stub.BalancerData) { - bd.ChildBalancer = lazy.NewBalancer(bd.ClientConn, bd.BuildOptions, balancer.Get(pickfirstleaf.Name).Build) + bd.ChildBalancer = lazy.NewBalancer(bd.ClientConn, bd.BuildOptions, balancer.Get(pickfirst.Name).Build) }, ExitIdle: func(bd *stub.BalancerData) { bd.ChildBalancer.ExitIdle() @@ -144,7 +144,7 @@ func (s) TestPicker(t *testing.T) { bf := stub.BalancerFuncs{ Init: func(bd *stub.BalancerData) { - bd.ChildBalancer = lazy.NewBalancer(bd.ClientConn, bd.BuildOptions, balancer.Get(pickfirstleaf.Name).Build) + bd.ChildBalancer = lazy.NewBalancer(bd.ClientConn, bd.BuildOptions, balancer.Get(pickfirst.Name).Build) }, ExitIdle: func(*stub.BalancerData) { t.Log("Ignoring call to ExitIdle, calling the picker should make the lazy balancer exit IDLE state.") @@ -201,7 +201,7 @@ func (s) TestGoodUpdateThenResolverError(t *testing.T) { childBF := stub.BalancerFuncs{ Init: func(bd *stub.BalancerData) { - bd.ChildBalancer = balancer.Get(pickfirstleaf.Name).Build(bd.ClientConn, bd.BuildOptions) + bd.ChildBalancer = balancer.Get(pickfirst.Name).Build(bd.ClientConn, bd.BuildOptions) }, UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error { if resolverErrorReceived.HasFired() { @@ -306,7 +306,7 @@ func (s) TestResolverErrorThenGoodUpdate(t *testing.T) { childBF := stub.BalancerFuncs{ Init: func(bd *stub.BalancerData) { - bd.ChildBalancer = balancer.Get(pickfirstleaf.Name).Build(bd.ClientConn, bd.BuildOptions) + bd.ChildBalancer = balancer.Get(pickfirst.Name).Build(bd.ClientConn, bd.BuildOptions) }, UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error { return bd.ChildBalancer.UpdateClientConnState(ccs) @@ -407,7 +407,7 @@ func (s) TestExitIdlePassthrough(t *testing.T) { bf := stub.BalancerFuncs{ Init: func(bd *stub.BalancerData) { - bd.ChildBalancer = lazy.NewBalancer(bd.ClientConn, bd.BuildOptions, balancer.Get(pickfirstleaf.Name).Build) + bd.ChildBalancer = lazy.NewBalancer(bd.ClientConn, bd.BuildOptions, balancer.Get(pickfirst.Name).Build) }, ExitIdle: func(bd *stub.BalancerData) { bd.ChildBalancer.ExitIdle() diff --git a/balancer/leastrequest/leastrequest.go b/balancer/leastrequest/leastrequest.go index f9cf7ccfc1ef..c7621eea91c4 100644 --- a/balancer/leastrequest/leastrequest.go +++ b/balancer/leastrequest/leastrequest.go @@ -28,7 +28,7 @@ import ( "google.golang.org/grpc/balancer" "google.golang.org/grpc/balancer/endpointsharding" - "google.golang.org/grpc/balancer/pickfirst/pickfirstleaf" + "google.golang.org/grpc/balancer/pickfirst" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/grpclog" internalgrpclog "google.golang.org/grpc/internal/grpclog" @@ -90,7 +90,7 @@ func (bb) 
Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Ba ClientConn: cc, endpointRPCCounts: resolver.NewEndpointMap[*atomic.Int32](), } - b.child = endpointsharding.NewBalancer(b, bOpts, balancer.Get(pickfirstleaf.Name).Build, endpointsharding.Options{}) + b.child = endpointsharding.NewBalancer(b, bOpts, balancer.Get(pickfirst.Name).Build, endpointsharding.Options{}) b.logger = internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf("[%p] ", b)) b.logger.Infof("Created") return b @@ -141,7 +141,7 @@ func (lrb *leastRequestBalancer) UpdateClientConnState(ccs balancer.ClientConnSt return lrb.child.UpdateClientConnState(balancer.ClientConnState{ // Enable the health listener in pickfirst children for client side health // checks and outlier detection, if configured. - ResolverState: pickfirstleaf.EnableHealthListener(ccs.ResolverState), + ResolverState: pickfirst.EnableHealthListener(ccs.ResolverState), }) } diff --git a/balancer/pickfirst/pickfirstleaf/metrics_test.go b/balancer/pickfirst/metrics_test.go similarity index 98% rename from balancer/pickfirst/pickfirstleaf/metrics_test.go rename to balancer/pickfirst/metrics_test.go index 4214f7ec2eae..0deb12715831 100644 --- a/balancer/pickfirst/pickfirstleaf/metrics_test.go +++ b/balancer/pickfirst/metrics_test.go @@ -16,7 +16,7 @@ * */ -package pickfirstleaf_test +package pickfirst_test import ( "context" @@ -24,7 +24,7 @@ import ( "testing" "google.golang.org/grpc" - "google.golang.org/grpc/balancer/pickfirst/pickfirstleaf" + "google.golang.org/grpc/balancer/pickfirst" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/internal" @@ -54,7 +54,7 @@ func init() { } } ] - }`, pickfirstleaf.Name) + }`, pickfirst.Name) } // TestPickFirstMetrics tests pick first metrics. It configures a pick first diff --git a/balancer/pickfirst/pickfirst.go b/balancer/pickfirst/pickfirst.go deleted file mode 100644 index b15c10e46b0a..000000000000 --- a/balancer/pickfirst/pickfirst.go +++ /dev/null @@ -1,291 +0,0 @@ -/* - * - * Copyright 2017 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -// Package pickfirst contains the pick_first load balancing policy. -package pickfirst - -import ( - "encoding/json" - "errors" - "fmt" - rand "math/rand/v2" - - "google.golang.org/grpc/balancer" - "google.golang.org/grpc/balancer/pickfirst/internal" - "google.golang.org/grpc/connectivity" - "google.golang.org/grpc/grpclog" - "google.golang.org/grpc/internal/envconfig" - internalgrpclog "google.golang.org/grpc/internal/grpclog" - "google.golang.org/grpc/internal/pretty" - "google.golang.org/grpc/resolver" - "google.golang.org/grpc/serviceconfig" - - _ "google.golang.org/grpc/balancer/pickfirst/pickfirstleaf" // For automatically registering the new pickfirst if required. 
-) - -func init() { - if envconfig.NewPickFirstEnabled { - return - } - balancer.Register(pickfirstBuilder{}) -} - -var logger = grpclog.Component("pick-first-lb") - -const ( - // Name is the name of the pick_first balancer. - Name = "pick_first" - logPrefix = "[pick-first-lb %p] " -) - -type pickfirstBuilder struct{} - -func (pickfirstBuilder) Build(cc balancer.ClientConn, _ balancer.BuildOptions) balancer.Balancer { - b := &pickfirstBalancer{cc: cc} - b.logger = internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf(logPrefix, b)) - return b -} - -func (pickfirstBuilder) Name() string { - return Name -} - -type pfConfig struct { - serviceconfig.LoadBalancingConfig `json:"-"` - - // If set to true, instructs the LB policy to shuffle the order of the list - // of endpoints received from the name resolver before attempting to - // connect to them. - ShuffleAddressList bool `json:"shuffleAddressList"` -} - -func (pickfirstBuilder) ParseConfig(js json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { - var cfg pfConfig - if err := json.Unmarshal(js, &cfg); err != nil { - return nil, fmt.Errorf("pickfirst: unable to unmarshal LB policy config: %s, error: %v", string(js), err) - } - return cfg, nil -} - -type pickfirstBalancer struct { - logger *internalgrpclog.PrefixLogger - state connectivity.State - cc balancer.ClientConn - subConn balancer.SubConn -} - -func (b *pickfirstBalancer) ResolverError(err error) { - if b.logger.V(2) { - b.logger.Infof("Received error from the name resolver: %v", err) - } - if b.subConn == nil { - b.state = connectivity.TransientFailure - } - - if b.state != connectivity.TransientFailure { - // The picker will not change since the balancer does not currently - // report an error. - return - } - b.cc.UpdateState(balancer.State{ - ConnectivityState: connectivity.TransientFailure, - Picker: &picker{err: fmt.Errorf("name resolver error: %v", err)}, - }) -} - -// Shuffler is an interface for shuffling an address list. -type Shuffler interface { - ShuffleAddressListForTesting(n int, swap func(i, j int)) -} - -// ShuffleAddressListForTesting pseudo-randomizes the order of addresses. n -// is the number of elements. swap swaps the elements with indexes i and j. -func ShuffleAddressListForTesting(n int, swap func(i, j int)) { rand.Shuffle(n, swap) } - -func (b *pickfirstBalancer) UpdateClientConnState(state balancer.ClientConnState) error { - if len(state.ResolverState.Addresses) == 0 && len(state.ResolverState.Endpoints) == 0 { - // The resolver reported an empty address list. Treat it like an error by - // calling b.ResolverError. - if b.subConn != nil { - // Shut down the old subConn. All addresses were removed, so it is - // no longer valid. - b.subConn.Shutdown() - b.subConn = nil - } - b.ResolverError(errors.New("produced zero addresses")) - return balancer.ErrBadResolverState - } - // We don't have to guard this block with the env var because ParseConfig - // already does so. - cfg, ok := state.BalancerConfig.(pfConfig) - if state.BalancerConfig != nil && !ok { - return fmt.Errorf("pickfirst: received illegal BalancerConfig (type %T): %v", state.BalancerConfig, state.BalancerConfig) - } - - if b.logger.V(2) { - b.logger.Infof("Received new config %s, resolver state %s", pretty.ToJSON(cfg), pretty.ToJSON(state.ResolverState)) - } - - var addrs []resolver.Address - if endpoints := state.ResolverState.Endpoints; len(endpoints) != 0 { - // Perform the optional shuffling described in gRFC A62. 
The shuffling will - // change the order of endpoints but not touch the order of the addresses - // within each endpoint. - A61 - if cfg.ShuffleAddressList { - endpoints = append([]resolver.Endpoint{}, endpoints...) - internal.RandShuffle(len(endpoints), func(i, j int) { endpoints[i], endpoints[j] = endpoints[j], endpoints[i] }) - } - - // "Flatten the list by concatenating the ordered list of addresses for each - // of the endpoints, in order." - A61 - for _, endpoint := range endpoints { - // "In the flattened list, interleave addresses from the two address - // families, as per RFC-8304 section 4." - A61 - // TODO: support the above language. - addrs = append(addrs, endpoint.Addresses...) - } - } else { - // Endpoints not set, process addresses until we migrate resolver - // emissions fully to Endpoints. The top channel does wrap emitted - // addresses with endpoints, however some balancers such as weighted - // target do not forward the corresponding correct endpoints down/split - // endpoints properly. Once all balancers correctly forward endpoints - // down, can delete this else conditional. - addrs = state.ResolverState.Addresses - if cfg.ShuffleAddressList { - addrs = append([]resolver.Address{}, addrs...) - internal.RandShuffle(len(addrs), func(i, j int) { addrs[i], addrs[j] = addrs[j], addrs[i] }) - } - } - - if b.subConn != nil { - b.cc.UpdateAddresses(b.subConn, addrs) - return nil - } - - var subConn balancer.SubConn - subConn, err := b.cc.NewSubConn(addrs, balancer.NewSubConnOptions{ - StateListener: func(state balancer.SubConnState) { - b.updateSubConnState(subConn, state) - }, - }) - if err != nil { - if b.logger.V(2) { - b.logger.Infof("Failed to create new SubConn: %v", err) - } - b.state = connectivity.TransientFailure - b.cc.UpdateState(balancer.State{ - ConnectivityState: connectivity.TransientFailure, - Picker: &picker{err: fmt.Errorf("error creating connection: %v", err)}, - }) - return balancer.ErrBadResolverState - } - b.subConn = subConn - b.state = connectivity.Idle - b.cc.UpdateState(balancer.State{ - ConnectivityState: connectivity.Connecting, - Picker: &picker{err: balancer.ErrNoSubConnAvailable}, - }) - b.subConn.Connect() - return nil -} - -// UpdateSubConnState is unused as a StateListener is always registered when -// creating SubConns. -func (b *pickfirstBalancer) UpdateSubConnState(subConn balancer.SubConn, state balancer.SubConnState) { - b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", subConn, state) -} - -func (b *pickfirstBalancer) updateSubConnState(subConn balancer.SubConn, state balancer.SubConnState) { - if b.logger.V(2) { - b.logger.Infof("Received SubConn state update: %p, %+v", subConn, state) - } - if b.subConn != subConn { - if b.logger.V(2) { - b.logger.Infof("Ignored state change because subConn is not recognized") - } - return - } - if state.ConnectivityState == connectivity.Shutdown { - b.subConn = nil - return - } - - switch state.ConnectivityState { - case connectivity.Ready: - b.cc.UpdateState(balancer.State{ - ConnectivityState: state.ConnectivityState, - Picker: &picker{result: balancer.PickResult{SubConn: subConn}}, - }) - case connectivity.Connecting: - if b.state == connectivity.TransientFailure { - // We stay in TransientFailure until we are Ready. See A62. 
- return - } - b.cc.UpdateState(balancer.State{ - ConnectivityState: state.ConnectivityState, - Picker: &picker{err: balancer.ErrNoSubConnAvailable}, - }) - case connectivity.Idle: - if b.state == connectivity.TransientFailure { - // We stay in TransientFailure until we are Ready. Also kick the - // subConn out of Idle into Connecting. See A62. - b.subConn.Connect() - return - } - b.cc.UpdateState(balancer.State{ - ConnectivityState: state.ConnectivityState, - Picker: &idlePicker{subConn: subConn}, - }) - case connectivity.TransientFailure: - b.cc.UpdateState(balancer.State{ - ConnectivityState: state.ConnectivityState, - Picker: &picker{err: state.ConnectionError}, - }) - } - b.state = state.ConnectivityState -} - -func (b *pickfirstBalancer) Close() { -} - -func (b *pickfirstBalancer) ExitIdle() { - if b.subConn != nil && b.state == connectivity.Idle { - b.subConn.Connect() - } -} - -type picker struct { - result balancer.PickResult - err error -} - -func (p *picker) Pick(balancer.PickInfo) (balancer.PickResult, error) { - return p.result, p.err -} - -// idlePicker is used when the SubConn is IDLE and kicks the SubConn into -// CONNECTING when Pick is called. -type idlePicker struct { - subConn balancer.SubConn -} - -func (i *idlePicker) Pick(balancer.PickInfo) (balancer.PickResult, error) { - i.subConn.Connect() - return balancer.PickResult{}, balancer.ErrNoSubConnAvailable -} diff --git a/balancer/pickfirst/pickfirst_ext_test.go b/balancer/pickfirst/pickfirst_ext_test.go index 207fc4316f98..1c36855d97f5 100644 --- a/balancer/pickfirst/pickfirst_ext_test.go +++ b/balancer/pickfirst/pickfirst_ext_test.go @@ -38,7 +38,6 @@ import ( "google.golang.org/grpc/internal" "google.golang.org/grpc/internal/balancer/stub" "google.golang.org/grpc/internal/channelz" - "google.golang.org/grpc/internal/grpctest" "google.golang.org/grpc/internal/stubserver" "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/internal/testutils/pickfirst" @@ -54,24 +53,12 @@ import ( const ( pickFirstServiceConfig = `{"loadBalancingConfig": [{"pick_first":{}}]}` // Default timeout for tests in this package. - defaultTestTimeout = 10 * time.Second - // Default short timeout, to be used when waiting for events which are not - // expected to happen. - defaultTestShortTimeout = 100 * time.Millisecond ) func init() { channelz.TurnOn() } -type s struct { - grpctest.Tester -} - -func Test(t *testing.T) { - grpctest.RunSubTests(t, s{}) -} - // parseServiceConfig is a test helper which uses the manual resolver to parse // the given service config. It calls t.Fatal() if service config parsing fails. func parseServiceConfig(t *testing.T, r *manual.Resolver, sc string) *serviceconfig.ParseResult { diff --git a/balancer/pickfirst/pickfirstleaf.go b/balancer/pickfirst/pickfirstleaf.go new file mode 100644 index 000000000000..b9edb68fa568 --- /dev/null +++ b/balancer/pickfirst/pickfirstleaf.go @@ -0,0 +1,903 @@ +/* + * + * Copyright 2024 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +// Package pickfirst contains the pick_first load balancing policy which +// is the universal leaf policy. +package pickfirst + +import ( + "encoding/json" + "errors" + "fmt" + "net" + "net/netip" + "sync" + "time" + + "google.golang.org/grpc/balancer" + "google.golang.org/grpc/balancer/pickfirst/internal" + "google.golang.org/grpc/connectivity" + expstats "google.golang.org/grpc/experimental/stats" + "google.golang.org/grpc/grpclog" + internalgrpclog "google.golang.org/grpc/internal/grpclog" + "google.golang.org/grpc/internal/pretty" + "google.golang.org/grpc/resolver" + "google.golang.org/grpc/serviceconfig" +) + +func init() { + balancer.Register(pickfirstBuilder{}) +} + +// Name is the name of the pick_first balancer. +const Name = "pick_first" + +// enableHealthListenerKeyType is a unique key type used in resolver +// attributes to indicate whether the health listener usage is enabled. +type enableHealthListenerKeyType struct{} + +var ( + logger = grpclog.Component("pick-first-leaf-lb") + disconnectionsMetric = expstats.RegisterInt64Count(expstats.MetricDescriptor{ + Name: "grpc.lb.pick_first.disconnections", + Description: "EXPERIMENTAL. Number of times the selected subchannel becomes disconnected.", + Unit: "{disconnection}", + Labels: []string{"grpc.target"}, + Default: false, + }) + connectionAttemptsSucceededMetric = expstats.RegisterInt64Count(expstats.MetricDescriptor{ + Name: "grpc.lb.pick_first.connection_attempts_succeeded", + Description: "EXPERIMENTAL. Number of successful connection attempts.", + Unit: "{attempt}", + Labels: []string{"grpc.target"}, + Default: false, + }) + connectionAttemptsFailedMetric = expstats.RegisterInt64Count(expstats.MetricDescriptor{ + Name: "grpc.lb.pick_first.connection_attempts_failed", + Description: "EXPERIMENTAL. Number of failed connection attempts.", + Unit: "{attempt}", + Labels: []string{"grpc.target"}, + Default: false, + }) +) + +const ( + // TODO: change to pick-first when this becomes the default pick_first policy. + logPrefix = "[pick-first-leaf-lb %p] " + // connectionDelayInterval is the time to wait for during the happy eyeballs + // pass before starting the next connection attempt. + connectionDelayInterval = 250 * time.Millisecond +) + +type ipAddrFamily int + +const ( + // ipAddrFamilyUnknown represents strings that can't be parsed as an IP + // address. + ipAddrFamilyUnknown ipAddrFamily = iota + ipAddrFamilyV4 + ipAddrFamilyV6 +) + +type pickfirstBuilder struct{} + +func (pickfirstBuilder) Build(cc balancer.ClientConn, bo balancer.BuildOptions) balancer.Balancer { + b := &pickfirstBalancer{ + cc: cc, + target: bo.Target.String(), + metricsRecorder: cc.MetricsRecorder(), + + subConns: resolver.NewAddressMapV2[*scData](), + state: connectivity.Connecting, + cancelConnectionTimer: func() {}, + } + b.logger = internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf(logPrefix, b)) + return b +} + +func (b pickfirstBuilder) Name() string { + return Name +} + +func (pickfirstBuilder) ParseConfig(js json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { + var cfg pfConfig + if err := json.Unmarshal(js, &cfg); err != nil { + return nil, fmt.Errorf("pickfirst: unable to unmarshal LB policy config: %s, error: %v", string(js), err) + } + return cfg, nil +} + +// EnableHealthListener updates the state to configure pickfirst for using a +// generic health listener. 
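+// Parent policies opt their pickfirst children into health checking with
+// this; for example, leastrequest in this same patch forwards
+// pickfirst.EnableHealthListener(ccs.ResolverState) to its endpointsharding
+// child for client-side health checks and outlier detection.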
+func EnableHealthListener(state resolver.State) resolver.State {
+	state.Attributes = state.Attributes.WithValue(enableHealthListenerKeyType{}, true)
+	return state
+}
+
+type pfConfig struct {
+	serviceconfig.LoadBalancingConfig `json:"-"`
+
+	// If set to true, instructs the LB policy to shuffle the order of the list
+	// of endpoints received from the name resolver before attempting to
+	// connect to them.
+	ShuffleAddressList bool `json:"shuffleAddressList"`
+}
+
+// scData keeps track of the current state of the subConn.
+// It is not safe for concurrent access.
+type scData struct {
+	// The following fields are initialized at build time and read-only after
+	// that.
+	subConn balancer.SubConn
+	addr    resolver.Address
+
+	rawConnectivityState connectivity.State
+	// The effective connectivity state based on raw connectivity, health state
+	// and after following sticky TransientFailure behaviour defined in A62.
+	effectiveState              connectivity.State
+	lastErr                     error
+	connectionFailedInFirstPass bool
+}
+
+func (b *pickfirstBalancer) newSCData(addr resolver.Address) (*scData, error) {
+	sd := &scData{
+		rawConnectivityState: connectivity.Idle,
+		effectiveState:       connectivity.Idle,
+		addr:                 addr,
+	}
+	sc, err := b.cc.NewSubConn([]resolver.Address{addr}, balancer.NewSubConnOptions{
+		StateListener: func(state balancer.SubConnState) {
+			b.updateSubConnState(sd, state)
+		},
+	})
+	if err != nil {
+		return nil, err
+	}
+	sd.subConn = sc
+	return sd, nil
+}
+
+type pickfirstBalancer struct {
+	// The following fields are initialized at build time and read-only after
+	// that and therefore do not need to be guarded by a mutex.
+	logger          *internalgrpclog.PrefixLogger
+	cc              balancer.ClientConn
+	target          string
+	metricsRecorder expstats.MetricsRecorder // guaranteed to be non-nil
+
+	// The mutex is used to ensure synchronization of updates triggered
+	// from the idle picker and the already serialized resolver,
+	// SubConn state updates.
+	mu sync.Mutex
+	// State reported to the channel based on SubConn states and resolver
+	// updates.
+	state connectivity.State
+	// scData for active SubConns mapped by address.
+	subConns              *resolver.AddressMapV2[*scData]
+	addressList           addressList
+	firstPass             bool
+	numTF                 int
+	cancelConnectionTimer func()
+	healthCheckingEnabled bool
+}
+
+// ResolverError is called by the ClientConn when the name resolver produces
+// an error or when pickfirst determines the resolver update to be invalid.
+func (b *pickfirstBalancer) ResolverError(err error) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	b.resolverErrorLocked(err)
+}
+
+func (b *pickfirstBalancer) resolverErrorLocked(err error) {
+	if b.logger.V(2) {
+		b.logger.Infof("Received error from the name resolver: %v", err)
+	}
+
+	// The picker will not change since the balancer does not currently
+	// report an error. If the balancer hasn't received a single good resolver
+	// update yet, transition to TRANSIENT_FAILURE.
+ if b.state != connectivity.TransientFailure && b.addressList.size() > 0 { + if b.logger.V(2) { + b.logger.Infof("Ignoring resolver error because balancer is using a previous good update.") + } + return + } + + b.updateBalancerState(balancer.State{ + ConnectivityState: connectivity.TransientFailure, + Picker: &picker{err: fmt.Errorf("name resolver error: %v", err)}, + }) +} + +func (b *pickfirstBalancer) UpdateClientConnState(state balancer.ClientConnState) error { + b.mu.Lock() + defer b.mu.Unlock() + b.cancelConnectionTimer() + if len(state.ResolverState.Addresses) == 0 && len(state.ResolverState.Endpoints) == 0 { + // Cleanup state pertaining to the previous resolver state. + // Treat an empty address list like an error by calling b.ResolverError. + b.closeSubConnsLocked() + b.addressList.updateAddrs(nil) + b.resolverErrorLocked(errors.New("produced zero addresses")) + return balancer.ErrBadResolverState + } + b.healthCheckingEnabled = state.ResolverState.Attributes.Value(enableHealthListenerKeyType{}) != nil + cfg, ok := state.BalancerConfig.(pfConfig) + if state.BalancerConfig != nil && !ok { + return fmt.Errorf("pickfirst: received illegal BalancerConfig (type %T): %v: %w", state.BalancerConfig, state.BalancerConfig, balancer.ErrBadResolverState) + } + + if b.logger.V(2) { + b.logger.Infof("Received new config %s, resolver state %s", pretty.ToJSON(cfg), pretty.ToJSON(state.ResolverState)) + } + + var newAddrs []resolver.Address + if endpoints := state.ResolverState.Endpoints; len(endpoints) != 0 { + // Perform the optional shuffling described in gRFC A62. The shuffling + // will change the order of endpoints but not touch the order of the + // addresses within each endpoint. - A61 + if cfg.ShuffleAddressList { + endpoints = append([]resolver.Endpoint{}, endpoints...) + internal.RandShuffle(len(endpoints), func(i, j int) { endpoints[i], endpoints[j] = endpoints[j], endpoints[i] }) + } + + // "Flatten the list by concatenating the ordered list of addresses for + // each of the endpoints, in order." - A61 + for _, endpoint := range endpoints { + newAddrs = append(newAddrs, endpoint.Addresses...) + } + } else { + // Endpoints not set, process addresses until we migrate resolver + // emissions fully to Endpoints. The top channel does wrap emitted + // addresses with endpoints, however some balancers such as weighted + // target do not forward the corresponding correct endpoints down/split + // endpoints properly. Once all balancers correctly forward endpoints + // down, can delete this else conditional. + newAddrs = state.ResolverState.Addresses + if cfg.ShuffleAddressList { + newAddrs = append([]resolver.Address{}, newAddrs...) + internal.RandShuffle(len(newAddrs), func(i, j int) { newAddrs[i], newAddrs[j] = newAddrs[j], newAddrs[i] }) + } + } + + // If an address appears in multiple endpoints or in the same endpoint + // multiple times, we keep it only once. We will create only one SubConn + // for the address because an AddressMap is used to store SubConns. + // Not de-duplicating would result in attempting to connect to the same + // SubConn multiple times in the same pass. We don't want this. 
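+	// For example, a flattened list such as [A, B, A, C] first becomes
+	// [A, B, C], and interleaving then reorders it so that address families
+	// (IPv4/IPv6) alternate, per RFC 8305 (Happy Eyeballs).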
+ newAddrs = deDupAddresses(newAddrs) + newAddrs = interleaveAddresses(newAddrs) + + prevAddr := b.addressList.currentAddress() + prevSCData, found := b.subConns.Get(prevAddr) + prevAddrsCount := b.addressList.size() + isPrevRawConnectivityStateReady := found && prevSCData.rawConnectivityState == connectivity.Ready + b.addressList.updateAddrs(newAddrs) + + // If the previous ready SubConn exists in new address list, + // keep this connection and don't create new SubConns. + if isPrevRawConnectivityStateReady && b.addressList.seekTo(prevAddr) { + return nil + } + + b.reconcileSubConnsLocked(newAddrs) + // If it's the first resolver update or the balancer was already READY + // (but the new address list does not contain the ready SubConn) or + // CONNECTING, enter CONNECTING. + // We may be in TRANSIENT_FAILURE due to a previous empty address list, + // we should still enter CONNECTING because the sticky TF behaviour + // mentioned in A62 applies only when the TRANSIENT_FAILURE is reported + // due to connectivity failures. + if isPrevRawConnectivityStateReady || b.state == connectivity.Connecting || prevAddrsCount == 0 { + // Start connection attempt at first address. + b.forceUpdateConcludedStateLocked(balancer.State{ + ConnectivityState: connectivity.Connecting, + Picker: &picker{err: balancer.ErrNoSubConnAvailable}, + }) + b.startFirstPassLocked() + } else if b.state == connectivity.TransientFailure { + // If we're in TRANSIENT_FAILURE, we stay in TRANSIENT_FAILURE until + // we're READY. See A62. + b.startFirstPassLocked() + } + return nil +} + +// UpdateSubConnState is unused as a StateListener is always registered when +// creating SubConns. +func (b *pickfirstBalancer) UpdateSubConnState(subConn balancer.SubConn, state balancer.SubConnState) { + b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", subConn, state) +} + +func (b *pickfirstBalancer) Close() { + b.mu.Lock() + defer b.mu.Unlock() + b.closeSubConnsLocked() + b.cancelConnectionTimer() + b.state = connectivity.Shutdown +} + +// ExitIdle moves the balancer out of idle state. It can be called concurrently +// by the idlePicker and clientConn so access to variables should be +// synchronized. +func (b *pickfirstBalancer) ExitIdle() { + b.mu.Lock() + defer b.mu.Unlock() + if b.state == connectivity.Idle { + // Move the balancer into CONNECTING state immediately. This is done to + // avoid staying in IDLE if a resolver update arrives before the first + // SubConn reports CONNECTING. + b.updateBalancerState(balancer.State{ + ConnectivityState: connectivity.Connecting, + Picker: &picker{err: balancer.ErrNoSubConnAvailable}, + }) + b.startFirstPassLocked() + } +} + +func (b *pickfirstBalancer) startFirstPassLocked() { + b.firstPass = true + b.numTF = 0 + // Reset the connection attempt record for existing SubConns. + for _, sd := range b.subConns.Values() { + sd.connectionFailedInFirstPass = false + } + b.requestConnectionLocked() +} + +func (b *pickfirstBalancer) closeSubConnsLocked() { + for _, sd := range b.subConns.Values() { + sd.subConn.Shutdown() + } + b.subConns = resolver.NewAddressMapV2[*scData]() +} + +// deDupAddresses ensures that each address appears only once in the slice. 
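+// The first occurrence of an address wins and the relative order of the
+// remaining addresses is preserved.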
+func deDupAddresses(addrs []resolver.Address) []resolver.Address { + seenAddrs := resolver.NewAddressMapV2[bool]() + retAddrs := []resolver.Address{} + + for _, addr := range addrs { + if _, ok := seenAddrs.Get(addr); ok { + continue + } + seenAddrs.Set(addr, true) + retAddrs = append(retAddrs, addr) + } + return retAddrs +} + +// interleaveAddresses interleaves addresses of both families (IPv4 and IPv6) +// as per RFC-8305 section 4. +// Whichever address family is first in the list is followed by an address of +// the other address family; that is, if the first address in the list is IPv6, +// then the first IPv4 address should be moved up in the list to be second in +// the list. It doesn't support configuring "First Address Family Count", i.e. +// there will always be a single member of the first address family at the +// beginning of the interleaved list. +// Addresses that are neither IPv4 nor IPv6 are treated as part of a third +// "unknown" family for interleaving. +// See: https://datatracker.ietf.org/doc/html/rfc8305#autoid-6 +func interleaveAddresses(addrs []resolver.Address) []resolver.Address { + familyAddrsMap := map[ipAddrFamily][]resolver.Address{} + interleavingOrder := []ipAddrFamily{} + for _, addr := range addrs { + family := addressFamily(addr.Addr) + if _, found := familyAddrsMap[family]; !found { + interleavingOrder = append(interleavingOrder, family) + } + familyAddrsMap[family] = append(familyAddrsMap[family], addr) + } + + interleavedAddrs := make([]resolver.Address, 0, len(addrs)) + + for curFamilyIdx := 0; len(interleavedAddrs) < len(addrs); curFamilyIdx = (curFamilyIdx + 1) % len(interleavingOrder) { + // Some IP types may have fewer addresses than others, so we look for + // the next type that has a remaining member to add to the interleaved + // list. + family := interleavingOrder[curFamilyIdx] + remainingMembers := familyAddrsMap[family] + if len(remainingMembers) > 0 { + interleavedAddrs = append(interleavedAddrs, remainingMembers[0]) + familyAddrsMap[family] = remainingMembers[1:] + } + } + + return interleavedAddrs +} + +// addressFamily returns the ipAddrFamily after parsing the address string. +// If the address isn't of the format "ip-address:port", it returns +// ipAddrFamilyUnknown. The address may be valid even if it's not an IP when +// using a resolver like passthrough where the address may be a hostname in +// some format that the dialer can resolve. +func addressFamily(address string) ipAddrFamily { + // Parse the IP after removing the port. + host, _, err := net.SplitHostPort(address) + if err != nil { + return ipAddrFamilyUnknown + } + ip, err := netip.ParseAddr(host) + if err != nil { + return ipAddrFamilyUnknown + } + switch { + case ip.Is4() || ip.Is4In6(): + return ipAddrFamilyV4 + case ip.Is6(): + return ipAddrFamilyV6 + default: + return ipAddrFamilyUnknown + } +} + +// reconcileSubConnsLocked updates the active subchannels based on a new address +// list from the resolver. It does this by: +// - closing subchannels: any existing subchannels associated with addresses +// that are no longer in the updated list are shut down. +// - removing subchannels: entries for these closed subchannels are removed +// from the subchannel map. +// +// This ensures that the subchannel map accurately reflects the current set of +// addresses received from the name resolver. 
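+// SubConns for addresses that appear in both the old and new lists are left
+// untouched so that existing connections and backoff state are reused.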
+func (b *pickfirstBalancer) reconcileSubConnsLocked(newAddrs []resolver.Address) { + newAddrsMap := resolver.NewAddressMapV2[bool]() + for _, addr := range newAddrs { + newAddrsMap.Set(addr, true) + } + + for _, oldAddr := range b.subConns.Keys() { + if _, ok := newAddrsMap.Get(oldAddr); ok { + continue + } + val, _ := b.subConns.Get(oldAddr) + val.subConn.Shutdown() + b.subConns.Delete(oldAddr) + } +} + +// shutdownRemainingLocked shuts down remaining subConns. Called when a subConn +// becomes ready, which means that all other subConn must be shutdown. +func (b *pickfirstBalancer) shutdownRemainingLocked(selected *scData) { + b.cancelConnectionTimer() + for _, sd := range b.subConns.Values() { + if sd.subConn != selected.subConn { + sd.subConn.Shutdown() + } + } + b.subConns = resolver.NewAddressMapV2[*scData]() + b.subConns.Set(selected.addr, selected) +} + +// requestConnectionLocked starts connecting on the subchannel corresponding to +// the current address. If no subchannel exists, one is created. If the current +// subchannel is in TransientFailure, a connection to the next address is +// attempted until a subchannel is found. +func (b *pickfirstBalancer) requestConnectionLocked() { + if !b.addressList.isValid() { + return + } + var lastErr error + for valid := true; valid; valid = b.addressList.increment() { + curAddr := b.addressList.currentAddress() + sd, ok := b.subConns.Get(curAddr) + if !ok { + var err error + // We want to assign the new scData to sd from the outer scope, + // hence we can't use := below. + sd, err = b.newSCData(curAddr) + if err != nil { + // This should never happen, unless the clientConn is being shut + // down. + if b.logger.V(2) { + b.logger.Infof("Failed to create a subConn for address %v: %v", curAddr.String(), err) + } + // Do nothing, the LB policy will be closed soon. + return + } + b.subConns.Set(curAddr, sd) + } + + switch sd.rawConnectivityState { + case connectivity.Idle: + sd.subConn.Connect() + b.scheduleNextConnectionLocked() + return + case connectivity.TransientFailure: + // The SubConn is being re-used and failed during a previous pass + // over the addressList. It has not completed backoff yet. + // Mark it as having failed and try the next address. + sd.connectionFailedInFirstPass = true + lastErr = sd.lastErr + continue + case connectivity.Connecting: + // Wait for the connection attempt to complete or the timer to fire + // before attempting the next address. + b.scheduleNextConnectionLocked() + return + default: + b.logger.Errorf("SubConn with unexpected state %v present in SubConns map.", sd.rawConnectivityState) + return + + } + } + + // All the remaining addresses in the list are in TRANSIENT_FAILURE, end the + // first pass if possible. + b.endFirstPassIfPossibleLocked(lastErr) +} + +func (b *pickfirstBalancer) scheduleNextConnectionLocked() { + b.cancelConnectionTimer() + if !b.addressList.hasNext() { + return + } + curAddr := b.addressList.currentAddress() + cancelled := false // Access to this is protected by the balancer's mutex. + closeFn := internal.TimeAfterFunc(connectionDelayInterval, func() { + b.mu.Lock() + defer b.mu.Unlock() + // If the scheduled task is cancelled while acquiring the mutex, return. 
+		if cancelled {
+			return
+		}
+		if b.logger.V(2) {
+			b.logger.Infof("Happy Eyeballs timer expired while waiting for connection to %q.", curAddr.Addr)
+		}
+		if b.addressList.increment() {
+			b.requestConnectionLocked()
+		}
+	})
+	// Access to the cancellation callback held by the balancer is guarded by
+	// the balancer's mutex, so it's safe to set the boolean from the callback.
+	b.cancelConnectionTimer = sync.OnceFunc(func() {
+		cancelled = true
+		closeFn()
+	})
+}
+
+func (b *pickfirstBalancer) updateSubConnState(sd *scData, newState balancer.SubConnState) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	oldState := sd.rawConnectivityState
+	sd.rawConnectivityState = newState.ConnectivityState
+	// Previously relevant SubConns can still call back with state updates.
+	// To prevent pickers from returning these obsolete SubConns, this logic
+	// is included to check if the current list of active SubConns includes this
+	// SubConn.
+	if !b.isActiveSCData(sd) {
+		return
+	}
+	if newState.ConnectivityState == connectivity.Shutdown {
+		sd.effectiveState = connectivity.Shutdown
+		return
+	}
+
+	// Record a connection attempt when exiting CONNECTING.
+	if newState.ConnectivityState == connectivity.TransientFailure {
+		sd.connectionFailedInFirstPass = true
+		connectionAttemptsFailedMetric.Record(b.metricsRecorder, 1, b.target)
+	}
+
+	if newState.ConnectivityState == connectivity.Ready {
+		connectionAttemptsSucceededMetric.Record(b.metricsRecorder, 1, b.target)
+		b.shutdownRemainingLocked(sd)
+		if !b.addressList.seekTo(sd.addr) {
+			// This should not fail as we should have only one SubConn after
+			// entering READY. The SubConn should be present in the addressList.
+			b.logger.Errorf("Address %q not found in address list %v", sd.addr, b.addressList.addresses)
+			return
+		}
+		if !b.healthCheckingEnabled {
+			if b.logger.V(2) {
+				b.logger.Infof("SubConn %p reported connectivity state READY and the health listener is disabled. Transitioning SubConn to READY.", sd.subConn)
+			}
+
+			sd.effectiveState = connectivity.Ready
+			b.updateBalancerState(balancer.State{
+				ConnectivityState: connectivity.Ready,
+				Picker:            &picker{result: balancer.PickResult{SubConn: sd.subConn}},
+			})
+			return
+		}
+		if b.logger.V(2) {
+			b.logger.Infof("SubConn %p reported connectivity state READY. Registering health listener.", sd.subConn)
+		}
+		// Send a CONNECTING update to take the SubConn out of sticky-TF if
+		// required.
+		sd.effectiveState = connectivity.Connecting
+		b.updateBalancerState(balancer.State{
+			ConnectivityState: connectivity.Connecting,
+			Picker:            &picker{err: balancer.ErrNoSubConnAvailable},
+		})
+		sd.subConn.RegisterHealthListener(func(scs balancer.SubConnState) {
+			b.updateSubConnHealthState(sd, scs)
+		})
+		return
+	}
+
+	// If the LB policy is READY, and it receives a subchannel state change,
+	// it means that the READY subchannel has failed.
+	// A SubConn can also transition from CONNECTING directly to IDLE when
+	// a transport is successfully created, but the connection fails
+	// before the SubConn can send the notification for READY. We treat
+	// this as a successful connection and transition to IDLE.
+	// TODO: https://github.com/grpc/grpc-go/issues/7862 - Remove the second
+	// part of the if condition below once the issue is fixed.
+	if oldState == connectivity.Ready || (oldState == connectivity.Connecting && newState.ConnectivityState == connectivity.Idle) {
+		// Once a transport fails, the balancer enters IDLE and starts from
+		// the first address when the picker is used.
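+		// All other SubConns are shut down and IDLE is reported with an
+		// idlePicker; the first pick triggers ExitIdle, restarting from the
+		// first address.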
+ b.shutdownRemainingLocked(sd) + sd.effectiveState = newState.ConnectivityState + // READY SubConn interspliced in between CONNECTING and IDLE, need to + // account for that. + if oldState == connectivity.Connecting { + // A known issue (https://github.com/grpc/grpc-go/issues/7862) + // causes a race that prevents the READY state change notification. + // This works around it. + connectionAttemptsSucceededMetric.Record(b.metricsRecorder, 1, b.target) + } + disconnectionsMetric.Record(b.metricsRecorder, 1, b.target) + b.addressList.reset() + b.updateBalancerState(balancer.State{ + ConnectivityState: connectivity.Idle, + Picker: &idlePicker{exitIdle: sync.OnceFunc(b.ExitIdle)}, + }) + return + } + + if b.firstPass { + switch newState.ConnectivityState { + case connectivity.Connecting: + // The effective state can be in either IDLE, CONNECTING or + // TRANSIENT_FAILURE. If it's TRANSIENT_FAILURE, stay in + // TRANSIENT_FAILURE until it's READY. See A62. + if sd.effectiveState != connectivity.TransientFailure { + sd.effectiveState = connectivity.Connecting + b.updateBalancerState(balancer.State{ + ConnectivityState: connectivity.Connecting, + Picker: &picker{err: balancer.ErrNoSubConnAvailable}, + }) + } + case connectivity.TransientFailure: + sd.lastErr = newState.ConnectionError + sd.effectiveState = connectivity.TransientFailure + // Since we're re-using common SubConns while handling resolver + // updates, we could receive an out of turn TRANSIENT_FAILURE from + // a pass over the previous address list. Happy Eyeballs will also + // cause out of order updates to arrive. + + if curAddr := b.addressList.currentAddress(); equalAddressIgnoringBalAttributes(&curAddr, &sd.addr) { + b.cancelConnectionTimer() + if b.addressList.increment() { + b.requestConnectionLocked() + return + } + } + + // End the first pass if we've seen a TRANSIENT_FAILURE from all + // SubConns once. + b.endFirstPassIfPossibleLocked(newState.ConnectionError) + } + return + } + + // We have finished the first pass, keep re-connecting failing SubConns. + switch newState.ConnectivityState { + case connectivity.TransientFailure: + b.numTF = (b.numTF + 1) % b.subConns.Len() + sd.lastErr = newState.ConnectionError + if b.numTF%b.subConns.Len() == 0 { + b.updateBalancerState(balancer.State{ + ConnectivityState: connectivity.TransientFailure, + Picker: &picker{err: newState.ConnectionError}, + }) + } + // We don't need to request re-resolution since the SubConn already + // does that before reporting TRANSIENT_FAILURE. + // TODO: #7534 - Move re-resolution requests from SubConn into + // pick_first. + case connectivity.Idle: + sd.subConn.Connect() + } +} + +// endFirstPassIfPossibleLocked ends the first happy-eyeballs pass if all the +// addresses are tried and their SubConns have reported a failure. +func (b *pickfirstBalancer) endFirstPassIfPossibleLocked(lastErr error) { + // An optimization to avoid iterating over the entire SubConn map. + if b.addressList.isValid() { + return + } + // Connect() has been called on all the SubConns. The first pass can be + // ended if all the SubConns have reported a failure. + for _, sd := range b.subConns.Values() { + if !sd.connectionFailedInFirstPass { + return + } + } + b.firstPass = false + b.updateBalancerState(balancer.State{ + ConnectivityState: connectivity.TransientFailure, + Picker: &picker{err: lastErr}, + }) + // Start re-connecting all the SubConns that are already in IDLE. 
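+	// SubConns in TRANSIENT_FAILURE will transition to IDLE on their own once
+	// their backoff expires and are reconnected via the IDLE case in
+	// updateSubConnState above.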
+	for _, sd := range b.subConns.Values() {
+		if sd.rawConnectivityState == connectivity.Idle {
+			sd.subConn.Connect()
+		}
+	}
+}
+
+func (b *pickfirstBalancer) isActiveSCData(sd *scData) bool {
+	activeSD, found := b.subConns.Get(sd.addr)
+	return found && activeSD == sd
+}
+
+func (b *pickfirstBalancer) updateSubConnHealthState(sd *scData, state balancer.SubConnState) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	// Previously relevant SubConns can still call back with state updates.
+	// To prevent pickers from returning these obsolete SubConns, this logic
+	// is included to check if the current list of active SubConns includes
+	// this SubConn.
+	if !b.isActiveSCData(sd) {
+		return
+	}
+	sd.effectiveState = state.ConnectivityState
+	switch state.ConnectivityState {
+	case connectivity.Ready:
+		b.updateBalancerState(balancer.State{
+			ConnectivityState: connectivity.Ready,
+			Picker:            &picker{result: balancer.PickResult{SubConn: sd.subConn}},
+		})
+	case connectivity.TransientFailure:
+		b.updateBalancerState(balancer.State{
+			ConnectivityState: connectivity.TransientFailure,
+			Picker:            &picker{err: fmt.Errorf("pickfirst: health check failure: %v", state.ConnectionError)},
+		})
+	case connectivity.Connecting:
+		b.updateBalancerState(balancer.State{
+			ConnectivityState: connectivity.Connecting,
+			Picker:            &picker{err: balancer.ErrNoSubConnAvailable},
+		})
+	default:
+		b.logger.Errorf("Got unexpected health update for SubConn %p: %v", sd.subConn, state)
+	}
+}
+
+// updateBalancerState stores the state reported to the channel and calls
+// ClientConn.UpdateState(). As an optimization, it avoids sending duplicate
+// updates to the channel.
+func (b *pickfirstBalancer) updateBalancerState(newState balancer.State) {
+	// In case of TransientFailure, allow the picker to be updated to refresh
+	// the connectivity error; in all other cases, don't send duplicate state
+	// updates.
+	if newState.ConnectivityState == b.state && b.state != connectivity.TransientFailure {
+		return
+	}
+	b.forceUpdateConcludedStateLocked(newState)
+}
+
+// forceUpdateConcludedStateLocked stores the state reported to the channel and
+// calls ClientConn.UpdateState().
+// A separate function is defined to force update the ClientConn state since the
+// channel doesn't correctly assume that LB policies start in CONNECTING and
+// relies on the LB policy to send an initial CONNECTING update.
+func (b *pickfirstBalancer) forceUpdateConcludedStateLocked(newState balancer.State) {
+	b.state = newState.ConnectivityState
+	b.cc.UpdateState(newState)
+}
+
+type picker struct {
+	result balancer.PickResult
+	err    error
+}
+
+func (p *picker) Pick(balancer.PickInfo) (balancer.PickResult, error) {
+	return p.result, p.err
+}
+
+// idlePicker is used when the SubConn is IDLE and kicks the SubConn into
+// CONNECTING when Pick is called.
+type idlePicker struct {
+	exitIdle func()
+}
+
+func (i *idlePicker) Pick(balancer.PickInfo) (balancer.PickResult, error) {
+	i.exitIdle()
+	return balancer.PickResult{}, balancer.ErrNoSubConnAvailable
+}
+
+// addressList manages sequentially iterating over addresses present in a list
+// of endpoints. It provides a 1-dimensional view of the addresses present in
+// the endpoints.
+// This type is not safe for concurrent access.
+type addressList struct {
+	addresses []resolver.Address
+	idx       int
+}
+
+func (al *addressList) isValid() bool {
+	return al.idx < len(al.addresses)
+}
+
+func (al *addressList) size() int {
+	return len(al.addresses)
+}
+
+// increment moves to the next index in the address list.
+// This method returns false if it went off the list, true otherwise.
+func (al *addressList) increment() bool {
+	if !al.isValid() {
+		return false
+	}
+	al.idx++
+	return al.idx < len(al.addresses)
+}
+
+// currentAddress returns the current address pointed to in the addressList.
+// If the list is in an invalid state, it returns an empty address instead.
+func (al *addressList) currentAddress() resolver.Address {
+	if !al.isValid() {
+		return resolver.Address{}
+	}
+	return al.addresses[al.idx]
+}
+
+func (al *addressList) reset() {
+	al.idx = 0
+}
+
+func (al *addressList) updateAddrs(addrs []resolver.Address) {
+	al.addresses = addrs
+	al.reset()
+}
+
+// seekTo returns false if the needle was not found, in which case the current
+// index is left unchanged.
+func (al *addressList) seekTo(needle resolver.Address) bool {
+	for ai, addr := range al.addresses {
+		if !equalAddressIgnoringBalAttributes(&addr, &needle) {
+			continue
+		}
+		al.idx = ai
+		return true
+	}
+	return false
+}
+
+// hasNext returns true if incrementing the addressList will keep it within
+// the bounds of the list, i.e. there is an address after the current one. If
+// the list has already moved past the end, it returns false.
+func (al *addressList) hasNext() bool {
+	if !al.isValid() {
+		return false
+	}
+	return al.idx+1 < len(al.addresses)
+}
+
+// equalAddressIgnoringBalAttributes returns true if a and b are considered
+// equal. This is different from the Equal method on the resolver.Address type
+// which considers all fields to determine equality. Here, we only consider
+// fields that are meaningful to the SubConn.
+func equalAddressIgnoringBalAttributes(a, b *resolver.Address) bool {
+	return a.Addr == b.Addr && a.ServerName == b.ServerName &&
+		a.Attributes.Equal(b.Attributes)
+}
diff --git a/balancer/pickfirst/pickfirstleaf/pickfirstleaf.go b/balancer/pickfirst/pickfirstleaf/pickfirstleaf.go
index 780cdc4f8967..824b854c66ba 100644
--- a/balancer/pickfirst/pickfirstleaf/pickfirstleaf.go
+++ b/balancer/pickfirst/pickfirstleaf/pickfirstleaf.go
@@ -25,890 +25,6 @@
 // later release.
 package pickfirstleaf
 
-import (
-	"encoding/json"
-	"errors"
-	"fmt"
-	"net"
-	"net/netip"
-	"sync"
-	"time"
-
-	"google.golang.org/grpc/balancer"
-	"google.golang.org/grpc/balancer/pickfirst/internal"
-	"google.golang.org/grpc/connectivity"
-	expstats "google.golang.org/grpc/experimental/stats"
-	"google.golang.org/grpc/grpclog"
-	"google.golang.org/grpc/internal/envconfig"
-	internalgrpclog "google.golang.org/grpc/internal/grpclog"
-	"google.golang.org/grpc/internal/pretty"
-	"google.golang.org/grpc/resolver"
-	"google.golang.org/grpc/serviceconfig"
-)
-
-func init() {
-	if envconfig.NewPickFirstEnabled {
-		// Register as the default pick_first balancer.
-		Name = "pick_first"
-	}
-	balancer.Register(pickfirstBuilder{})
-}
-
-// enableHealthListenerKeyType is a unique key type used in resolver
-// attributes to indicate whether the health listener usage is enabled.
-type enableHealthListenerKeyType struct{}
-
-var (
-	logger = grpclog.Component("pick-first-leaf-lb")
-	// Name is the name of the pick_first_leaf balancer.
-	// It is changed to "pick_first" in init() if this balancer is to be
-	// registered as the default pickfirst.
-	Name                 = "pick_first_leaf"
-	disconnectionsMetric = expstats.RegisterInt64Count(expstats.MetricDescriptor{
-		Name:        "grpc.lb.pick_first.disconnections",
-		Description: "EXPERIMENTAL. 
Number of times the selected subchannel becomes disconnected.", - Unit: "{disconnection}", - Labels: []string{"grpc.target"}, - Default: false, - }) - connectionAttemptsSucceededMetric = expstats.RegisterInt64Count(expstats.MetricDescriptor{ - Name: "grpc.lb.pick_first.connection_attempts_succeeded", - Description: "EXPERIMENTAL. Number of successful connection attempts.", - Unit: "{attempt}", - Labels: []string{"grpc.target"}, - Default: false, - }) - connectionAttemptsFailedMetric = expstats.RegisterInt64Count(expstats.MetricDescriptor{ - Name: "grpc.lb.pick_first.connection_attempts_failed", - Description: "EXPERIMENTAL. Number of failed connection attempts.", - Unit: "{attempt}", - Labels: []string{"grpc.target"}, - Default: false, - }) -) - -const ( - // TODO: change to pick-first when this becomes the default pick_first policy. - logPrefix = "[pick-first-leaf-lb %p] " - // connectionDelayInterval is the time to wait for during the happy eyeballs - // pass before starting the next connection attempt. - connectionDelayInterval = 250 * time.Millisecond -) - -type ipAddrFamily int - -const ( - // ipAddrFamilyUnknown represents strings that can't be parsed as an IP - // address. - ipAddrFamilyUnknown ipAddrFamily = iota - ipAddrFamilyV4 - ipAddrFamilyV6 -) - -type pickfirstBuilder struct{} - -func (pickfirstBuilder) Build(cc balancer.ClientConn, bo balancer.BuildOptions) balancer.Balancer { - b := &pickfirstBalancer{ - cc: cc, - target: bo.Target.String(), - metricsRecorder: cc.MetricsRecorder(), - - subConns: resolver.NewAddressMapV2[*scData](), - state: connectivity.Connecting, - cancelConnectionTimer: func() {}, - } - b.logger = internalgrpclog.NewPrefixLogger(logger, fmt.Sprintf(logPrefix, b)) - return b -} - -func (b pickfirstBuilder) Name() string { - return Name -} - -func (pickfirstBuilder) ParseConfig(js json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { - var cfg pfConfig - if err := json.Unmarshal(js, &cfg); err != nil { - return nil, fmt.Errorf("pickfirst: unable to unmarshal LB policy config: %s, error: %v", string(js), err) - } - return cfg, nil -} - -// EnableHealthListener updates the state to configure pickfirst for using a -// generic health listener. -func EnableHealthListener(state resolver.State) resolver.State { - state.Attributes = state.Attributes.WithValue(enableHealthListenerKeyType{}, true) - return state -} - -type pfConfig struct { - serviceconfig.LoadBalancingConfig `json:"-"` - - // If set to true, instructs the LB policy to shuffle the order of the list - // of endpoints received from the name resolver before attempting to - // connect to them. - ShuffleAddressList bool `json:"shuffleAddressList"` -} - -// scData keeps track of the current state of the subConn. -// It is not safe for concurrent access. -type scData struct { - // The following fields are initialized at build time and read-only after - // that. - subConn balancer.SubConn - addr resolver.Address - - rawConnectivityState connectivity.State - // The effective connectivity state based on raw connectivity, health state - // and after following sticky TransientFailure behaviour defined in A62. 
- effectiveState connectivity.State - lastErr error - connectionFailedInFirstPass bool -} - -func (b *pickfirstBalancer) newSCData(addr resolver.Address) (*scData, error) { - sd := &scData{ - rawConnectivityState: connectivity.Idle, - effectiveState: connectivity.Idle, - addr: addr, - } - sc, err := b.cc.NewSubConn([]resolver.Address{addr}, balancer.NewSubConnOptions{ - StateListener: func(state balancer.SubConnState) { - b.updateSubConnState(sd, state) - }, - }) - if err != nil { - return nil, err - } - sd.subConn = sc - return sd, nil -} - -type pickfirstBalancer struct { - // The following fields are initialized at build time and read-only after - // that and therefore do not need to be guarded by a mutex. - logger *internalgrpclog.PrefixLogger - cc balancer.ClientConn - target string - metricsRecorder expstats.MetricsRecorder // guaranteed to be non nil - - // The mutex is used to ensure synchronization of updates triggered - // from the idle picker and the already serialized resolver, - // SubConn state updates. - mu sync.Mutex - // State reported to the channel based on SubConn states and resolver - // updates. - state connectivity.State - // scData for active subonns mapped by address. - subConns *resolver.AddressMapV2[*scData] - addressList addressList - firstPass bool - numTF int - cancelConnectionTimer func() - healthCheckingEnabled bool -} - -// ResolverError is called by the ClientConn when the name resolver produces -// an error or when pickfirst determined the resolver update to be invalid. -func (b *pickfirstBalancer) ResolverError(err error) { - b.mu.Lock() - defer b.mu.Unlock() - b.resolverErrorLocked(err) -} - -func (b *pickfirstBalancer) resolverErrorLocked(err error) { - if b.logger.V(2) { - b.logger.Infof("Received error from the name resolver: %v", err) - } - - // The picker will not change since the balancer does not currently - // report an error. If the balancer hasn't received a single good resolver - // update yet, transition to TRANSIENT_FAILURE. - if b.state != connectivity.TransientFailure && b.addressList.size() > 0 { - if b.logger.V(2) { - b.logger.Infof("Ignoring resolver error because balancer is using a previous good update.") - } - return - } - - b.updateBalancerState(balancer.State{ - ConnectivityState: connectivity.TransientFailure, - Picker: &picker{err: fmt.Errorf("name resolver error: %v", err)}, - }) -} - -func (b *pickfirstBalancer) UpdateClientConnState(state balancer.ClientConnState) error { - b.mu.Lock() - defer b.mu.Unlock() - b.cancelConnectionTimer() - if len(state.ResolverState.Addresses) == 0 && len(state.ResolverState.Endpoints) == 0 { - // Cleanup state pertaining to the previous resolver state. - // Treat an empty address list like an error by calling b.ResolverError. 
- b.closeSubConnsLocked() - b.addressList.updateAddrs(nil) - b.resolverErrorLocked(errors.New("produced zero addresses")) - return balancer.ErrBadResolverState - } - b.healthCheckingEnabled = state.ResolverState.Attributes.Value(enableHealthListenerKeyType{}) != nil - cfg, ok := state.BalancerConfig.(pfConfig) - if state.BalancerConfig != nil && !ok { - return fmt.Errorf("pickfirst: received illegal BalancerConfig (type %T): %v: %w", state.BalancerConfig, state.BalancerConfig, balancer.ErrBadResolverState) - } - - if b.logger.V(2) { - b.logger.Infof("Received new config %s, resolver state %s", pretty.ToJSON(cfg), pretty.ToJSON(state.ResolverState)) - } - - var newAddrs []resolver.Address - if endpoints := state.ResolverState.Endpoints; len(endpoints) != 0 { - // Perform the optional shuffling described in gRFC A62. The shuffling - // will change the order of endpoints but not touch the order of the - // addresses within each endpoint. - A61 - if cfg.ShuffleAddressList { - endpoints = append([]resolver.Endpoint{}, endpoints...) - internal.RandShuffle(len(endpoints), func(i, j int) { endpoints[i], endpoints[j] = endpoints[j], endpoints[i] }) - } - - // "Flatten the list by concatenating the ordered list of addresses for - // each of the endpoints, in order." - A61 - for _, endpoint := range endpoints { - newAddrs = append(newAddrs, endpoint.Addresses...) - } - } else { - // Endpoints not set, process addresses until we migrate resolver - // emissions fully to Endpoints. The top channel does wrap emitted - // addresses with endpoints, however some balancers such as weighted - // target do not forward the corresponding correct endpoints down/split - // endpoints properly. Once all balancers correctly forward endpoints - // down, can delete this else conditional. - newAddrs = state.ResolverState.Addresses - if cfg.ShuffleAddressList { - newAddrs = append([]resolver.Address{}, newAddrs...) - internal.RandShuffle(len(newAddrs), func(i, j int) { newAddrs[i], newAddrs[j] = newAddrs[j], newAddrs[i] }) - } - } - - // If an address appears in multiple endpoints or in the same endpoint - // multiple times, we keep it only once. We will create only one SubConn - // for the address because an AddressMap is used to store SubConns. - // Not de-duplicating would result in attempting to connect to the same - // SubConn multiple times in the same pass. We don't want this. - newAddrs = deDupAddresses(newAddrs) - newAddrs = interleaveAddresses(newAddrs) - - prevAddr := b.addressList.currentAddress() - prevSCData, found := b.subConns.Get(prevAddr) - prevAddrsCount := b.addressList.size() - isPrevRawConnectivityStateReady := found && prevSCData.rawConnectivityState == connectivity.Ready - b.addressList.updateAddrs(newAddrs) - - // If the previous ready SubConn exists in new address list, - // keep this connection and don't create new SubConns. - if isPrevRawConnectivityStateReady && b.addressList.seekTo(prevAddr) { - return nil - } - - b.reconcileSubConnsLocked(newAddrs) - // If it's the first resolver update or the balancer was already READY - // (but the new address list does not contain the ready SubConn) or - // CONNECTING, enter CONNECTING. - // We may be in TRANSIENT_FAILURE due to a previous empty address list, - // we should still enter CONNECTING because the sticky TF behaviour - // mentioned in A62 applies only when the TRANSIENT_FAILURE is reported - // due to connectivity failures. 
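// A worked example of the shuffle-then-flatten step above (endpoint and
// address values are hypothetical): shuffling may reorder endpoints, but
// never the addresses within an endpoint, and flattening concatenates them in
// endpoint order per A61.
//
//	endpoints := []resolver.Endpoint{
//		{Addresses: []resolver.Address{{Addr: "a1"}, {Addr: "a2"}}}, // E1
//		{Addresses: []resolver.Address{{Addr: "b1"}}},               // E2
//	}
//	// Possible flattened lists: [a1 a2 b1] (order E1,E2) or [b1 a1 a2]
//	// (order E2,E1). A list with a2 before a1 can never be produced.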
- if isPrevRawConnectivityStateReady || b.state == connectivity.Connecting || prevAddrsCount == 0 { - // Start connection attempt at first address. - b.forceUpdateConcludedStateLocked(balancer.State{ - ConnectivityState: connectivity.Connecting, - Picker: &picker{err: balancer.ErrNoSubConnAvailable}, - }) - b.startFirstPassLocked() - } else if b.state == connectivity.TransientFailure { - // If we're in TRANSIENT_FAILURE, we stay in TRANSIENT_FAILURE until - // we're READY. See A62. - b.startFirstPassLocked() - } - return nil -} - -// UpdateSubConnState is unused as a StateListener is always registered when -// creating SubConns. -func (b *pickfirstBalancer) UpdateSubConnState(subConn balancer.SubConn, state balancer.SubConnState) { - b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", subConn, state) -} - -func (b *pickfirstBalancer) Close() { - b.mu.Lock() - defer b.mu.Unlock() - b.closeSubConnsLocked() - b.cancelConnectionTimer() - b.state = connectivity.Shutdown -} - -// ExitIdle moves the balancer out of idle state. It can be called concurrently -// by the idlePicker and clientConn so access to variables should be -// synchronized. -func (b *pickfirstBalancer) ExitIdle() { - b.mu.Lock() - defer b.mu.Unlock() - if b.state == connectivity.Idle { - // Move the balancer into CONNECTING state immediately. This is done to - // avoid staying in IDLE if a resolver update arrives before the first - // SubConn reports CONNECTING. - b.updateBalancerState(balancer.State{ - ConnectivityState: connectivity.Connecting, - Picker: &picker{err: balancer.ErrNoSubConnAvailable}, - }) - b.startFirstPassLocked() - } -} - -func (b *pickfirstBalancer) startFirstPassLocked() { - b.firstPass = true - b.numTF = 0 - // Reset the connection attempt record for existing SubConns. - for _, sd := range b.subConns.Values() { - sd.connectionFailedInFirstPass = false - } - b.requestConnectionLocked() -} - -func (b *pickfirstBalancer) closeSubConnsLocked() { - for _, sd := range b.subConns.Values() { - sd.subConn.Shutdown() - } - b.subConns = resolver.NewAddressMapV2[*scData]() -} - -// deDupAddresses ensures that each address appears only once in the slice. -func deDupAddresses(addrs []resolver.Address) []resolver.Address { - seenAddrs := resolver.NewAddressMapV2[bool]() - retAddrs := []resolver.Address{} - - for _, addr := range addrs { - if _, ok := seenAddrs.Get(addr); ok { - continue - } - seenAddrs.Set(addr, true) - retAddrs = append(retAddrs, addr) - } - return retAddrs -} - -// interleaveAddresses interleaves addresses of both families (IPv4 and IPv6) -// as per RFC-8305 section 4. -// Whichever address family is first in the list is followed by an address of -// the other address family; that is, if the first address in the list is IPv6, -// then the first IPv4 address should be moved up in the list to be second in -// the list. It doesn't support configuring "First Address Family Count", i.e. -// there will always be a single member of the first address family at the -// beginning of the interleaved list. -// Addresses that are neither IPv4 nor IPv6 are treated as part of a third -// "unknown" family for interleaving. 
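// A worked example of the interleaving described above (addresses are
// hypothetical): given the flattened input
//
//	[v6a, v6b, v4a, v4b, v6c]
//
// IPv6 is the first family seen, so the families alternate IPv6, IPv4,
// yielding
//
//	[v6a, v4a, v6b, v4b, v6c]
//
// Once the IPv4 members run out, the remaining IPv6 addresses are appended in
// their original order.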
-// See: https://datatracker.ietf.org/doc/html/rfc8305#autoid-6 -func interleaveAddresses(addrs []resolver.Address) []resolver.Address { - familyAddrsMap := map[ipAddrFamily][]resolver.Address{} - interleavingOrder := []ipAddrFamily{} - for _, addr := range addrs { - family := addressFamily(addr.Addr) - if _, found := familyAddrsMap[family]; !found { - interleavingOrder = append(interleavingOrder, family) - } - familyAddrsMap[family] = append(familyAddrsMap[family], addr) - } - - interleavedAddrs := make([]resolver.Address, 0, len(addrs)) - - for curFamilyIdx := 0; len(interleavedAddrs) < len(addrs); curFamilyIdx = (curFamilyIdx + 1) % len(interleavingOrder) { - // Some IP types may have fewer addresses than others, so we look for - // the next type that has a remaining member to add to the interleaved - // list. - family := interleavingOrder[curFamilyIdx] - remainingMembers := familyAddrsMap[family] - if len(remainingMembers) > 0 { - interleavedAddrs = append(interleavedAddrs, remainingMembers[0]) - familyAddrsMap[family] = remainingMembers[1:] - } - } - - return interleavedAddrs -} - -// addressFamily returns the ipAddrFamily after parsing the address string. -// If the address isn't of the format "ip-address:port", it returns -// ipAddrFamilyUnknown. The address may be valid even if it's not an IP when -// using a resolver like passthrough where the address may be a hostname in -// some format that the dialer can resolve. -func addressFamily(address string) ipAddrFamily { - // Parse the IP after removing the port. - host, _, err := net.SplitHostPort(address) - if err != nil { - return ipAddrFamilyUnknown - } - ip, err := netip.ParseAddr(host) - if err != nil { - return ipAddrFamilyUnknown - } - switch { - case ip.Is4() || ip.Is4In6(): - return ipAddrFamilyV4 - case ip.Is6(): - return ipAddrFamilyV6 - default: - return ipAddrFamilyUnknown - } -} - -// reconcileSubConnsLocked updates the active subchannels based on a new address -// list from the resolver. It does this by: -// - closing subchannels: any existing subchannels associated with addresses -// that are no longer in the updated list are shut down. -// - removing subchannels: entries for these closed subchannels are removed -// from the subchannel map. -// -// This ensures that the subchannel map accurately reflects the current set of -// addresses received from the name resolver. -func (b *pickfirstBalancer) reconcileSubConnsLocked(newAddrs []resolver.Address) { - newAddrsMap := resolver.NewAddressMapV2[bool]() - for _, addr := range newAddrs { - newAddrsMap.Set(addr, true) - } - - for _, oldAddr := range b.subConns.Keys() { - if _, ok := newAddrsMap.Get(oldAddr); ok { - continue - } - val, _ := b.subConns.Get(oldAddr) - val.subConn.Shutdown() - b.subConns.Delete(oldAddr) - } -} - -// shutdownRemainingLocked shuts down remaining subConns. Called when a subConn -// becomes ready, which means that all other subConn must be shutdown. -func (b *pickfirstBalancer) shutdownRemainingLocked(selected *scData) { - b.cancelConnectionTimer() - for _, sd := range b.subConns.Values() { - if sd.subConn != selected.subConn { - sd.subConn.Shutdown() - } - } - b.subConns = resolver.NewAddressMapV2[*scData]() - b.subConns.Set(selected.addr, selected) -} - -// requestConnectionLocked starts connecting on the subchannel corresponding to -// the current address. If no subchannel exists, one is created. If the current -// subchannel is in TransientFailure, a connection to the next address is -// attempted until a subchannel is found. 
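// How addressFamily above classifies its input, shown on illustrative values:
//
//	addressFamily("10.0.0.1:443")      // ipAddrFamilyV4
//	addressFamily("[2001:db8::1]:443") // ipAddrFamilyV6
//	addressFamily("example.com:443")   // ipAddrFamilyUnknown (hostname, not an IP)
//	addressFamily("10.0.0.1")          // ipAddrFamilyUnknown (missing port, SplitHostPort fails)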
-func (b *pickfirstBalancer) requestConnectionLocked() { - if !b.addressList.isValid() { - return - } - var lastErr error - for valid := true; valid; valid = b.addressList.increment() { - curAddr := b.addressList.currentAddress() - sd, ok := b.subConns.Get(curAddr) - if !ok { - var err error - // We want to assign the new scData to sd from the outer scope, - // hence we can't use := below. - sd, err = b.newSCData(curAddr) - if err != nil { - // This should never happen, unless the clientConn is being shut - // down. - if b.logger.V(2) { - b.logger.Infof("Failed to create a subConn for address %v: %v", curAddr.String(), err) - } - // Do nothing, the LB policy will be closed soon. - return - } - b.subConns.Set(curAddr, sd) - } - - switch sd.rawConnectivityState { - case connectivity.Idle: - sd.subConn.Connect() - b.scheduleNextConnectionLocked() - return - case connectivity.TransientFailure: - // The SubConn is being re-used and failed during a previous pass - // over the addressList. It has not completed backoff yet. - // Mark it as having failed and try the next address. - sd.connectionFailedInFirstPass = true - lastErr = sd.lastErr - continue - case connectivity.Connecting: - // Wait for the connection attempt to complete or the timer to fire - // before attempting the next address. - b.scheduleNextConnectionLocked() - return - default: - b.logger.Errorf("SubConn with unexpected state %v present in SubConns map.", sd.rawConnectivityState) - return - - } - } - - // All the remaining addresses in the list are in TRANSIENT_FAILURE, end the - // first pass if possible. - b.endFirstPassIfPossibleLocked(lastErr) -} - -func (b *pickfirstBalancer) scheduleNextConnectionLocked() { - b.cancelConnectionTimer() - if !b.addressList.hasNext() { - return - } - curAddr := b.addressList.currentAddress() - cancelled := false // Access to this is protected by the balancer's mutex. - closeFn := internal.TimeAfterFunc(connectionDelayInterval, func() { - b.mu.Lock() - defer b.mu.Unlock() - // If the scheduled task is cancelled while acquiring the mutex, return. - if cancelled { - return - } - if b.logger.V(2) { - b.logger.Infof("Happy Eyeballs timer expired while waiting for connection to %q.", curAddr.Addr) - } - if b.addressList.increment() { - b.requestConnectionLocked() - } - }) - // Access to the cancellation callback held by the balancer is guarded by - // the balancer's mutex, so it's safe to set the boolean from the callback. - b.cancelConnectionTimer = sync.OnceFunc(func() { - cancelled = true - closeFn() - }) -} - -func (b *pickfirstBalancer) updateSubConnState(sd *scData, newState balancer.SubConnState) { - b.mu.Lock() - defer b.mu.Unlock() - oldState := sd.rawConnectivityState - sd.rawConnectivityState = newState.ConnectivityState - // Previously relevant SubConns can still callback with state updates. - // To prevent pickers from returning these obsolete SubConns, this logic - // is included to check if the current list of active SubConns includes this - // SubConn. - if !b.isActiveSCData(sd) { - return - } - if newState.ConnectivityState == connectivity.Shutdown { - sd.effectiveState = connectivity.Shutdown - return - } - - // Record a connection attempt when exiting CONNECTING. 
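// The cancellation pattern used by scheduleNextConnectionLocked above, as a
// standalone sketch (time.AfterFunc stands in for the injectable
// internal.TimeAfterFunc, and mu for the balancer's mutex): a boolean guarded
// by the mutex lets a timer that has already fired detect that it lost the
// race with cancellation.
//
//	var mu sync.Mutex
//	cancelled := false
//	timer := time.AfterFunc(connectionDelayInterval, func() {
//		mu.Lock()
//		defer mu.Unlock()
//		if cancelled { // cancel ran while this callback awaited the lock
//			return
//		}
//		// ... attempt the next address ...
//	})
//	cancel := sync.OnceFunc(func() {
//		mu.Lock()
//		cancelled = true
//		mu.Unlock()
//		timer.Stop()
//	})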
- if newState.ConnectivityState == connectivity.TransientFailure { - sd.connectionFailedInFirstPass = true - connectionAttemptsFailedMetric.Record(b.metricsRecorder, 1, b.target) - } - - if newState.ConnectivityState == connectivity.Ready { - connectionAttemptsSucceededMetric.Record(b.metricsRecorder, 1, b.target) - b.shutdownRemainingLocked(sd) - if !b.addressList.seekTo(sd.addr) { - // This should not fail as we should have only one SubConn after - // entering READY. The SubConn should be present in the addressList. - b.logger.Errorf("Address %q not found address list in %v", sd.addr, b.addressList.addresses) - return - } - if !b.healthCheckingEnabled { - if b.logger.V(2) { - b.logger.Infof("SubConn %p reported connectivity state READY and the health listener is disabled. Transitioning SubConn to READY.", sd.subConn) - } - - sd.effectiveState = connectivity.Ready - b.updateBalancerState(balancer.State{ - ConnectivityState: connectivity.Ready, - Picker: &picker{result: balancer.PickResult{SubConn: sd.subConn}}, - }) - return - } - if b.logger.V(2) { - b.logger.Infof("SubConn %p reported connectivity state READY. Registering health listener.", sd.subConn) - } - // Send a CONNECTING update to take the SubConn out of sticky-TF if - // required. - sd.effectiveState = connectivity.Connecting - b.updateBalancerState(balancer.State{ - ConnectivityState: connectivity.Connecting, - Picker: &picker{err: balancer.ErrNoSubConnAvailable}, - }) - sd.subConn.RegisterHealthListener(func(scs balancer.SubConnState) { - b.updateSubConnHealthState(sd, scs) - }) - return - } - - // If the LB policy is READY, and it receives a subchannel state change, - // it means that the READY subchannel has failed. - // A SubConn can also transition from CONNECTING directly to IDLE when - // a transport is successfully created, but the connection fails - // before the SubConn can send the notification for READY. We treat - // this as a successful connection and transition to IDLE. - // TODO: https://github.com/grpc/grpc-go/issues/7862 - Remove the second - // part of the if condition below once the issue is fixed. - if oldState == connectivity.Ready || (oldState == connectivity.Connecting && newState.ConnectivityState == connectivity.Idle) { - // Once a transport fails, the balancer enters IDLE and starts from - // the first address when the picker is used. - b.shutdownRemainingLocked(sd) - sd.effectiveState = newState.ConnectivityState - // READY SubConn interspliced in between CONNECTING and IDLE, need to - // account for that. - if oldState == connectivity.Connecting { - // A known issue (https://github.com/grpc/grpc-go/issues/7862) - // causes a race that prevents the READY state change notification. - // This works around it. - connectionAttemptsSucceededMetric.Record(b.metricsRecorder, 1, b.target) - } - disconnectionsMetric.Record(b.metricsRecorder, 1, b.target) - b.addressList.reset() - b.updateBalancerState(balancer.State{ - ConnectivityState: connectivity.Idle, - Picker: &idlePicker{exitIdle: sync.OnceFunc(b.ExitIdle)}, - }) - return - } - - if b.firstPass { - switch newState.ConnectivityState { - case connectivity.Connecting: - // The effective state can be in either IDLE, CONNECTING or - // TRANSIENT_FAILURE. If it's TRANSIENT_FAILURE, stay in - // TRANSIENT_FAILURE until it's READY. See A62. 
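// The READY handoff above, reduced to a sketch (sc is a balancer.SubConn that
// just reported READY; onHealth is a hypothetical callback): while waiting
// for the first health update the balancer reports CONNECTING, and only the
// health listener can drive it to READY.
//
//	sc.RegisterHealthListener(func(scs balancer.SubConnState) {
//		onHealth(scs) // e.g. READY -> publish a real picker, TF -> fail RPCs
//	})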
- if sd.effectiveState != connectivity.TransientFailure { - sd.effectiveState = connectivity.Connecting - b.updateBalancerState(balancer.State{ - ConnectivityState: connectivity.Connecting, - Picker: &picker{err: balancer.ErrNoSubConnAvailable}, - }) - } - case connectivity.TransientFailure: - sd.lastErr = newState.ConnectionError - sd.effectiveState = connectivity.TransientFailure - // Since we're re-using common SubConns while handling resolver - // updates, we could receive an out of turn TRANSIENT_FAILURE from - // a pass over the previous address list. Happy Eyeballs will also - // cause out of order updates to arrive. - - if curAddr := b.addressList.currentAddress(); equalAddressIgnoringBalAttributes(&curAddr, &sd.addr) { - b.cancelConnectionTimer() - if b.addressList.increment() { - b.requestConnectionLocked() - return - } - } - - // End the first pass if we've seen a TRANSIENT_FAILURE from all - // SubConns once. - b.endFirstPassIfPossibleLocked(newState.ConnectionError) - } - return - } - - // We have finished the first pass, keep re-connecting failing SubConns. - switch newState.ConnectivityState { - case connectivity.TransientFailure: - b.numTF = (b.numTF + 1) % b.subConns.Len() - sd.lastErr = newState.ConnectionError - if b.numTF%b.subConns.Len() == 0 { - b.updateBalancerState(balancer.State{ - ConnectivityState: connectivity.TransientFailure, - Picker: &picker{err: newState.ConnectionError}, - }) - } - // We don't need to request re-resolution since the SubConn already - // does that before reporting TRANSIENT_FAILURE. - // TODO: #7534 - Move re-resolution requests from SubConn into - // pick_first. - case connectivity.Idle: - sd.subConn.Connect() - } -} - -// endFirstPassIfPossibleLocked ends the first happy-eyeballs pass if all the -// addresses are tried and their SubConns have reported a failure. -func (b *pickfirstBalancer) endFirstPassIfPossibleLocked(lastErr error) { - // An optimization to avoid iterating over the entire SubConn map. - if b.addressList.isValid() { - return - } - // Connect() has been called on all the SubConns. The first pass can be - // ended if all the SubConns have reported a failure. - for _, sd := range b.subConns.Values() { - if !sd.connectionFailedInFirstPass { - return - } - } - b.firstPass = false - b.updateBalancerState(balancer.State{ - ConnectivityState: connectivity.TransientFailure, - Picker: &picker{err: lastErr}, - }) - // Start re-connecting all the SubConns that are already in IDLE. - for _, sd := range b.subConns.Values() { - if sd.rawConnectivityState == connectivity.Idle { - sd.subConn.Connect() - } - } -} - -func (b *pickfirstBalancer) isActiveSCData(sd *scData) bool { - activeSD, found := b.subConns.Get(sd.addr) - return found && activeSD == sd -} - -func (b *pickfirstBalancer) updateSubConnHealthState(sd *scData, state balancer.SubConnState) { - b.mu.Lock() - defer b.mu.Unlock() - // Previously relevant SubConns can still callback with state updates. - // To prevent pickers from returning these obsolete SubConns, this logic - // is included to check if the current list of active SubConns includes - // this SubConn. 
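// A sketch of the stale-callback scenario guarded against here (the address
// is hypothetical): a resolver update replaces the scData stored for "a"; the
// old SubConn's listener may still fire afterwards, but b.subConns.Get("a")
// now returns the new scData, so the activeSD == sd comparison fails and the
// obsolete update is dropped instead of reaching the picker.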
- if !b.isActiveSCData(sd) { - return - } - sd.effectiveState = state.ConnectivityState - switch state.ConnectivityState { - case connectivity.Ready: - b.updateBalancerState(balancer.State{ - ConnectivityState: connectivity.Ready, - Picker: &picker{result: balancer.PickResult{SubConn: sd.subConn}}, - }) - case connectivity.TransientFailure: - b.updateBalancerState(balancer.State{ - ConnectivityState: connectivity.TransientFailure, - Picker: &picker{err: fmt.Errorf("pickfirst: health check failure: %v", state.ConnectionError)}, - }) - case connectivity.Connecting: - b.updateBalancerState(balancer.State{ - ConnectivityState: connectivity.Connecting, - Picker: &picker{err: balancer.ErrNoSubConnAvailable}, - }) - default: - b.logger.Errorf("Got unexpected health update for SubConn %p: %v", state) - } -} - -// updateBalancerState stores the state reported to the channel and calls -// ClientConn.UpdateState(). As an optimization, it avoids sending duplicate -// updates to the channel. -func (b *pickfirstBalancer) updateBalancerState(newState balancer.State) { - // In case of TransientFailures allow the picker to be updated to update - // the connectivity error, in all other cases don't send duplicate state - // updates. - if newState.ConnectivityState == b.state && b.state != connectivity.TransientFailure { - return - } - b.forceUpdateConcludedStateLocked(newState) -} - -// forceUpdateConcludedStateLocked stores the state reported to the channel and -// calls ClientConn.UpdateState(). -// A separate function is defined to force update the ClientConn state since the -// channel doesn't correctly assume that LB policies start in CONNECTING and -// relies on LB policy to send an initial CONNECTING update. -func (b *pickfirstBalancer) forceUpdateConcludedStateLocked(newState balancer.State) { - b.state = newState.ConnectivityState - b.cc.UpdateState(newState) -} - -type picker struct { - result balancer.PickResult - err error -} - -func (p *picker) Pick(balancer.PickInfo) (balancer.PickResult, error) { - return p.result, p.err -} - -// idlePicker is used when the SubConn is IDLE and kicks the SubConn into -// CONNECTING when Pick is called. -type idlePicker struct { - exitIdle func() -} - -func (i *idlePicker) Pick(balancer.PickInfo) (balancer.PickResult, error) { - i.exitIdle() - return balancer.PickResult{}, balancer.ErrNoSubConnAvailable -} - -// addressList manages sequentially iterating over addresses present in a list -// of endpoints. It provides a 1 dimensional view of the addresses present in -// the endpoints. -// This type is not safe for concurrent access. -type addressList struct { - addresses []resolver.Address - idx int -} - -func (al *addressList) isValid() bool { - return al.idx < len(al.addresses) -} - -func (al *addressList) size() int { - return len(al.addresses) -} - -// increment moves to the next index in the address list. -// This method returns false if it went off the list, true otherwise. -func (al *addressList) increment() bool { - if !al.isValid() { - return false - } - al.idx++ - return al.idx < len(al.addresses) -} - -// currentAddress returns the current address pointed to in the addressList. -// If the list is in an invalid state, it returns an empty address instead. 
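// The two pickers above, exercised as a sketch (sc and b are stand-ins for a
// connected SubConn and the balancer): a ready picker always returns the same
// result, while the idle picker's only job is to kick the balancer out of
// IDLE once and make the channel queue the pick.
//
//	p := &picker{result: balancer.PickResult{SubConn: sc}}
//	res, _ := p.Pick(balancer.PickInfo{}) // res.SubConn == sc
//
//	ip := &idlePicker{exitIdle: sync.OnceFunc(b.ExitIdle)}
//	_, err := ip.Pick(balancer.PickInfo{}) // err == balancer.ErrNoSubConnAvailable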
-func (al *addressList) currentAddress() resolver.Address {
- if !al.isValid() {
- return resolver.Address{}
- }
- return al.addresses[al.idx]
-}
-
-func (al *addressList) reset() {
- al.idx = 0
-}
-
-func (al *addressList) updateAddrs(addrs []resolver.Address) {
- al.addresses = addrs
- al.reset()
-}
-
-// seekTo returns false if the needle was not found and the current index was
-// left unchanged.
-func (al *addressList) seekTo(needle resolver.Address) bool {
- for ai, addr := range al.addresses {
- if !equalAddressIgnoringBalAttributes(&addr, &needle) {
- continue
- }
- al.idx = ai
- return true
- }
- return false
-}
-
-// hasNext returns whether incrementing the addressList will result in moving
-// past the end of the list. If the list has already moved past the end, it
-// returns false.
-func (al *addressList) hasNext() bool {
- if !al.isValid() {
- return false
- }
- return al.idx+1 < len(al.addresses)
-}
-
-// equalAddressIgnoringBalAttributes returns true is a and b are considered
-// equal. This is different from the Equal method on the resolver.Address type
-// which considers all fields to determine equality. Here, we only consider
-// fields that are meaningful to the SubConn.
-func equalAddressIgnoringBalAttributes(a, b *resolver.Address) bool {
- return a.Addr == b.Addr && a.ServerName == b.ServerName &&
- a.Attributes.Equal(b.Attributes)
-}
+// Name is the name of the pick_first balancer.
+// Deprecated: Use the balancer/pickfirst package's Name instead.
+const Name = "pick_first"
diff --git a/balancer/pickfirst/pickfirstleaf/pickfirstleaf_ext_test.go b/balancer/pickfirst/pickfirstleaf_ext_test.go
similarity index 98%
rename from balancer/pickfirst/pickfirstleaf/pickfirstleaf_ext_test.go
rename to balancer/pickfirst/pickfirstleaf_ext_test.go
index 1c4c96d00862..2263b1a4d54e 100644
--- a/balancer/pickfirst/pickfirstleaf/pickfirstleaf_ext_test.go
+++ b/balancer/pickfirst/pickfirstleaf_ext_test.go
@@ -15,7 +15,7 @@
 * limitations under the License.
*/ -package pickfirstleaf_test +package pickfirst_test import ( "context" @@ -28,8 +28,8 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/balancer" + pfbalancer "google.golang.org/grpc/balancer/pickfirst" pfinternal "google.golang.org/grpc/balancer/pickfirst/internal" - "google.golang.org/grpc/balancer/pickfirst/pickfirstleaf" "google.golang.org/grpc/codes" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/credentials/insecure" @@ -889,7 +889,7 @@ func (s) TestPickFirstLeaf_HappyEyeballs_TF_AfterEndOfList(t *testing.T) { tmr := stats.NewTestMetricsRecorder() dialer := testutils.NewBlockingDialer() opts := []grpc.DialOption{ - grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pickfirstleaf.Name)), + grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pfbalancer.Name)), grpc.WithContextDialer(dialer.DialContext), grpc.WithStatsHandler(tmr), } @@ -978,7 +978,7 @@ func (s) TestPickFirstLeaf_HappyEyeballs_TriggerConnectionDelay(t *testing.T) { tmr := stats.NewTestMetricsRecorder() dialer := testutils.NewBlockingDialer() opts := []grpc.DialOption{ - grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pickfirstleaf.Name)), + grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pfbalancer.Name)), grpc.WithContextDialer(dialer.DialContext), grpc.WithStatsHandler(tmr), } @@ -1038,7 +1038,7 @@ func (s) TestPickFirstLeaf_HappyEyeballs_TF_ThenTimerFires(t *testing.T) { tmr := stats.NewTestMetricsRecorder() dialer := testutils.NewBlockingDialer() opts := []grpc.DialOption{ - grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pickfirstleaf.Name)), + grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pfbalancer.Name)), grpc.WithContextDialer(dialer.DialContext), grpc.WithStatsHandler(tmr), } @@ -1100,7 +1100,7 @@ func (s) TestPickFirstLeaf_InterleavingIPV4Preffered(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() cc := testutils.NewBalancerClientConn(t) - bal := balancer.Get(pickfirstleaf.Name).Build(cc, balancer.BuildOptions{}) + bal := balancer.Get(pfbalancer.Name).Build(cc, balancer.BuildOptions{}) defer bal.Close() ccState := balancer.ClientConnState{ ResolverState: resolver.State{ @@ -1146,7 +1146,7 @@ func (s) TestPickFirstLeaf_InterleavingIPv6Preffered(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() cc := testutils.NewBalancerClientConn(t) - bal := balancer.Get(pickfirstleaf.Name).Build(cc, balancer.BuildOptions{}) + bal := balancer.Get(pfbalancer.Name).Build(cc, balancer.BuildOptions{}) defer bal.Close() ccState := balancer.ClientConnState{ ResolverState: resolver.State{ @@ -1191,7 +1191,7 @@ func (s) TestPickFirstLeaf_InterleavingUnknownPreffered(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() cc := testutils.NewBalancerClientConn(t) - bal := balancer.Get(pickfirstleaf.Name).Build(cc, balancer.BuildOptions{}) + bal := balancer.Get(pfbalancer.Name).Build(cc, balancer.BuildOptions{}) defer bal.Close() ccState := balancer.ClientConnState{ ResolverState: resolver.State{ @@ -1242,13 +1242,13 @@ func (s) TestPickFirstLeaf_HealthListenerEnabled(t *testing.T) { defer cancel() bf := stub.BalancerFuncs{ Init: func(bd *stub.BalancerData) { - bd.ChildBalancer = balancer.Get(pickfirstleaf.Name).Build(bd.ClientConn, 
bd.BuildOptions) + bd.ChildBalancer = balancer.Get(pfbalancer.Name).Build(bd.ClientConn, bd.BuildOptions) }, Close: func(bd *stub.BalancerData) { bd.ChildBalancer.Close() }, UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error { - ccs.ResolverState = pickfirstleaf.EnableHealthListener(ccs.ResolverState) + ccs.ResolverState = pfbalancer.EnableHealthListener(ccs.ResolverState) return bd.ChildBalancer.UpdateClientConnState(ccs) }, } @@ -1290,7 +1290,7 @@ func (s) TestPickFirstLeaf_HealthListenerNotEnabled(t *testing.T) { healthListenerCh: healthListenerCh, subConnStateCh: make(chan balancer.SubConnState, 5), } - bd.ChildBalancer = balancer.Get(pickfirstleaf.Name).Build(ccw, bd.BuildOptions) + bd.ChildBalancer = balancer.Get(pfbalancer.Name).Build(ccw, bd.BuildOptions) }, Close: func(bd *stub.BalancerData) { bd.ChildBalancer.Close() @@ -1346,13 +1346,13 @@ func (s) TestPickFirstLeaf_HealthUpdates(t *testing.T) { healthListenerCh: healthListenerCh, subConnStateCh: scConnectivityStateCh, } - bd.ChildBalancer = balancer.Get(pickfirstleaf.Name).Build(ccw, bd.BuildOptions) + bd.ChildBalancer = balancer.Get(pfbalancer.Name).Build(ccw, bd.BuildOptions) }, Close: func(bd *stub.BalancerData) { bd.ChildBalancer.Close() }, UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error { - ccs.ResolverState = pickfirstleaf.EnableHealthListener(ccs.ResolverState) + ccs.ResolverState = pfbalancer.EnableHealthListener(ccs.ResolverState) return bd.ChildBalancer.UpdateClientConnState(ccs) }, } @@ -1433,7 +1433,7 @@ func (s) TestPickFirstLeaf_HealthUpdates(t *testing.T) { func (s) TestPickFirstLeaf_AddressUpdateWithMetadata(t *testing.T) { dialer := testutils.NewBlockingDialer() dopts := []grpc.DialOption{ - grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pickfirstleaf.Name)), + grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pfbalancer.Name)), grpc.WithContextDialer(dialer.DialContext), } cc, r, backends := setupPickFirstLeaf(t, 2, dopts...) 
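// The dial-option pattern repeated throughout these test hunks, as a
// standalone sketch (the target string is hypothetical): the policy under
// test is selected purely through the channel's default service config.
//
//	sc := fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pfbalancer.Name)
//	cc, err := grpc.NewClient("passthrough:///test.server",
//		grpc.WithTransportCredentials(insecure.NewCredentials()),
//		grpc.WithDefaultServiceConfig(sc))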
@@ -1514,7 +1514,7 @@ func (s) TestPickFirstLeaf_Reconnection(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() cc := testutils.NewBalancerClientConn(t) - bal := balancer.Get(pickfirstleaf.Name).Build(cc, balancer.BuildOptions{}) + bal := balancer.Get(pfbalancer.Name).Build(cc, balancer.BuildOptions{}) defer bal.Close() ccState := balancer.ClientConnState{ ResolverState: resolver.State{ @@ -1699,7 +1699,7 @@ func (b *stateStoringBalancerBuilder) Name() string { func (b *stateStoringBalancerBuilder) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { bal := &stateStoringBalancer{} - bal.Balancer = balancer.Get(pickfirstleaf.Name).Build(&stateStoringCCWrapper{cc, bal}, opts) + bal.Balancer = balancer.Get(pfbalancer.Name).Build(&stateStoringCCWrapper{cc, bal}, opts) b.balancer <- bal return bal } diff --git a/balancer/pickfirst/pickfirstleaf/pickfirstleaf_test.go b/balancer/pickfirst/pickfirstleaf_test.go similarity index 95% rename from balancer/pickfirst/pickfirstleaf/pickfirstleaf_test.go rename to balancer/pickfirst/pickfirstleaf_test.go index 5a73dccd07c3..05fa18c6d064 100644 --- a/balancer/pickfirst/pickfirstleaf/pickfirstleaf_test.go +++ b/balancer/pickfirst/pickfirstleaf_test.go @@ -16,7 +16,7 @@ * */ -package pickfirstleaf +package pickfirst import ( "context" @@ -27,27 +27,10 @@ import ( "google.golang.org/grpc/attributes" "google.golang.org/grpc/balancer" "google.golang.org/grpc/connectivity" - "google.golang.org/grpc/internal/grpctest" "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/resolver" ) -const ( - // Default timeout for tests in this package. - defaultTestTimeout = 10 * time.Second - // Default short timeout, to be used when waiting for events which are not - // expected to happen. - defaultTestShortTimeout = 100 * time.Millisecond -) - -type s struct { - grpctest.Tester -} - -func Test(t *testing.T) { - grpctest.RunSubTests(t, s{}) -} - // TestAddressList_Iteration verifies the behaviour of the addressList while // iterating through the entries. func (s) TestAddressList_Iteration(t *testing.T) { diff --git a/balancer/ringhash/ringhash.go b/balancer/ringhash/ringhash.go index d4c41facb9a4..9ff92ada0f4d 100644 --- a/balancer/ringhash/ringhash.go +++ b/balancer/ringhash/ringhash.go @@ -40,7 +40,7 @@ import ( "google.golang.org/grpc/balancer/base" "google.golang.org/grpc/balancer/endpointsharding" "google.golang.org/grpc/balancer/lazy" - "google.golang.org/grpc/balancer/pickfirst/pickfirstleaf" + "google.golang.org/grpc/balancer/pickfirst" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/internal/balancer/weight" "google.golang.org/grpc/internal/grpclog" @@ -55,7 +55,7 @@ import ( const Name = "ring_hash_experimental" func lazyPickFirstBuilder(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { - return lazy.NewBalancer(cc, opts, balancer.Get(pickfirstleaf.Name).Build) + return lazy.NewBalancer(cc, opts, balancer.Get(pickfirst.Name).Build) } func init() { @@ -202,7 +202,7 @@ func (b *ringhashBalancer) UpdateClientConnState(ccs balancer.ClientConnState) e if err := b.child.UpdateClientConnState(balancer.ClientConnState{ // Make pickfirst children use health listeners for outlier detection // and health checking to work. 
- ResolverState: pickfirstleaf.EnableHealthListener(ccs.ResolverState), + ResolverState: pickfirst.EnableHealthListener(ccs.ResolverState), }); err != nil { return err } diff --git a/balancer/roundrobin/roundrobin.go b/balancer/roundrobin/roundrobin.go index 22045bf3946c..22e6e3267945 100644 --- a/balancer/roundrobin/roundrobin.go +++ b/balancer/roundrobin/roundrobin.go @@ -26,7 +26,7 @@ import ( "google.golang.org/grpc/balancer" "google.golang.org/grpc/balancer/endpointsharding" - "google.golang.org/grpc/balancer/pickfirst/pickfirstleaf" + "google.golang.org/grpc/balancer/pickfirst" "google.golang.org/grpc/grpclog" internalgrpclog "google.golang.org/grpc/internal/grpclog" ) @@ -47,7 +47,7 @@ func (bb builder) Name() string { } func (bb builder) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { - childBuilder := balancer.Get(pickfirstleaf.Name).Build + childBuilder := balancer.Get(pickfirst.Name).Build bal := &rrBalancer{ cc: cc, Balancer: endpointsharding.NewBalancer(cc, opts, childBuilder, endpointsharding.Options{}), @@ -67,6 +67,6 @@ func (b *rrBalancer) UpdateClientConnState(ccs balancer.ClientConnState) error { return b.Balancer.UpdateClientConnState(balancer.ClientConnState{ // Enable the health listener in pickfirst children for client side health // checks and outlier detection, if configured. - ResolverState: pickfirstleaf.EnableHealthListener(ccs.ResolverState), + ResolverState: pickfirst.EnableHealthListener(ccs.ResolverState), }) } diff --git a/balancer/weightedroundrobin/balancer.go b/balancer/weightedroundrobin/balancer.go index 1bd4bb31131b..0de02e5e5088 100644 --- a/balancer/weightedroundrobin/balancer.go +++ b/balancer/weightedroundrobin/balancer.go @@ -38,7 +38,7 @@ import ( "google.golang.org/grpc/balancer" "google.golang.org/grpc/balancer/endpointsharding" - "google.golang.org/grpc/balancer/pickfirst/pickfirstleaf" + "google.golang.org/grpc/balancer/pickfirst" "google.golang.org/grpc/balancer/weightedroundrobin/internal" "google.golang.org/grpc/balancer/weightedtarget" "google.golang.org/grpc/connectivity" @@ -109,7 +109,7 @@ func (bb) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Ba scToWeight: make(map[balancer.SubConn]*endpointWeight), } - b.child = endpointsharding.NewBalancer(b, bOpts, balancer.Get(pickfirstleaf.Name).Build, endpointsharding.Options{}) + b.child = endpointsharding.NewBalancer(b, bOpts, balancer.Get(pickfirst.Name).Build, endpointsharding.Options{}) b.logger = prefixLogger(b) b.logger.Infof("Created") return b @@ -239,7 +239,7 @@ func (b *wrrBalancer) UpdateClientConnState(ccs balancer.ClientConnState) error return b.child.UpdateClientConnState(balancer.ClientConnState{ // Make pickfirst children use health listeners for outlier detection to // work. 
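// The pattern shared by ring_hash, round_robin and weighted_round_robin in
// this change, as a sketch (child stands for the endpointsharding child
// balancer): a parent policy opts its pickfirst leaves into health listeners
// by rewriting the resolver state it forwards down.
//
//	return child.UpdateClientConnState(balancer.ClientConnState{
//		ResolverState: pickfirst.EnableHealthListener(ccs.ResolverState),
//	})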
- ResolverState: pickfirstleaf.EnableHealthListener(ccs.ResolverState), + ResolverState: pickfirst.EnableHealthListener(ccs.ResolverState), }) } diff --git a/clientconn_test.go b/clientconn_test.go index 68c224140148..222858697d17 100644 --- a/clientconn_test.go +++ b/clientconn_test.go @@ -37,7 +37,6 @@ import ( "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/insecure" internalbackoff "google.golang.org/grpc/internal/backoff" - "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/grpcsync" "google.golang.org/grpc/internal/grpctest" "google.golang.org/grpc/internal/transport" @@ -426,10 +425,7 @@ func (s) TestWithTransportCredentialsTLS(t *testing.T) { // per "round" of attempts) for old pickfirst and once per address for new pickfirst. func (s) TestNewClient_BackoffCountPerRetryGroup(t *testing.T) { var attempts uint32 - wantBackoffs := uint32(1) - if envconfig.NewPickFirstEnabled { - wantBackoffs = 2 - } + wantBackoffs := uint32(2) getMinConnectTimeout := func() time.Duration { if atomic.AddUint32(&attempts, 1) <= wantBackoffs { // Once all addresses are exhausted, hang around and wait for the diff --git a/examples/features/customloadbalancer/client/customroundrobin/customroundrobin.go b/examples/features/customloadbalancer/client/customroundrobin/customroundrobin.go index f53919ee8561..7fd660de0c5c 100644 --- a/examples/features/customloadbalancer/client/customroundrobin/customroundrobin.go +++ b/examples/features/customloadbalancer/client/customroundrobin/customroundrobin.go @@ -27,7 +27,7 @@ import ( _ "google.golang.org/grpc" // to register pick_first "google.golang.org/grpc/balancer" "google.golang.org/grpc/balancer/endpointsharding" - "google.golang.org/grpc/balancer/pickfirst/pickfirstleaf" + "google.golang.org/grpc/balancer/pickfirst" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/serviceconfig" ) @@ -68,7 +68,7 @@ func (customRoundRobinBuilder) Build(cc balancer.ClientConn, bOpts balancer.Buil ClientConn: cc, bOpts: bOpts, } - crr.Balancer = endpointsharding.NewBalancer(crr, bOpts, balancer.Get(pickfirstleaf.Name).Build, endpointsharding.Options{}) + crr.Balancer = endpointsharding.NewBalancer(crr, bOpts, balancer.Get(pickfirst.Name).Build, endpointsharding.Options{}) return crr } diff --git a/internal/envconfig/envconfig.go b/internal/envconfig/envconfig.go index 293a9a40b241..91f760936c02 100644 --- a/internal/envconfig/envconfig.go +++ b/internal/envconfig/envconfig.go @@ -52,12 +52,6 @@ var ( // or "false". EnforceALPNEnabled = boolFromEnv("GRPC_ENFORCE_ALPN_ENABLED", true) - // NewPickFirstEnabled is set if the new pickfirst leaf policy is to be used - // instead of the exiting pickfirst implementation. This can be disabled by - // setting the environment variable "GRPC_EXPERIMENTAL_ENABLE_NEW_PICK_FIRST" - // to "false". - NewPickFirstEnabled = boolFromEnv("GRPC_EXPERIMENTAL_ENABLE_NEW_PICK_FIRST", true) - // XDSEndpointHashKeyBackwardCompat controls the parsing of the endpoint hash // key from EDS LbEndpoint metadata. Endpoint hash keys can be disabled by // setting "GRPC_XDS_ENDPOINT_HASH_KEY_BACKWARD_COMPAT" to "true". 
When the diff --git a/internal/xds/balancer/outlierdetection/balancer_test.go b/internal/xds/balancer/outlierdetection/balancer_test.go index 3cb1dfc1004b..cf6a29164e95 100644 --- a/internal/xds/balancer/outlierdetection/balancer_test.go +++ b/internal/xds/balancer/outlierdetection/balancer_test.go @@ -33,7 +33,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "google.golang.org/grpc" "google.golang.org/grpc/balancer" - "google.golang.org/grpc/balancer/pickfirst/pickfirstleaf" + "google.golang.org/grpc/balancer/pickfirst" "google.golang.org/grpc/balancer/weightedroundrobin" "google.golang.org/grpc/codes" "google.golang.org/grpc/connectivity" @@ -1840,7 +1840,7 @@ func (s) TestPickFirstHealthListenerDisabled(t *testing.T) { }, MaxEjectionPercent: 100, ChildPolicy: &iserviceconfig.BalancerConfig{ - Name: pickfirstleaf.Name, + Name: pickfirst.Name, }, } diff --git a/internal/xds/balancer/outlierdetection/e2e_test/outlierdetection_test.go b/internal/xds/balancer/outlierdetection/e2e_test/outlierdetection_test.go index 51a3013df298..12a42991ad50 100644 --- a/internal/xds/balancer/outlierdetection/e2e_test/outlierdetection_test.go +++ b/internal/xds/balancer/outlierdetection/e2e_test/outlierdetection_test.go @@ -28,10 +28,8 @@ import ( "github.com/google/go-cmp/cmp" "google.golang.org/grpc" - "google.golang.org/grpc/balancer/weightedroundrobin" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/internal" - "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/grpctest" "google.golang.org/grpc/internal/stubserver" "google.golang.org/grpc/peer" @@ -50,14 +48,6 @@ var ( leafPolicyName = "round_robin" ) -func init() { - // Test the health listener code path for ejection when the experimental - // pickfirst is enabled. - if envconfig.NewPickFirstEnabled { - leafPolicyName = weightedroundrobin.Name - } -} - type s struct { grpctest.Tester } diff --git a/test/clientconn_state_transition_test.go b/test/clientconn_state_transition_test.go index 6e179c1cd59a..1706a81a257d 100644 --- a/test/clientconn_state_transition_test.go +++ b/test/clientconn_state_transition_test.go @@ -34,7 +34,6 @@ import ( "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/internal" "google.golang.org/grpc/internal/balancer/stub" - "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/grpcsync" "google.golang.org/grpc/internal/stubserver" "google.golang.org/grpc/internal/testutils" @@ -345,19 +344,13 @@ func (s) TestStateTransitions_TriesAllAddrsBeforeTransientFailure(t *testing.T) client.Connect() stateNotifications := testBalancerBuilder.nextStateNotifier() want := []connectivity.State{ + // The first subconn fails. + connectivity.Connecting, + connectivity.TransientFailure, + // The second subconn connects. connectivity.Connecting, connectivity.Ready, } - if envconfig.NewPickFirstEnabled { - want = []connectivity.State{ - // The first subconn fails. - connectivity.Connecting, - connectivity.TransientFailure, - // The second subconn connects. 
- connectivity.Connecting, - connectivity.Ready, - } - } ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() for i := 0; i < len(want); i++ { diff --git a/xds/googledirectpath/googlec2p.go b/xds/googledirectpath/googlec2p.go index 9ef59f1a92a7..7cd7c76d47d0 100644 --- a/xds/googledirectpath/googlec2p.go +++ b/xds/googledirectpath/googlec2p.go @@ -182,13 +182,7 @@ func newNodeConfig(zone string, ipv6Capable bool) map[string]any { "id": fmt.Sprintf("C2P-%d", randInt()), "locality": map[string]any{"zone": zone}, } - if envconfig.NewPickFirstEnabled { - // Enable dualstack endpoints in TD. - // TODO(https://github.com/grpc/grpc-go/issues/8561): remove IPv6 metadata server queries entirely after old pick first is removed. - ipv6Capable = true - } else { - logger.Infof("GRPC_EXPERIMENTAL_ENABLE_NEW_PICK_FIRST is disabled, setting ipv6Capable node metadata based on metadata server query") - } + // Enable dualstack endpoints in TD. if ipv6Capable { node["metadata"] = map[string]any{ipv6CapableMetadataName: true} } diff --git a/xds/googledirectpath/googlec2p_test.go b/xds/googledirectpath/googlec2p_test.go index 74ee65db4987..04ff61f0e29f 100644 --- a/xds/googledirectpath/googlec2p_test.go +++ b/xds/googledirectpath/googlec2p_test.go @@ -106,7 +106,7 @@ func useCleanUniverseDomain(t *testing.T) { // TODO(https://github.com/grpc/grpc-go/issues/8561): this content can be hardcoded directly // in wanted bootstraps again after old pick first is removed. func expectedNodeJSON(ipv6Capable bool) []byte { - if !envconfig.NewPickFirstEnabled && !ipv6Capable { + if !ipv6Capable { return []byte(`{ "id": "C2P-666", "locality": { From c8191ffea0ab7279a8079509dbeabb6eec062234 Mon Sep 17 00:00:00 2001 From: Arjan Bal Date: Mon, 27 Oct 2025 12:16:37 +0530 Subject: [PATCH 2/8] merge test files --- .../{pickfirstleaf.go => pickfirst.go} | 0 balancer/pickfirst/pickfirst_ext_test.go | 1771 ++++++++++++++++ balancer/pickfirst/pickfirst_test.go | 244 +++ balancer/pickfirst/pickfirstleaf_ext_test.go | 1820 ----------------- balancer/pickfirst/pickfirstleaf_test.go | 275 --- 5 files changed, 2015 insertions(+), 2095 deletions(-) rename balancer/pickfirst/{pickfirstleaf.go => pickfirst.go} (100%) delete mode 100644 balancer/pickfirst/pickfirstleaf_ext_test.go delete mode 100644 balancer/pickfirst/pickfirstleaf_test.go diff --git a/balancer/pickfirst/pickfirstleaf.go b/balancer/pickfirst/pickfirst.go similarity index 100% rename from balancer/pickfirst/pickfirstleaf.go rename to balancer/pickfirst/pickfirst.go diff --git a/balancer/pickfirst/pickfirst_ext_test.go b/balancer/pickfirst/pickfirst_ext_test.go index 1c36855d97f5..9aeef7aacddf 100644 --- a/balancer/pickfirst/pickfirst_ext_test.go +++ b/balancer/pickfirst/pickfirst_ext_test.go @@ -24,9 +24,11 @@ import ( "errors" "fmt" "strings" + "sync" "testing" "time" + "github.com/google/go-cmp/cmp" "google.golang.org/grpc" "google.golang.org/grpc/backoff" "google.golang.org/grpc/balancer" @@ -38,9 +40,13 @@ import ( "google.golang.org/grpc/internal" "google.golang.org/grpc/internal/balancer/stub" "google.golang.org/grpc/internal/channelz" + "google.golang.org/grpc/internal/grpcsync" + "google.golang.org/grpc/internal/grpctest" "google.golang.org/grpc/internal/stubserver" "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/internal/testutils/pickfirst" + "google.golang.org/grpc/internal/testutils/stats" + "google.golang.org/grpc/metadata" "google.golang.org/grpc/resolver" "google.golang.org/grpc/resolver/manual" 
"google.golang.org/grpc/serviceconfig" @@ -53,8 +59,29 @@ import ( const ( pickFirstServiceConfig = `{"loadBalancingConfig": [{"pick_first":{}}]}` // Default timeout for tests in this package. + defaultTestTimeout = 10 * time.Second + // Default short timeout, to be used when waiting for events which are not + // expected to happen. + defaultTestShortTimeout = 100 * time.Millisecond + stateStoringBalancerName = "state_storing" ) +var ( + stateStoringServiceConfig = fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, stateStoringBalancerName) + ignoreBalAttributesOpt = cmp.Transformer("IgnoreBalancerAttributes", func(a resolver.Address) resolver.Address { + a.BalancerAttributes = nil + return a + }) +) + +type s struct { + grpctest.Tester +} + +func Test(t *testing.T) { + grpctest.RunSubTests(t, s{}) +} + func init() { channelz.TurnOn() } @@ -1033,3 +1060,1747 @@ func (s) TestPickFirst_ResolverError_ZeroAddresses_WithPreviousUpdate(t *testing t.Fatal("Timeout when waiting for RPCs to fail with error returned by the name resolver") } } + +// testServer is a server than can be stopped and resumed without closing +// the listener. This guarantees the same port number (and address) is used +// after restart. When a server is stopped, it accepts and closes all tcp +// connections from clients. +type testServer struct { + stubserver.StubServer + lis *testutils.RestartableListener +} + +func (s *testServer) stop() { + s.lis.Stop() +} + +func (s *testServer) resume() { + s.lis.Restart() +} + +func newTestServer(t *testing.T) *testServer { + l, err := testutils.LocalTCPListener() + if err != nil { + t.Fatalf("Failed to create listener: %v", err) + } + rl := testutils.NewRestartableListener(l) + ss := stubserver.StubServer{ + EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) { return &testpb.Empty{}, nil }, + Listener: rl, + } + return &testServer{ + StubServer: ss, + lis: rl, + } +} + +// setupPickFirstLeaf performs steps required for pick_first tests. It starts a +// bunch of backends exporting the TestService, and creates a ClientConn to them. +func setupPickFirstLeaf(t *testing.T, backendCount int, opts ...grpc.DialOption) (*grpc.ClientConn, *manual.Resolver, *backendManager) { + t.Helper() + r := manual.NewBuilderWithScheme("whatever") + backends := make([]*testServer, backendCount) + addrs := make([]resolver.Address, backendCount) + + for i := 0; i < backendCount; i++ { + server := newTestServer(t) + backend := stubserver.StartTestService(t, &server.StubServer) + t.Cleanup(func() { + backend.Stop() + }) + backends[i] = server + addrs[i] = resolver.Address{Addr: backend.Address} + } + + dopts := []grpc.DialOption{ + grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithResolvers(r), + } + dopts = append(dopts, opts...) + cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...) + if err != nil { + t.Fatalf("grpc.NewClient() failed: %v", err) + } + t.Cleanup(func() { cc.Close() }) + + // At this point, the resolver has not returned any addresses to the channel. + // This RPC must block until the context expires. 
+ sCtx, sCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout) + defer sCancel() + client := testgrpc.NewTestServiceClient(cc) + if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded { + t.Fatalf("EmptyCall() = %s, want %s", status.Code(err), codes.DeadlineExceeded) + } + return cc, r, &backendManager{backends} +} + +// TestPickFirstLeaf_SimpleResolverUpdate tests the behaviour of the pick first +// policy when given an list of addresses. The following steps are carried +// out in order: +// 1. A list of addresses are given through the resolver. Only one +// of the servers is running. +// 2. RPCs are sent to verify they reach the running server. +// +// The state transitions of the ClientConn and all the SubConns created are +// verified. +func (s) TestPickFirstLeaf_SimpleResolverUpdate_FirstServerReady(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + balCh := make(chan *stateStoringBalancer, 1) + balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) + + cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) + addrs := bm.resolverAddrs() + stateSubscriber := &ccStateSubscriber{} + internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) + + r.UpdateState(resolver.State{Addresses: addrs}) + var bal *stateStoringBalancer + select { + case bal = <-balCh: + case <-ctx.Done(): + t.Fatal("Context expired while waiting for balancer to be built") + } + testutils.AwaitState(ctx, t, cc, connectivity.Ready) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { + t.Fatal(err) + } + + wantSCStates := []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Ready}, + } + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + wantConnStateTransitions := []connectivity.State{ + connectivity.Connecting, + connectivity.Ready, + } + if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { + t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) + } +} + +func (s) TestPickFirstLeaf_SimpleResolverUpdate_FirstServerUnReady(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + balCh := make(chan *stateStoringBalancer, 1) + balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) + + cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) + addrs := bm.resolverAddrs() + stateSubscriber := &ccStateSubscriber{} + internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) + bm.stopAllExcept(1) + + r.UpdateState(resolver.State{Addresses: addrs}) + var bal *stateStoringBalancer + select { + case bal = <-balCh: + case <-ctx.Done(): + t.Fatal("Context expired while waiting for balancer to be built") + } + testutils.AwaitState(ctx, t, cc, connectivity.Ready) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { + t.Fatal(err) + } + + wantSCStates := []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, + } + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + 
t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + wantConnStateTransitions := []connectivity.State{ + connectivity.Connecting, + connectivity.Ready, + } + if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { + t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) + } +} + +func (s) TestPickFirstLeaf_SimpleResolverUpdate_DuplicateAddrs(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + balCh := make(chan *stateStoringBalancer, 1) + balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) + + cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) + addrs := bm.resolverAddrs() + stateSubscriber := &ccStateSubscriber{} + internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) + bm.stopAllExcept(1) + + // Add a duplicate entry in the addresslist + r.UpdateState(resolver.State{ + Addresses: append([]resolver.Address{addrs[0]}, addrs...), + }) + var bal *stateStoringBalancer + select { + case bal = <-balCh: + case <-ctx.Done(): + t.Fatal("Context expired while waiting for balancer to be built") + } + testutils.AwaitState(ctx, t, cc, connectivity.Ready) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { + t.Fatal(err) + } + + wantSCStates := []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, + } + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + wantConnStateTransitions := []connectivity.State{ + connectivity.Connecting, + connectivity.Ready, + } + if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { + t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) + } +} + +// TestPickFirstLeaf_ResolverUpdates_DisjointLists tests the behaviour of the pick first +// policy when the following steps are carried out in order: +// 1. A list of addresses are given through the resolver. Only one +// of the servers is running. +// 2. RPCs are sent to verify they reach the running server. +// 3. A second resolver update is sent. Again, only one of the servers is +// running. This may not be the same server as before. +// 4. RPCs are sent to verify they reach the running server. +// +// The state transitions of the ClientConn and all the SubConns created are +// verified. 
+func (s) TestPickFirstLeaf_ResolverUpdates_DisjointLists(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + balCh := make(chan *stateStoringBalancer, 1) + balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) + cc, r, bm := setupPickFirstLeaf(t, 4, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) + addrs := bm.resolverAddrs() + stateSubscriber := &ccStateSubscriber{} + internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) + + bm.backends[0].stop() + r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[1]}}) + var bal *stateStoringBalancer + select { + case bal = <-balCh: + case <-ctx.Done(): + t.Fatal("Context expired while waiting for balancer to be built") + } + testutils.AwaitState(ctx, t, cc, connectivity.Ready) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { + t.Fatal(err) + } + wantSCStates := []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, + } + + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + bm.backends[2].stop() + r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[2], addrs[3]}}) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[3]); err != nil { + t.Fatal(err) + } + wantSCStates = []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[2]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[3]}, State: connectivity.Ready}, + } + + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + wantConnStateTransitions := []connectivity.State{ + connectivity.Connecting, + connectivity.Ready, + connectivity.Connecting, + connectivity.Ready, + } + if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { + t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) + } +} + +func (s) TestPickFirstLeaf_ResolverUpdates_ActiveBackendInUpdatedList(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + balCh := make(chan *stateStoringBalancer, 1) + balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) + cc, r, bm := setupPickFirstLeaf(t, 3, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) + addrs := bm.resolverAddrs() + stateSubscriber := &ccStateSubscriber{} + internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) + + bm.backends[0].stop() + r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[1]}}) + var bal *stateStoringBalancer + select { + case bal = <-balCh: + case <-ctx.Done(): + t.Fatal("Context expired while waiting for balancer to be built") + } + testutils.AwaitState(ctx, t, cc, connectivity.Ready) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { + t.Fatal(err) + } + wantSCStates := []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, + } + + if diff := 
cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + bm.backends[2].stop() + r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[2], addrs[1]}}) + + // Verify that the ClientConn stays in READY. + sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + testutils.AwaitNoStateChange(sCtx, t, cc, connectivity.Ready) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { + t.Fatal(err) + } + wantSCStates = []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, + } + + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + wantConnStateTransitions := []connectivity.State{ + connectivity.Connecting, + connectivity.Ready, + } + if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { + t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) + } +} + +func (s) TestPickFirstLeaf_ResolverUpdates_InActiveBackendInUpdatedList(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + balCh := make(chan *stateStoringBalancer, 1) + balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) + cc, r, bm := setupPickFirstLeaf(t, 3, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) + addrs := bm.resolverAddrs() + stateSubscriber := &ccStateSubscriber{} + internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) + + bm.backends[0].stop() + r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[1]}}) + var bal *stateStoringBalancer + select { + case bal = <-balCh: + case <-ctx.Done(): + t.Fatal("Context expired while waiting for balancer to be built") + } + testutils.AwaitState(ctx, t, cc, connectivity.Ready) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { + t.Fatal(err) + } + wantSCStates := []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, + } + + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + bm.backends[2].stop() + bm.backends[0].resume() + + r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[2]}}) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { + t.Fatal(err) + } + wantSCStates = []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Ready}, + } + + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + wantConnStateTransitions := []connectivity.State{ + connectivity.Connecting, + connectivity.Ready, + connectivity.Connecting, + connectivity.Ready, + } + if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { + t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) + } +} + +func (s) TestPickFirstLeaf_ResolverUpdates_IdenticalLists(t *testing.T) { + ctx, 
cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+
+ balCh := make(chan *stateStoringBalancer, 1)
+ balancer.Register(&stateStoringBalancerBuilder{balancer: balCh})
+ cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig))
+ addrs := bm.resolverAddrs()
+ stateSubscriber := &ccStateSubscriber{}
+ internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber)
+
+ bm.backends[0].stop()
+ r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[1]}})
+ var bal *stateStoringBalancer
+ select {
+ case bal = <-balCh:
+ case <-ctx.Done():
+ t.Fatal("Context expired while waiting for balancer to be built")
+ }
+ testutils.AwaitState(ctx, t, cc, connectivity.Ready)
+
+ if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil {
+ t.Fatal(err)
+ }
+ wantSCStates := []scState{
+ {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown},
+ {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready},
+ }
+
+ if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" {
+ t.Errorf("SubConn states mismatch (-want +got):\n%s", diff)
+ }
+
+ r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[1]}})
+
+ // Verify that the ClientConn stays in READY.
+ sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
+ defer sCancel()
+ testutils.AwaitNoStateChange(sCtx, t, cc, connectivity.Ready)
+
+ if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil {
+ t.Fatal(err)
+ }
+ wantSCStates = []scState{
+ {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown},
+ {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready},
+ }
+
+ if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" {
+ t.Errorf("SubConn states mismatch (-want +got):\n%s", diff)
+ }
+
+ wantConnStateTransitions := []connectivity.State{
+ connectivity.Connecting,
+ connectivity.Ready,
+ }
+ if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" {
+ t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff)
+ }
+}
+
+// TestPickFirstLeaf_StopConnectedServer tests the behaviour of the pick first
+// policy when the connected server is shut down. It carries out the following
+// steps in order:
+// 1. A list of addresses is given through the resolver. Only one
+// of the servers is running.
+// 2. The running server is stopped, causing the ClientConn to enter IDLE.
+// 3. A (possibly different) server is started.
+// 4. RPCs are made to kick the ClientConn out of IDLE. The test verifies that
+// the RPCs reach the running server.
+//
+// The test verifies the ClientConn state transitions.
+func (s) TestPickFirstLeaf_StopConnectedServer_FirstServerRestart(t *testing.T) {
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+
+ balCh := make(chan *stateStoringBalancer, 1)
+ balancer.Register(&stateStoringBalancerBuilder{balancer: balCh})
+ cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig))
+ addrs := bm.resolverAddrs()
+ stateSubscriber := &ccStateSubscriber{}
+ internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber)
+
+ // Shut down all active backends except the target.
+ bm.stopAllExcept(0) + + r.UpdateState(resolver.State{Addresses: addrs}) + var bal *stateStoringBalancer + select { + case bal = <-balCh: + case <-ctx.Done(): + t.Fatal("Context expired while waiting for balancer to be built") + } + testutils.AwaitState(ctx, t, cc, connectivity.Ready) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { + t.Fatal(err) + } + + wantSCStates := []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Ready}, + } + + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + // Shut down the connected server. + bm.backends[0].stop() + testutils.AwaitState(ctx, t, cc, connectivity.Idle) + + // Start the new target server. + bm.backends[0].resume() + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { + t.Fatal(err) + } + + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + wantConnStateTransitions := []connectivity.State{ + connectivity.Connecting, + connectivity.Ready, + connectivity.Idle, + connectivity.Connecting, + connectivity.Ready, + } + if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { + t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) + } +} + +func (s) TestPickFirstLeaf_StopConnectedServer_SecondServerRestart(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + balCh := make(chan *stateStoringBalancer, 1) + balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) + cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) + addrs := bm.resolverAddrs() + stateSubscriber := &ccStateSubscriber{} + internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) + + // shutdown all active backends except the target. + bm.stopAllExcept(1) + + r.UpdateState(resolver.State{Addresses: addrs}) + var bal *stateStoringBalancer + select { + case bal = <-balCh: + case <-ctx.Done(): + t.Fatal("Context expired while waiting for balancer to be built") + } + testutils.AwaitState(ctx, t, cc, connectivity.Ready) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { + t.Fatal(err) + } + + wantSCStates := []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, + } + + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + // Shut down the connected server. + bm.backends[1].stop() + testutils.AwaitState(ctx, t, cc, connectivity.Idle) + + // Start the new target server. 
+ bm.backends[1].resume() + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { + t.Fatal(err) + } + + wantSCStates = []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, + } + + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + wantConnStateTransitions := []connectivity.State{ + connectivity.Connecting, + connectivity.Ready, + connectivity.Idle, + connectivity.Connecting, + connectivity.Ready, + } + if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { + t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) + } +} + +func (s) TestPickFirstLeaf_StopConnectedServer_SecondServerToFirst(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + balCh := make(chan *stateStoringBalancer, 1) + balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) + cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) + addrs := bm.resolverAddrs() + stateSubscriber := &ccStateSubscriber{} + internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) + + // shutdown all active backends except the target. + bm.stopAllExcept(1) + + r.UpdateState(resolver.State{Addresses: addrs}) + var bal *stateStoringBalancer + select { + case bal = <-balCh: + case <-ctx.Done(): + t.Fatal("Context expired while waiting for balancer to be built") + } + testutils.AwaitState(ctx, t, cc, connectivity.Ready) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { + t.Fatal(err) + } + + wantSCStates := []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, + } + + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + // Shut down the connected server. + bm.backends[1].stop() + testutils.AwaitState(ctx, t, cc, connectivity.Idle) + + // Start the new target server. 
+ bm.backends[0].resume() + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { + t.Fatal(err) + } + + wantSCStates = []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Ready}, + } + + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + wantConnStateTransitions := []connectivity.State{ + connectivity.Connecting, + connectivity.Ready, + connectivity.Idle, + connectivity.Connecting, + connectivity.Ready, + } + if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { + t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) + } +} + +func (s) TestPickFirstLeaf_StopConnectedServer_FirstServerToSecond(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + balCh := make(chan *stateStoringBalancer, 1) + balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) + cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) + addrs := bm.resolverAddrs() + stateSubscriber := &ccStateSubscriber{} + internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) + + // shutdown all active backends except the target. + bm.stopAllExcept(0) + + r.UpdateState(resolver.State{Addresses: addrs}) + var bal *stateStoringBalancer + select { + case bal = <-balCh: + case <-ctx.Done(): + t.Fatal("Context expired while waiting for balancer to be built") + } + testutils.AwaitState(ctx, t, cc, connectivity.Ready) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { + t.Fatal(err) + } + + wantSCStates := []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Ready}, + } + + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + // Shut down the connected server. + bm.backends[0].stop() + testutils.AwaitState(ctx, t, cc, connectivity.Idle) + + // Start the new target server. + bm.backends[1].resume() + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { + t.Fatal(err) + } + + wantSCStates = []scState{ + {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, + {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, + } + + if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) + } + + wantConnStateTransitions := []connectivity.State{ + connectivity.Connecting, + connectivity.Ready, + connectivity.Idle, + connectivity.Connecting, + connectivity.Ready, + } + if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { + t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) + } +} + +// TestPickFirstLeaf_EmptyAddressList carries out the following steps in order: +// 1. Send a resolver update with one running backend. +// 2. Send an empty address list causing the balancer to enter TRANSIENT_FAILURE. +// 3. Send a resolver update with one running backend. +// The test verifies the ClientConn state transitions. 
+func (s) TestPickFirstLeaf_EmptyAddressList(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + balChan := make(chan *stateStoringBalancer, 1) + balancer.Register(&stateStoringBalancerBuilder{balancer: balChan}) + cc, r, bm := setupPickFirstLeaf(t, 1, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) + addrs := bm.resolverAddrs() + + stateSubscriber := &ccStateSubscriber{} + internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) + + r.UpdateState(resolver.State{Addresses: addrs}) + testutils.AwaitState(ctx, t, cc, connectivity.Ready) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { + t.Fatal(err) + } + + r.UpdateState(resolver.State{}) + testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) + + // The balancer should have entered transient failure. + // It should transition to CONNECTING from TRANSIENT_FAILURE as sticky TF + // only applies when the initial TF is reported due to connection failures + // and not bad resolver states. + r.UpdateState(resolver.State{Addresses: addrs}) + testutils.AwaitState(ctx, t, cc, connectivity.Ready) + + if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { + t.Fatal(err) + } + + wantTransitions := []connectivity.State{ + // From first resolver update. + connectivity.Connecting, + connectivity.Ready, + // From second update. + connectivity.TransientFailure, + // From third update. + connectivity.Connecting, + connectivity.Ready, + } + + if diff := cmp.Diff(wantTransitions, stateSubscriber.transitions()); diff != "" { + t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) + } +} + +// Test verifies that pickfirst correctly detects the end of the first happy +// eyeballs pass when the timer causes pickfirst to reach the end of the address +// list and failures are reported out of order. +func (s) TestPickFirstLeaf_HappyEyeballs_TF_AfterEndOfList(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + + originalTimer := pfinternal.TimeAfterFunc + defer func() { + pfinternal.TimeAfterFunc = originalTimer + }() + triggerTimer, timeAfter := mockTimer() + pfinternal.TimeAfterFunc = timeAfter + + tmr := stats.NewTestMetricsRecorder() + dialer := testutils.NewBlockingDialer() + opts := []grpc.DialOption{ + grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pfbalancer.Name)), + grpc.WithContextDialer(dialer.DialContext), + grpc.WithStatsHandler(tmr), + } + cc, rb, bm := setupPickFirstLeaf(t, 3, opts...) + addrs := bm.resolverAddrs() + holds := bm.holds(dialer) + rb.UpdateState(resolver.State{Addresses: addrs}) + cc.Connect() + + testutils.AwaitState(ctx, t, cc, connectivity.Connecting) + + // Verify that only the first server is contacted. + if holds[0].Wait(ctx) != true { + t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 0, addrs[0]) + } + if holds[1].IsStarted() != false { + t.Fatalf("Server %d with address %q contacted unexpectedly", 1, addrs[1]) + } + if holds[2].IsStarted() != false { + t.Fatalf("Server %d with address %q contacted unexpectedly", 2, addrs[2]) + } + + // Make the happy eyeballs timer fire once and verify that the + // second server is contacted, but the third isn't. 
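+ // (Each triggerTimer() call below unblocks exactly one callback that was
+ // scheduled through the mocked pfinternal.TimeAfterFunc; see mockTimer
+ // later in this file for how the fake timer is wired up.)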
+ triggerTimer()
+ if holds[1].Wait(ctx) != true {
+ t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 1, addrs[1])
+ }
+ if holds[2].IsStarted() != false {
+ t.Fatalf("Server %d with address %q contacted unexpectedly", 2, addrs[2])
+ }
+
+ // Make the happy eyeballs timer fire once more and verify that the
+ // third server is contacted.
+ triggerTimer()
+ if holds[2].Wait(ctx) != true {
+ t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 2, addrs[2])
+ }
+
+ // First SubConn fails.
+ holds[0].Fail(fmt.Errorf("test error"))
+ tmr.WaitForInt64CountIncr(ctx, 1)
+
+ // No TF should be reported until the first pass is complete.
+ shortCtx, shortCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
+ defer shortCancel()
+ testutils.AwaitNotState(shortCtx, t, cc, connectivity.TransientFailure)
+
+ // Third SubConn fails.
+ shortCtx, shortCancel = context.WithTimeout(ctx, defaultTestShortTimeout)
+ defer shortCancel()
+ holds[2].Fail(fmt.Errorf("test error"))
+ tmr.WaitForInt64CountIncr(ctx, 1)
+ testutils.AwaitNotState(shortCtx, t, cc, connectivity.TransientFailure)
+
+ // Last SubConn fails; this should result in a TF update.
+ holds[1].Fail(fmt.Errorf("test error"))
+ tmr.WaitForInt64CountIncr(ctx, 1)
+ testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
+
+ // Only connection attempt failures are expected in this test.
+ if got, _ := tmr.Metric("grpc.lb.pick_first.connection_attempts_succeeded"); got != 0 {
+ t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.connection_attempts_succeeded", got, 0)
+ }
+ if got, _ := tmr.Metric("grpc.lb.pick_first.connection_attempts_failed"); got != 1 {
+ t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.connection_attempts_failed", got, 1)
+ }
+ if got, _ := tmr.Metric("grpc.lb.pick_first.disconnections"); got != 0 {
+ t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.disconnections", got, 0)
+ }
+}
+
+// Test verifies that pickfirst attempts to connect to the second backend once
+// the happy eyeballs timer expires.
+func (s) TestPickFirstLeaf_HappyEyeballs_TriggerConnectionDelay(t *testing.T) {
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+
+ originalTimer := pfinternal.TimeAfterFunc
+ defer func() {
+ pfinternal.TimeAfterFunc = originalTimer
+ }()
+ triggerTimer, timeAfter := mockTimer()
+ pfinternal.TimeAfterFunc = timeAfter
+
+ tmr := stats.NewTestMetricsRecorder()
+ dialer := testutils.NewBlockingDialer()
+ opts := []grpc.DialOption{
+ grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pfbalancer.Name)),
+ grpc.WithContextDialer(dialer.DialContext),
+ grpc.WithStatsHandler(tmr),
+ }
+ cc, rb, bm := setupPickFirstLeaf(t, 2, opts...)
+ addrs := bm.resolverAddrs()
+ holds := bm.holds(dialer)
+ rb.UpdateState(resolver.State{Addresses: addrs})
+ cc.Connect()
+
+ testutils.AwaitState(ctx, t, cc, connectivity.Connecting)
+
+ // Verify that only the first server is contacted.
+ if holds[0].Wait(ctx) != true {
+ t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 0, addrs[0])
+ }
+ if holds[1].IsStarted() != false {
+ t.Fatalf("Server %d with address %q contacted unexpectedly", 1, addrs[1])
+ }
+
+ // Make the happy eyeballs timer fire once and verify that the
+ // second server is contacted.
+ triggerTimer()
+ if holds[1].Wait(ctx) != true {
+ t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 1, addrs[1])
+ }
+
+ // Get the connection attempt to the second server to succeed and verify
+ // that the channel becomes READY.
+ holds[1].Resume()
+ testutils.AwaitState(ctx, t, cc, connectivity.Ready)
+
+ // Only connection attempt successes are expected in this test.
+ if got, _ := tmr.Metric("grpc.lb.pick_first.connection_attempts_succeeded"); got != 1 {
+ t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.connection_attempts_succeeded", got, 1)
+ }
+ if got, _ := tmr.Metric("grpc.lb.pick_first.connection_attempts_failed"); got != 0 {
+ t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.connection_attempts_failed", got, 0)
+ }
+ if got, _ := tmr.Metric("grpc.lb.pick_first.disconnections"); got != 0 {
+ t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.disconnections", got, 0)
+ }
+}
+
+// Test verifies that the pickfirst balancer, after a SubConn failure, moves
+// to the 3rd SubConn once the happy eyeballs timer expires.
+func (s) TestPickFirstLeaf_HappyEyeballs_TF_ThenTimerFires(t *testing.T) {
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+
+ originalTimer := pfinternal.TimeAfterFunc
+ defer func() {
+ pfinternal.TimeAfterFunc = originalTimer
+ }()
+ triggerTimer, timeAfter := mockTimer()
+ pfinternal.TimeAfterFunc = timeAfter
+
+ tmr := stats.NewTestMetricsRecorder()
+ dialer := testutils.NewBlockingDialer()
+ opts := []grpc.DialOption{
+ grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pfbalancer.Name)),
+ grpc.WithContextDialer(dialer.DialContext),
+ grpc.WithStatsHandler(tmr),
+ }
+ cc, rb, bm := setupPickFirstLeaf(t, 3, opts...)
+ addrs := bm.resolverAddrs()
+ holds := bm.holds(dialer)
+ rb.UpdateState(resolver.State{Addresses: addrs})
+ cc.Connect()
+
+ testutils.AwaitState(ctx, t, cc, connectivity.Connecting)
+
+ // Verify that only the first server is contacted.
+ if holds[0].Wait(ctx) != true {
+ t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 0, addrs[0])
+ }
+ if holds[1].IsStarted() != false {
+ t.Fatalf("Server %d with address %q contacted unexpectedly", 1, addrs[1])
+ }
+ if holds[2].IsStarted() != false {
+ t.Fatalf("Server %d with address %q contacted unexpectedly", 2, addrs[2])
+ }
+
+ // First SubConn fails.
+ holds[0].Fail(fmt.Errorf("test error"))
+
+ // Verify that only the second server is contacted.
+ if holds[1].Wait(ctx) != true {
+ t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 1, addrs[1])
+ }
+ if got, _ := tmr.Metric("grpc.lb.pick_first.connection_attempts_failed"); got != 1 {
+ t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.connection_attempts_failed", got, 1)
+ }
+ if holds[2].IsStarted() != false {
+ t.Fatalf("Server %d with address %q contacted unexpectedly", 2, addrs[2])
+ }
+
+ // When the happy eyeballs timer expires, pickfirst should stop waiting for
+ // server[1] to report a failure/success and request the creation of a third
+ // SubConn.
+ triggerTimer()
+ if holds[2].Wait(ctx) != true {
+ t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 2, addrs[2])
+ }
+
+ // Get the connection attempt to the second server to succeed and verify
+ // that the channel becomes READY.
+ holds[1].Resume() + testutils.AwaitState(ctx, t, cc, connectivity.Ready) + + if got, _ := tmr.Metric("grpc.lb.pick_first.connection_attempts_succeeded"); got != 1 { + t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.connection_attempts_succeeded", got, 1) + } + if got, _ := tmr.Metric("grpc.lb.pick_first.disconnections"); got != 0 { + t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.disconnections", got, 0) + } +} + +func (s) TestPickFirstLeaf_InterleavingIPV4Preffered(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + cc := testutils.NewBalancerClientConn(t) + bal := balancer.Get(pfbalancer.Name).Build(cc, balancer.BuildOptions{}) + defer bal.Close() + ccState := balancer.ClientConnState{ + ResolverState: resolver.State{ + Endpoints: []resolver.Endpoint{ + {Addresses: []resolver.Address{{Addr: "1.1.1.1:1111"}}}, + {Addresses: []resolver.Address{{Addr: "2.2.2.2:2"}}}, + {Addresses: []resolver.Address{{Addr: "3.3.3.3:3"}}}, + // IPv4-mapped IPv6 address, considered as an IPv4 for + // interleaving. + {Addresses: []resolver.Address{{Addr: "[::FFFF:192.168.0.1]:2222"}}}, + {Addresses: []resolver.Address{{Addr: "[0001:0001:0001:0001:0001:0001:0001:0001]:8080"}}}, + {Addresses: []resolver.Address{{Addr: "[0002:0002:0002:0002:0002:0002:0002:0002]:8080"}}}, + {Addresses: []resolver.Address{{Addr: "[fe80::1%eth0]:3333"}}}, + {Addresses: []resolver.Address{{Addr: "grpc.io:80"}}}, // not an IP. + }, + }, + } + if err := bal.UpdateClientConnState(ccState); err != nil { + t.Fatalf("UpdateClientConnState(%v) returned error: %v", ccState, err) + } + + wantAddrs := []resolver.Address{ + {Addr: "1.1.1.1:1111"}, + {Addr: "[0001:0001:0001:0001:0001:0001:0001:0001]:8080"}, + {Addr: "grpc.io:80"}, + {Addr: "2.2.2.2:2"}, + {Addr: "[0002:0002:0002:0002:0002:0002:0002:0002]:8080"}, + {Addr: "3.3.3.3:3"}, + {Addr: "[fe80::1%eth0]:3333"}, + {Addr: "[::FFFF:192.168.0.1]:2222"}, + } + + gotAddrs, err := subConnAddresses(ctx, cc, 8) + if err != nil { + t.Fatalf("%v", err) + } + if diff := cmp.Diff(wantAddrs, gotAddrs, ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn creation order mismatch (-want +got):\n%s", diff) + } +} + +func (s) TestPickFirstLeaf_InterleavingIPv6Preffered(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + cc := testutils.NewBalancerClientConn(t) + bal := balancer.Get(pfbalancer.Name).Build(cc, balancer.BuildOptions{}) + defer bal.Close() + ccState := balancer.ClientConnState{ + ResolverState: resolver.State{ + Endpoints: []resolver.Endpoint{ + {Addresses: []resolver.Address{{Addr: "[0001:0001:0001:0001:0001:0001:0001:0001]:8080"}}}, + {Addresses: []resolver.Address{{Addr: "[0001:0001:0001:0001:0001:0001:0001:0001]:8080"}}}, // duplicate, should be ignored. + {Addresses: []resolver.Address{{Addr: "1.1.1.1:1111"}}}, + {Addresses: []resolver.Address{{Addr: "2.2.2.2:2"}}}, + {Addresses: []resolver.Address{{Addr: "3.3.3.3:3"}}}, + {Addresses: []resolver.Address{{Addr: "[::FFFF:192.168.0.1]:2222"}}}, + {Addresses: []resolver.Address{{Addr: "[0002:0002:0002:0002:0002:0002:0002:0002]:2222"}}}, + {Addresses: []resolver.Address{{Addr: "[fe80::1%eth0]:3333"}}}, + {Addresses: []resolver.Address{{Addr: "grpc.io:80"}}}, // not an IP. 
+ }, + }, + } + if err := bal.UpdateClientConnState(ccState); err != nil { + t.Fatalf("UpdateClientConnState(%v) returned error: %v", ccState, err) + } + + wantAddrs := []resolver.Address{ + {Addr: "[0001:0001:0001:0001:0001:0001:0001:0001]:8080"}, + {Addr: "1.1.1.1:1111"}, + {Addr: "grpc.io:80"}, + {Addr: "[0002:0002:0002:0002:0002:0002:0002:0002]:2222"}, + {Addr: "2.2.2.2:2"}, + {Addr: "[fe80::1%eth0]:3333"}, + {Addr: "3.3.3.3:3"}, + {Addr: "[::FFFF:192.168.0.1]:2222"}, + } + + gotAddrs, err := subConnAddresses(ctx, cc, 8) + if err != nil { + t.Fatalf("%v", err) + } + if diff := cmp.Diff(wantAddrs, gotAddrs, ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn creation order mismatch (-want +got):\n%s", diff) + } +} + +func (s) TestPickFirstLeaf_InterleavingUnknownPreffered(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + cc := testutils.NewBalancerClientConn(t) + bal := balancer.Get(pfbalancer.Name).Build(cc, balancer.BuildOptions{}) + defer bal.Close() + ccState := balancer.ClientConnState{ + ResolverState: resolver.State{ + Endpoints: []resolver.Endpoint{ + {Addresses: []resolver.Address{{Addr: "grpc.io:80"}}}, // not an IP. + {Addresses: []resolver.Address{{Addr: "1.1.1.1:1111"}}}, + {Addresses: []resolver.Address{{Addr: "2.2.2.2:2"}}}, + {Addresses: []resolver.Address{{Addr: "3.3.3.3:3"}}}, + {Addresses: []resolver.Address{{Addr: "[::FFFF:192.168.0.1]:2222"}}}, + {Addresses: []resolver.Address{{Addr: "[0001:0001:0001:0001:0001:0001:0001:0001]:8080"}}}, + {Addresses: []resolver.Address{{Addr: "[0002:0002:0002:0002:0002:0002:0002:0002]:8080"}}}, + {Addresses: []resolver.Address{{Addr: "[fe80::1%eth0]:3333"}}}, + {Addresses: []resolver.Address{{Addr: "example.com:80"}}}, // not an IP. + }, + }, + } + if err := bal.UpdateClientConnState(ccState); err != nil { + t.Fatalf("UpdateClientConnState(%v) returned error: %v", ccState, err) + } + + wantAddrs := []resolver.Address{ + {Addr: "grpc.io:80"}, + {Addr: "1.1.1.1:1111"}, + {Addr: "[0001:0001:0001:0001:0001:0001:0001:0001]:8080"}, + {Addr: "example.com:80"}, + {Addr: "2.2.2.2:2"}, + {Addr: "[0002:0002:0002:0002:0002:0002:0002:0002]:8080"}, + {Addr: "3.3.3.3:3"}, + {Addr: "[fe80::1%eth0]:3333"}, + {Addr: "[::FFFF:192.168.0.1]:2222"}, + } + + gotAddrs, err := subConnAddresses(ctx, cc, 9) + if err != nil { + t.Fatalf("%v", err) + } + if diff := cmp.Diff(wantAddrs, gotAddrs, ignoreBalAttributesOpt); diff != "" { + t.Errorf("SubConn creation order mismatch (-want +got):\n%s", diff) + } +} + +// Test verifies that pickfirst balancer transitions to READY when the health +// listener is enabled. Since client side health checking is not enabled in +// the service config, the health listener will send a health update for READY +// after registering the listener. 
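+// The stub balancer below opts in by calling pfbalancer.EnableHealthListener
+// on the resolver state, which is the same hook a petiole policy would be
+// expected to use to enable health listening in its pickfirst children.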
+func (s) TestPickFirstLeaf_HealthListenerEnabled(t *testing.T) {
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+ bf := stub.BalancerFuncs{
+ Init: func(bd *stub.BalancerData) {
+ bd.ChildBalancer = balancer.Get(pfbalancer.Name).Build(bd.ClientConn, bd.BuildOptions)
+ },
+ Close: func(bd *stub.BalancerData) {
+ bd.ChildBalancer.Close()
+ },
+ UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error {
+ ccs.ResolverState = pfbalancer.EnableHealthListener(ccs.ResolverState)
+ return bd.ChildBalancer.UpdateClientConnState(ccs)
+ },
+ }
+
+ stub.Register(t.Name(), bf)
+ svcCfg := fmt.Sprintf(`{ "loadBalancingConfig": [{%q: {}}] }`, t.Name())
+ backend := stubserver.StartTestService(t, nil)
+ defer backend.Stop()
+ opts := []grpc.DialOption{
+ grpc.WithTransportCredentials(insecure.NewCredentials()),
+ grpc.WithDefaultServiceConfig(svcCfg),
+ }
+ cc, err := grpc.NewClient(backend.Address, opts...)
+ if err != nil {
+ t.Fatalf("grpc.NewClient(%q) failed: %v", backend.Address, err)
+ }
+ defer cc.Close()
+
+ if err := pickfirst.CheckRPCsToBackend(ctx, cc, resolver.Address{Addr: backend.Address}); err != nil {
+ t.Fatal(err)
+ }
+}
+
+// Test verifies that a health listener is not registered when pickfirst is not
+// under a petiole policy.
+func (s) TestPickFirstLeaf_HealthListenerNotEnabled(t *testing.T) {
+ // Wrap the clientconn to intercept NewSubConn.
+ // Capture the health listener by wrapping the SC.
+ // Wrap the picker to unwrap the SC.
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+ healthListenerCh := make(chan func(balancer.SubConnState))
+
+ bf := stub.BalancerFuncs{
+ Init: func(bd *stub.BalancerData) {
+ ccw := &healthListenerCapturingCCWrapper{
+ ClientConn: bd.ClientConn,
+ healthListenerCh: healthListenerCh,
+ subConnStateCh: make(chan balancer.SubConnState, 5),
+ }
+ bd.ChildBalancer = balancer.Get(pfbalancer.Name).Build(ccw, bd.BuildOptions)
+ },
+ Close: func(bd *stub.BalancerData) {
+ bd.ChildBalancer.Close()
+ },
+ UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error {
+ // Functions like a non-petiole policy by not configuring the use
+ // of health listeners.
+ return bd.ChildBalancer.UpdateClientConnState(ccs)
+ },
+ }
+
+ stub.Register(t.Name(), bf)
+ svcCfg := fmt.Sprintf(`{ "loadBalancingConfig": [{%q: {}}] }`, t.Name())
+ backend := stubserver.StartTestService(t, nil)
+ defer backend.Stop()
+ opts := []grpc.DialOption{
+ grpc.WithTransportCredentials(insecure.NewCredentials()),
+ grpc.WithDefaultServiceConfig(svcCfg),
+ }
+ cc, err := grpc.NewClient(backend.Address, opts...)
+ if err != nil {
+ t.Fatalf("grpc.NewClient(%q) failed: %v", backend.Address, err)
+ }
+ defer cc.Close()
+ cc.Connect()
+
+ select {
+ case <-healthListenerCh:
+ t.Fatal("Health listener registered when not enabled.")
+ case <-time.After(defaultTestShortTimeout):
+ }
+
+ testutils.AwaitState(ctx, t, cc, connectivity.Ready)
+}
+
+// Test mocks the updates sent to the health listener and verifies that the
+// balancer correctly reports the health state once the SubConn's connectivity
+// state becomes READY.
+func (s) TestPickFirstLeaf_HealthUpdates(t *testing.T) {
+ // Wrap the clientconn to intercept NewSubConn.
+ // Capture the health listener by wrapping the SC.
+ // Wrap the picker to unwrap the SC.
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + healthListenerCh := make(chan func(balancer.SubConnState)) + scConnectivityStateCh := make(chan balancer.SubConnState, 5) + + bf := stub.BalancerFuncs{ + Init: func(bd *stub.BalancerData) { + ccw := &healthListenerCapturingCCWrapper{ + ClientConn: bd.ClientConn, + healthListenerCh: healthListenerCh, + subConnStateCh: scConnectivityStateCh, + } + bd.ChildBalancer = balancer.Get(pfbalancer.Name).Build(ccw, bd.BuildOptions) + }, + Close: func(bd *stub.BalancerData) { + bd.ChildBalancer.Close() + }, + UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error { + ccs.ResolverState = pfbalancer.EnableHealthListener(ccs.ResolverState) + return bd.ChildBalancer.UpdateClientConnState(ccs) + }, + } + + stub.Register(t.Name(), bf) + svcCfg := fmt.Sprintf(`{ "loadBalancingConfig": [{%q: {}}] }`, t.Name()) + backend := stubserver.StartTestService(t, nil) + defer backend.Stop() + opts := []grpc.DialOption{ + grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithDefaultServiceConfig(svcCfg), + } + cc, err := grpc.NewClient(backend.Address, opts...) + if err != nil { + t.Fatalf("grpc.NewClient(%q) failed: %v", backend.Address, err) + + } + defer cc.Close() + cc.Connect() + + var healthListener func(balancer.SubConnState) + select { + case healthListener = <-healthListenerCh: + case <-ctx.Done(): + t.Fatal("Context timed out waiting for health listener to be registered.") + } + + // Wait for the raw connectivity state to become READY. The LB policy should + // wait for the health updates before transitioning the channel to READY. + for { + var scs balancer.SubConnState + select { + case scs = <-scConnectivityStateCh: + case <-ctx.Done(): + t.Fatal("Context timed out waiting for the SubConn connectivity state to become READY.") + } + if scs.ConnectivityState == connectivity.Ready { + break + } + } + + shortCtx, cancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer cancel() + testutils.AwaitNoStateChange(shortCtx, t, cc, connectivity.Connecting) + + // The LB policy should update the channel state based on the health state. + healthListener(balancer.SubConnState{ + ConnectivityState: connectivity.TransientFailure, + ConnectionError: fmt.Errorf("test health check failure"), + }) + testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) + + healthListener(balancer.SubConnState{ + ConnectivityState: connectivity.Connecting, + ConnectionError: balancer.ErrNoSubConnAvailable, + }) + testutils.AwaitState(ctx, t, cc, connectivity.Connecting) + + healthListener(balancer.SubConnState{ + ConnectivityState: connectivity.Ready, + }) + if err := pickfirst.CheckRPCsToBackend(ctx, cc, resolver.Address{Addr: backend.Address}); err != nil { + t.Fatal(err) + } + + // When the health check fails, the channel should transition to TF. + healthListener(balancer.SubConnState{ + ConnectivityState: connectivity.TransientFailure, + ConnectionError: fmt.Errorf("test health check failure"), + }) + testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) +} + +// Tests the case where an address update received by the pick_first LB policy +// differs in metadata which should be ignored by the LB policy. In this case, +// the test verifies that new connections are not created when the address +// update only changes the metadata. 
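+// Connection attempts are observed through a testutils.BlockingDialer: a hold
+// only fires when a fresh dial to its address is attempted, so a hold that
+// never fires is evidence that the existing connection was reused.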
+func (s) TestPickFirstLeaf_AddressUpdateWithMetadata(t *testing.T) {
+ dialer := testutils.NewBlockingDialer()
+ dopts := []grpc.DialOption{
+ grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pfbalancer.Name)),
+ grpc.WithContextDialer(dialer.DialContext),
+ }
+ cc, r, backends := setupPickFirstLeaf(t, 2, dopts...)
+
+ // Add metadata to the addresses before pushing them to the pick_first LB
+ // policy through the manual resolver.
+ addrs := backends.resolverAddrs()
+ for i := range addrs {
+ addrs[i].Metadata = &metadata.MD{
+ "test-metadata-1": []string{fmt.Sprintf("%d", i)},
+ }
+ }
+ r.UpdateState(resolver.State{Addresses: addrs})
+
+ // Ensure that RPCs succeed to the expected backend.
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+ if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
+ t.Fatal(err)
+ }
+
+ // Create holds for each backend. This will be used to verify the connection
+ // is not re-established.
+ holds := backends.holds(dialer)
+
+ // Add metadata to the addresses before pushing them to the pick_first LB
+ // policy through the manual resolver. Leave the order of the addresses
+ // unchanged.
+ for i := range addrs {
+ addrs[i].Metadata = &metadata.MD{
+ "test-metadata-2": []string{fmt.Sprintf("%d", i)},
+ }
+ }
+ r.UpdateState(resolver.State{Addresses: addrs})
+
+ // Ensure that no new connection is established.
+ for i := range holds {
+ sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
+ defer sCancel()
+ if holds[i].Wait(sCtx) {
+ t.Fatalf("Unexpected connection attempt to backend: %s", addrs[i])
+ }
+ }
+
+ if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
+ t.Fatal(err)
+ }
+
+ // Add metadata to the addresses before pushing them to the pick_first LB
+ // policy through the manual resolver. Reverse the order of the addresses.
+ for i := range addrs {
+ addrs[i].Metadata = &metadata.MD{
+ "test-metadata-3": []string{fmt.Sprintf("%d", i)},
+ }
+ }
+ addrs[0], addrs[1] = addrs[1], addrs[0]
+ r.UpdateState(resolver.State{Addresses: addrs})
+
+ // Ensure that no new connection is established.
+ for i := range holds {
+ sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
+ defer sCancel()
+ if holds[i].Wait(sCtx) {
+ t.Fatalf("Unexpected connection attempt to backend: %s", addrs[i])
+ }
+ }
+ if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil {
+ t.Fatal(err)
+ }
+}
+
+// Tests the scenario where a connection is established and then breaks, leading
+// to a reconnection attempt. While the reconnection is in progress, a resolver
+// update with a new address is received. The test verifies that the balancer
+// creates a new SubConn for the new address and that the ClientConn eventually
+// becomes READY.
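+// The balancer is exercised directly against a testutils.BalancerClientConn
+// here, so SubConn state transitions are injected by hand instead of being
+// produced by real connections.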
+func (s) TestPickFirstLeaf_Reconnection(t *testing.T) {
+ ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+ defer cancel()
+ cc := testutils.NewBalancerClientConn(t)
+ bal := balancer.Get(pfbalancer.Name).Build(cc, balancer.BuildOptions{})
+ defer bal.Close()
+ ccState := balancer.ClientConnState{
+ ResolverState: resolver.State{
+ Endpoints: []resolver.Endpoint{
+ {Addresses: []resolver.Address{{Addr: "1.1.1.1:1"}}},
+ },
+ },
+ }
+ if err := bal.UpdateClientConnState(ccState); err != nil {
+ t.Fatalf("UpdateClientConnState(%v) returned error: %v", ccState, err)
+ }
+
+ select {
+ case state := <-cc.NewStateCh:
+ if got, want := state, connectivity.Connecting; got != want {
+ t.Fatalf("Received unexpected ClientConn state: got %v, want %v", got, want)
+ }
+ case <-ctx.Done():
+ t.Fatal("Context timed out waiting for ClientConn state update.")
+ }
+
+ sc1 := <-cc.NewSubConnCh
+ select {
+ case <-sc1.ConnectCh:
+ case <-ctx.Done():
+ t.Fatal("Context timed out waiting for Connect() to be called on sc1.")
+ }
+ sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting})
+ sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Ready})
+
+ if err := cc.WaitForConnectivityState(ctx, connectivity.Ready); err != nil {
+ t.Fatalf("Context timed out waiting for ClientConn to become READY.")
+ }
+
+ // Simulate a connection breakage; this should result in the channel
+ // transitioning to IDLE.
+ sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Idle})
+ if err := cc.WaitForConnectivityState(ctx, connectivity.Idle); err != nil {
+ t.Fatalf("Context timed out waiting for ClientConn to enter IDLE.")
+ }
+
+ // Calling the idle picker should result in the SubConn being re-connected.
+ picker := <-cc.NewPickerCh
+ if _, err := picker.Pick(balancer.PickInfo{}); err != balancer.ErrNoSubConnAvailable {
+ t.Fatalf("picker.Pick() returned error: %v, want %v", err, balancer.ErrNoSubConnAvailable)
+ }
+
+ select {
+ case <-sc1.ConnectCh:
+ case <-ctx.Done():
+ t.Fatal("Context timed out waiting for Connect() to be called on sc1.")
+ }
+
+ // Send a resolver update, removing the existing SubConn. Since the balancer
+ // is connecting, it should create a new SubConn for the new backend
+ // address.
+ ccState = balancer.ClientConnState{
+ ResolverState: resolver.State{
+ Endpoints: []resolver.Endpoint{
+ {Addresses: []resolver.Address{{Addr: "2.2.2.2:2"}}},
+ },
+ },
+ }
+ if err := bal.UpdateClientConnState(ccState); err != nil {
+ t.Fatalf("UpdateClientConnState(%v) returned error: %v", ccState, err)
+ }
+
+ var sc2 *testutils.TestSubConn
+ select {
+ case sc2 = <-cc.NewSubConnCh:
+ case <-ctx.Done():
+ t.Fatal("Context timed out waiting for new SubConn to be created.")
+ }
+
+ select {
+ case <-sc2.ConnectCh:
+ case <-ctx.Done():
+ t.Fatal("Context timed out waiting for Connect() to be called on sc2.")
+ }
+ sc2.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting})
+ sc2.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Ready})
+ if err := cc.WaitForConnectivityState(ctx, connectivity.Ready); err != nil {
+ t.Fatalf("Context timed out waiting for ClientConn to become READY.")
+ }
+}
+
+// healthListenerCapturingCCWrapper is used to capture the health listener so
+// that health updates can be mocked for testing.
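+// It intercepts NewSubConn to forward raw connectivity updates on
+// subConnStateCh and wraps the returned SubConn so that the listener passed
+// to RegisterHealthListener is published on healthListenerCh.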
+type healthListenerCapturingCCWrapper struct { + balancer.ClientConn + healthListenerCh chan func(balancer.SubConnState) + subConnStateCh chan balancer.SubConnState +} + +func (ccw *healthListenerCapturingCCWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) { + oldListener := opts.StateListener + opts.StateListener = func(scs balancer.SubConnState) { + ccw.subConnStateCh <- scs + if oldListener != nil { + oldListener(scs) + } + } + sc, err := ccw.ClientConn.NewSubConn(addrs, opts) + if err != nil { + return nil, err + } + return &healthListenerCapturingSCWrapper{ + SubConn: sc, + listenerCh: ccw.healthListenerCh, + }, nil +} + +func (ccw *healthListenerCapturingCCWrapper) UpdateState(state balancer.State) { + state.Picker = &unwrappingPicker{state.Picker} + ccw.ClientConn.UpdateState(state) +} + +type healthListenerCapturingSCWrapper struct { + balancer.SubConn + listenerCh chan func(balancer.SubConnState) +} + +func (scw *healthListenerCapturingSCWrapper) RegisterHealthListener(listener func(balancer.SubConnState)) { + scw.listenerCh <- listener +} + +// unwrappingPicker unwraps SubConns because the channel expects SubConns to be +// addrConns. +type unwrappingPicker struct { + balancer.Picker +} + +func (pw *unwrappingPicker) Pick(info balancer.PickInfo) (balancer.PickResult, error) { + pr, err := pw.Picker.Pick(info) + if pr.SubConn != nil { + pr.SubConn = pr.SubConn.(*healthListenerCapturingSCWrapper).SubConn + } + return pr, err +} + +// subConnAddresses makes the pickfirst balancer create the requested number of +// SubConns by triggering transient failures. The function returns the +// addresses of the created SubConns. +func subConnAddresses(ctx context.Context, cc *testutils.BalancerClientConn, subConnCount int) ([]resolver.Address, error) { + addresses := []resolver.Address{} + for i := 0; i < subConnCount; i++ { + select { + case <-ctx.Done(): + return nil, fmt.Errorf("test timed out after creating %d subchannels, want %d", i, subConnCount) + case sc := <-cc.NewSubConnCh: + if len(sc.Addresses) != 1 { + return nil, fmt.Errorf("new subchannel created with %d addresses, want 1", len(sc.Addresses)) + } + addresses = append(addresses, sc.Addresses[0]) + sc.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting}) + sc.UpdateState(balancer.SubConnState{ + ConnectivityState: connectivity.TransientFailure, + }) + } + } + return addresses, nil +} + +// stateStoringBalancer stores the state of the SubConns being created. 
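+// It delegates to a pickfirst balancer built over a stateStoringCCWrapper,
+// which appends an scState entry for every SubConn created, allowing tests
+// to assert on the complete SubConn lifecycle.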
+type stateStoringBalancer struct {
+ balancer.Balancer
+ mu sync.Mutex
+ scStates []*scState
+}
+
+func (b *stateStoringBalancer) Close() {
+ b.Balancer.Close()
+}
+
+type stateStoringBalancerBuilder struct {
+ balancer chan *stateStoringBalancer
+}
+
+func (b *stateStoringBalancerBuilder) Name() string {
+ return stateStoringBalancerName
+}
+
+func (b *stateStoringBalancerBuilder) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer {
+ bal := &stateStoringBalancer{}
+ bal.Balancer = balancer.Get(pfbalancer.Name).Build(&stateStoringCCWrapper{cc, bal}, opts)
+ b.balancer <- bal
+ return bal
+}
+
+func (b *stateStoringBalancer) subConnStates() []scState {
+ b.mu.Lock()
+ defer b.mu.Unlock()
+ ret := []scState{}
+ for _, s := range b.scStates {
+ ret = append(ret, *s)
+ }
+ return ret
+}
+
+func (b *stateStoringBalancer) addSCState(state *scState) {
+ b.mu.Lock()
+ b.scStates = append(b.scStates, state)
+ b.mu.Unlock()
+}
+
+type stateStoringCCWrapper struct {
+ balancer.ClientConn
+ b *stateStoringBalancer
+}
+
+func (ccw *stateStoringCCWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
+ oldListener := opts.StateListener
+ scs := &scState{
+ State: connectivity.Idle,
+ Addrs: addrs,
+ }
+ ccw.b.addSCState(scs)
+ opts.StateListener = func(s balancer.SubConnState) {
+ ccw.b.mu.Lock()
+ scs.State = s.ConnectivityState
+ ccw.b.mu.Unlock()
+ oldListener(s)
+ }
+ return ccw.ClientConn.NewSubConn(addrs, opts)
+}
+
+type scState struct {
+ State connectivity.State
+ Addrs []resolver.Address
+}
+
+type backendManager struct {
+ backends []*testServer
+}
+
+func (b *backendManager) stopAllExcept(index int) {
+ for idx, b := range b.backends {
+ if idx != index {
+ b.stop()
+ }
+ }
+}
+
+// resolverAddrs returns a list of resolver addresses for the stub server
+// backends. Useful when pushing addresses to the manual resolver.
+func (b *backendManager) resolverAddrs() []resolver.Address {
+ addrs := make([]resolver.Address, len(b.backends))
+ for i, backend := range b.backends {
+ addrs[i] = resolver.Address{Addr: backend.Address}
+ }
+ return addrs
+}
+
+func (b *backendManager) holds(dialer *testutils.BlockingDialer) []*testutils.Hold {
+ holds := []*testutils.Hold{}
+ for _, addr := range b.resolverAddrs() {
+ holds = append(holds, dialer.Hold(addr.Addr))
+ }
+ return holds
+}
+
+type ccStateSubscriber struct {
+ mu sync.Mutex
+ states []connectivity.State
+}
+
+// transitions returns all the states recorded by the ccStateSubscriber.
+// Reading them under the lock avoids a race in which the test compares the
+// states while the subscriber concurrently receives a connectivity.Shutdown.
+func (c *ccStateSubscriber) transitions() []connectivity.State {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ return c.states
+}
+
+func (c *ccStateSubscriber) OnMessage(msg any) {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ c.states = append(c.states, msg.(connectivity.State))
+}
+
+// mockTimer returns a fake timeAfterFunc that will not trigger automatically.
+// It returns a function that can be called to manually trigger the execution
+// of the scheduled callback.
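+//
+// A minimal usage sketch, mirroring the happy eyeballs tests above (each
+// triggerTimer() call unblocks exactly one scheduled callback):
+//
+//	originalTimer := pfinternal.TimeAfterFunc
+//	defer func() { pfinternal.TimeAfterFunc = originalTimer }()
+//	triggerTimer, timeAfter := mockTimer()
+//	pfinternal.TimeAfterFunc = timeAfter
+//	triggerTimer() // lets one pending happy eyeballs step advance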
+func mockTimer() (triggerFunc func(), timerFunc func(_ time.Duration, f func()) func()) { + timerCh := make(chan struct{}) + triggerFunc = func() { + timerCh <- struct{}{} + } + return triggerFunc, func(_ time.Duration, f func()) func() { + stopCh := make(chan struct{}) + go func() { + select { + case <-timerCh: + f() + case <-stopCh: + } + }() + return sync.OnceFunc(func() { + close(stopCh) + }) + } +} diff --git a/balancer/pickfirst/pickfirst_test.go b/balancer/pickfirst/pickfirst_test.go index 0b360c3b31ed..9052c1fe7a11 100644 --- a/balancer/pickfirst/pickfirst_test.go +++ b/balancer/pickfirst/pickfirst_test.go @@ -25,6 +25,7 @@ import ( "testing" "time" + "google.golang.org/grpc/attributes" "google.golang.org/grpc/balancer" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/internal/grpctest" @@ -130,3 +131,246 @@ func (s) TestPickFirst_ResolverErrorinTF(t *testing.T) { t.Fatalf("Unexpected SubConn shutdown: %v", sc) } } + +// TestAddressList_Iteration verifies the behaviour of the addressList while +// iterating through the entries. +func (s) TestAddressList_Iteration(t *testing.T) { + addrs := []resolver.Address{ + { + Addr: "192.168.1.1", + ServerName: "test-host-1", + Attributes: attributes.New("key-1", "val-1"), + BalancerAttributes: attributes.New("bal-key-1", "bal-val-1"), + }, + { + Addr: "192.168.1.2", + ServerName: "test-host-2", + Attributes: attributes.New("key-2", "val-2"), + BalancerAttributes: attributes.New("bal-key-2", "bal-val-2"), + }, + { + Addr: "192.168.1.3", + ServerName: "test-host-3", + Attributes: attributes.New("key-3", "val-3"), + BalancerAttributes: attributes.New("bal-key-3", "bal-val-3"), + }, + } + + addressList := addressList{} + addressList.updateAddrs(addrs) + + for i := 0; i < len(addrs); i++ { + if got, want := addressList.isValid(), true; got != want { + t.Fatalf("addressList.isValid() = %t, want %t", got, want) + } + if got, want := addressList.currentAddress(), addrs[i]; !want.Equal(got) { + t.Errorf("addressList.currentAddress() = %v, want %v", got, want) + } + if got, want := addressList.increment(), i+1 < len(addrs); got != want { + t.Fatalf("addressList.increment() = %t, want %t", got, want) + } + } + + if got, want := addressList.isValid(), false; got != want { + t.Fatalf("addressList.isValid() = %t, want %t", got, want) + } + + // increment an invalid address list. + if got, want := addressList.increment(), false; got != want { + t.Errorf("addressList.increment() = %t, want %t", got, want) + } + + if got, want := addressList.isValid(), false; got != want { + t.Errorf("addressList.isValid() = %t, want %t", got, want) + } + + addressList.reset() + for i := 0; i < len(addrs); i++ { + if got, want := addressList.isValid(), true; got != want { + t.Fatalf("addressList.isValid() = %t, want %t", got, want) + } + if got, want := addressList.currentAddress(), addrs[i]; !want.Equal(got) { + t.Errorf("addressList.currentAddress() = %v, want %v", got, want) + } + if got, want := addressList.increment(), i+1 < len(addrs); got != want { + t.Fatalf("addressList.increment() = %t, want %t", got, want) + } + } +} + +// TestAddressList_SeekTo verifies the behaviour of addressList.seekTo. 
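+// The keys below differ from the stored entries only in their balancer
+// attributes, exercising the expectation that seekTo ignores balancer
+// attributes when comparing addresses.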
+func (s) TestAddressList_SeekTo(t *testing.T) {
+	addrs := []resolver.Address{
+		{
+			Addr:               "192.168.1.1",
+			ServerName:         "test-host-1",
+			Attributes:         attributes.New("key-1", "val-1"),
+			BalancerAttributes: attributes.New("bal-key-1", "bal-val-1"),
+		},
+		{
+			Addr:               "192.168.1.2",
+			ServerName:         "test-host-2",
+			Attributes:         attributes.New("key-2", "val-2"),
+			BalancerAttributes: attributes.New("bal-key-2", "bal-val-2"),
+		},
+		{
+			Addr:               "192.168.1.3",
+			ServerName:         "test-host-3",
+			Attributes:         attributes.New("key-3", "val-3"),
+			BalancerAttributes: attributes.New("bal-key-3", "bal-val-3"),
+		},
+	}
+
+	addressList := addressList{}
+	addressList.updateAddrs(addrs)
+
+	// Try finding an address in the list.
+	key := resolver.Address{
+		Addr:               "192.168.1.2",
+		ServerName:         "test-host-2",
+		Attributes:         attributes.New("key-2", "val-2"),
+		BalancerAttributes: attributes.New("ignored", "bal-val-2"),
+	}
+
+	if got, want := addressList.seekTo(key), true; got != want {
+		t.Errorf("addressList.seekTo(%v) = %t, want %t", key, got, want)
+	}
+
+	// It should be possible to increment once more now that the pointer has advanced.
+	if got, want := addressList.increment(), true; got != want {
+		t.Errorf("addressList.increment() = %t, want %t", got, want)
+	}
+
+	if got, want := addressList.increment(), false; got != want {
+		t.Errorf("addressList.increment() = %t, want %t", got, want)
+	}
+
+	// Seek to the key again; it is now behind the pointer.
+	if got, want := addressList.seekTo(key), true; got != want {
+		t.Errorf("addressList.seekTo(%v) = %t, want %t", key, got, want)
+	}
+
+	// Seek to a key not in the list.
+	key = resolver.Address{
+		Addr:               "192.168.1.5",
+		ServerName:         "test-host-5",
+		Attributes:         attributes.New("key-5", "val-5"),
+		BalancerAttributes: attributes.New("ignored", "bal-val-5"),
+	}
+
+	if got, want := addressList.seekTo(key), false; got != want {
+		t.Errorf("addressList.seekTo(%v) = %t, want %t", key, got, want)
+	}
+
+	// It should be possible to increment once more since the pointer has not advanced.
+	if got, want := addressList.increment(), true; got != want {
+		t.Errorf("addressList.increment() = %t, want %t", got, want)
+	}
+
+	if got, want := addressList.increment(), false; got != want {
+		t.Errorf("addressList.increment() = %t, want %t", got, want)
+	}
+}
+
+// TestPickFirstLeaf_TFPickerUpdate sends TRANSIENT_FAILURE SubConn state updates
+// for each SubConn managed by a pickfirst balancer. It verifies that the picker
+// is updated with the expected frequency.
+func (s) TestPickFirstLeaf_TFPickerUpdate(t *testing.T) {
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel()
+	cc := testutils.NewBalancerClientConn(t)
+	bal := pickfirstBuilder{}.Build(cc, balancer.BuildOptions{})
+	defer bal.Close()
+	ccState := balancer.ClientConnState{
+		ResolverState: resolver.State{
+			Endpoints: []resolver.Endpoint{
+				{Addresses: []resolver.Address{{Addr: "1.1.1.1:1"}}},
+				{Addresses: []resolver.Address{{Addr: "1.1.1.1:1"}}}, // duplicate, should be ignored.
+				{Addresses: []resolver.Address{{Addr: "2.2.2.2:2"}}},
+				{Addresses: []resolver.Address{{Addr: "1.1.1.1:1"}}}, // duplicate, should be ignored.
+			},
+		},
+	}
+	if err := bal.UpdateClientConnState(ccState); err != nil {
+		t.Fatalf("UpdateClientConnState(%v) returned error: %v", ccState, err)
+	}
+
+	// PF should report TRANSIENT_FAILURE only once all the SubConns have failed
+	// once.
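+	// Concretely: with the two unique addresses above ("1.1.1.1:1" and
+	// "2.2.2.2:2"), the first picker error should surface only after both
+	// SubConns have reported a TRANSIENT_FAILURE, and each later picker
+	// update should require one fresh failure from every SubConn, mirroring
+	// the assertions below.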
+	tfErr := fmt.Errorf("test err: connection refused")
+	sc1 := <-cc.NewSubConnCh
+	select {
+	case <-sc1.ConnectCh:
+	case <-ctx.Done():
+		t.Fatal("Context timed out waiting for Connect() to be called on sc1.")
+	}
+	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting})
+	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.TransientFailure, ConnectionError: tfErr})
+
+	// Move the SubConn back to IDLE; it should not be re-connected until the
+	// first pass is complete.
+	shortCtx, shortCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
+	defer shortCancel()
+	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Idle})
+	select {
+	case <-sc1.ConnectCh:
+		t.Fatal("Connect() unexpectedly called on sc1.")
+	case <-shortCtx.Done():
+	}
+
+	if err := cc.WaitForPickerWithErr(ctx, balancer.ErrNoSubConnAvailable); err != nil {
+		t.Fatalf("cc.WaitForPickerWithErr(%v) returned error: %v", balancer.ErrNoSubConnAvailable, err)
+	}
+
+	sc2 := <-cc.NewSubConnCh
+	select {
+	case <-sc2.ConnectCh:
+	case <-ctx.Done():
+		t.Fatal("Context timed out waiting for Connect() to be called on sc2.")
+	}
+	sc2.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting})
+	sc2.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.TransientFailure, ConnectionError: tfErr})
+
+	if err := cc.WaitForPickerWithErr(ctx, tfErr); err != nil {
+		t.Fatalf("cc.WaitForPickerWithErr(%v) returned error: %v", tfErr, err)
+	}
+
+	// Subsequent TRANSIENT_FAILUREs should be reported only after seeing "# of SubConns"
+	// TRANSIENT_FAILUREs.
+
+	// Both SubConns should attempt to reconnect in parallel.
+	select {
+	case <-sc1.ConnectCh:
+	case <-ctx.Done():
+		t.Fatal("Context timed out waiting for Connect() to be called on sc1.")
+	}
+
+	shortCtx, shortCancel = context.WithTimeout(ctx, defaultTestShortTimeout)
+	defer shortCancel()
+	select {
+	case <-sc2.ConnectCh:
+		t.Fatal("Connect() called on sc2 before it finished backing off.")
+	case <-shortCtx.Done():
+	}
+
+	sc2.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Idle})
+	select {
+	case <-sc2.ConnectCh:
+	case <-ctx.Done():
+		t.Fatal("Context timed out waiting for Connect() to be called on sc2.")
+	}
+
+	newTfErr := fmt.Errorf("test err: unreachable")
+	sc2.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.TransientFailure, ConnectionError: newTfErr})
+	select {
+	case <-time.After(defaultTestShortTimeout):
+	case p := <-cc.NewPickerCh:
+		sc, err := p.Pick(balancer.PickInfo{})
+		t.Fatalf("Unexpected picker update: %v, %v", sc, err)
+	}
+
+	sc2.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.TransientFailure, ConnectionError: newTfErr})
+	if err := cc.WaitForPickerWithErr(ctx, newTfErr); err != nil {
+		t.Fatalf("cc.WaitForPickerWithErr(%v) returned error: %v", newTfErr, err)
+	}
+}
diff --git a/balancer/pickfirst/pickfirstleaf_ext_test.go b/balancer/pickfirst/pickfirstleaf_ext_test.go
deleted file mode 100644
index 2263b1a4d54e..000000000000
--- a/balancer/pickfirst/pickfirstleaf_ext_test.go
+++ /dev/null
@@ -1,1820 +0,0 @@
-/*
- *
- * Copyright 2024 gRPC authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package pickfirst_test - -import ( - "context" - "fmt" - "sync" - "testing" - "time" - - "github.com/google/go-cmp/cmp" - - "google.golang.org/grpc" - "google.golang.org/grpc/balancer" - pfbalancer "google.golang.org/grpc/balancer/pickfirst" - pfinternal "google.golang.org/grpc/balancer/pickfirst/internal" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/connectivity" - "google.golang.org/grpc/credentials/insecure" - "google.golang.org/grpc/internal" - "google.golang.org/grpc/internal/balancer/stub" - "google.golang.org/grpc/internal/grpcsync" - "google.golang.org/grpc/internal/grpctest" - "google.golang.org/grpc/internal/stubserver" - "google.golang.org/grpc/internal/testutils" - "google.golang.org/grpc/internal/testutils/pickfirst" - "google.golang.org/grpc/internal/testutils/stats" - "google.golang.org/grpc/metadata" - "google.golang.org/grpc/resolver" - "google.golang.org/grpc/resolver/manual" - "google.golang.org/grpc/status" - - testgrpc "google.golang.org/grpc/interop/grpc_testing" - testpb "google.golang.org/grpc/interop/grpc_testing" -) - -const ( - // Default timeout for tests in this package. - defaultTestTimeout = 10 * time.Second - // Default short timeout, to be used when waiting for events which are not - // expected to happen. - defaultTestShortTimeout = 100 * time.Millisecond - stateStoringBalancerName = "state_storing" -) - -var ( - stateStoringServiceConfig = fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, stateStoringBalancerName) - ignoreBalAttributesOpt = cmp.Transformer("IgnoreBalancerAttributes", func(a resolver.Address) resolver.Address { - a.BalancerAttributes = nil - return a - }) -) - -type s struct { - grpctest.Tester -} - -func Test(t *testing.T) { - grpctest.RunSubTests(t, s{}) -} - -// testServer is a server than can be stopped and resumed without closing -// the listener. This guarantees the same port number (and address) is used -// after restart. When a server is stopped, it accepts and closes all tcp -// connections from clients. -type testServer struct { - stubserver.StubServer - lis *testutils.RestartableListener -} - -func (s *testServer) stop() { - s.lis.Stop() -} - -func (s *testServer) resume() { - s.lis.Restart() -} - -func newTestServer(t *testing.T) *testServer { - l, err := testutils.LocalTCPListener() - if err != nil { - t.Fatalf("Failed to create listener: %v", err) - } - rl := testutils.NewRestartableListener(l) - ss := stubserver.StubServer{ - EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) { return &testpb.Empty{}, nil }, - Listener: rl, - } - return &testServer{ - StubServer: ss, - lis: rl, - } -} - -// setupPickFirstLeaf performs steps required for pick_first tests. It starts a -// bunch of backends exporting the TestService, and creates a ClientConn to them. 
-func setupPickFirstLeaf(t *testing.T, backendCount int, opts ...grpc.DialOption) (*grpc.ClientConn, *manual.Resolver, *backendManager) { - t.Helper() - r := manual.NewBuilderWithScheme("whatever") - backends := make([]*testServer, backendCount) - addrs := make([]resolver.Address, backendCount) - - for i := 0; i < backendCount; i++ { - server := newTestServer(t) - backend := stubserver.StartTestService(t, &server.StubServer) - t.Cleanup(func() { - backend.Stop() - }) - backends[i] = server - addrs[i] = resolver.Address{Addr: backend.Address} - } - - dopts := []grpc.DialOption{ - grpc.WithTransportCredentials(insecure.NewCredentials()), - grpc.WithResolvers(r), - } - dopts = append(dopts, opts...) - cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...) - if err != nil { - t.Fatalf("grpc.NewClient() failed: %v", err) - } - t.Cleanup(func() { cc.Close() }) - - // At this point, the resolver has not returned any addresses to the channel. - // This RPC must block until the context expires. - sCtx, sCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout) - defer sCancel() - client := testgrpc.NewTestServiceClient(cc) - if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded { - t.Fatalf("EmptyCall() = %s, want %s", status.Code(err), codes.DeadlineExceeded) - } - return cc, r, &backendManager{backends} -} - -// TestPickFirstLeaf_SimpleResolverUpdate tests the behaviour of the pick first -// policy when given an list of addresses. The following steps are carried -// out in order: -// 1. A list of addresses are given through the resolver. Only one -// of the servers is running. -// 2. RPCs are sent to verify they reach the running server. -// -// The state transitions of the ClientConn and all the SubConns created are -// verified. 
-func (s) TestPickFirstLeaf_SimpleResolverUpdate_FirstServerReady(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - balCh := make(chan *stateStoringBalancer, 1) - balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) - - cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) - addrs := bm.resolverAddrs() - stateSubscriber := &ccStateSubscriber{} - internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) - - r.UpdateState(resolver.State{Addresses: addrs}) - var bal *stateStoringBalancer - select { - case bal = <-balCh: - case <-ctx.Done(): - t.Fatal("Context expired while waiting for balancer to be built") - } - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { - t.Fatal(err) - } - - wantSCStates := []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Ready}, - } - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - wantConnStateTransitions := []connectivity.State{ - connectivity.Connecting, - connectivity.Ready, - } - if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { - t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) - } -} - -func (s) TestPickFirstLeaf_SimpleResolverUpdate_FirstServerUnReady(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - balCh := make(chan *stateStoringBalancer, 1) - balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) - - cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) - addrs := bm.resolverAddrs() - stateSubscriber := &ccStateSubscriber{} - internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) - bm.stopAllExcept(1) - - r.UpdateState(resolver.State{Addresses: addrs}) - var bal *stateStoringBalancer - select { - case bal = <-balCh: - case <-ctx.Done(): - t.Fatal("Context expired while waiting for balancer to be built") - } - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { - t.Fatal(err) - } - - wantSCStates := []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, - } - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - wantConnStateTransitions := []connectivity.State{ - connectivity.Connecting, - connectivity.Ready, - } - if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { - t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) - } -} - -func (s) TestPickFirstLeaf_SimpleResolverUpdate_DuplicateAddrs(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - balCh := make(chan *stateStoringBalancer, 1) - balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) - - cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) - addrs := bm.resolverAddrs() - stateSubscriber := &ccStateSubscriber{} - 
internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) - bm.stopAllExcept(1) - - // Add a duplicate entry in the addresslist - r.UpdateState(resolver.State{ - Addresses: append([]resolver.Address{addrs[0]}, addrs...), - }) - var bal *stateStoringBalancer - select { - case bal = <-balCh: - case <-ctx.Done(): - t.Fatal("Context expired while waiting for balancer to be built") - } - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { - t.Fatal(err) - } - - wantSCStates := []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, - } - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - wantConnStateTransitions := []connectivity.State{ - connectivity.Connecting, - connectivity.Ready, - } - if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { - t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) - } -} - -// TestPickFirstLeaf_ResolverUpdates_DisjointLists tests the behaviour of the pick first -// policy when the following steps are carried out in order: -// 1. A list of addresses are given through the resolver. Only one -// of the servers is running. -// 2. RPCs are sent to verify they reach the running server. -// 3. A second resolver update is sent. Again, only one of the servers is -// running. This may not be the same server as before. -// 4. RPCs are sent to verify they reach the running server. -// -// The state transitions of the ClientConn and all the SubConns created are -// verified. 
-func (s) TestPickFirstLeaf_ResolverUpdates_DisjointLists(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - balCh := make(chan *stateStoringBalancer, 1) - balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) - cc, r, bm := setupPickFirstLeaf(t, 4, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) - addrs := bm.resolverAddrs() - stateSubscriber := &ccStateSubscriber{} - internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) - - bm.backends[0].stop() - r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[1]}}) - var bal *stateStoringBalancer - select { - case bal = <-balCh: - case <-ctx.Done(): - t.Fatal("Context expired while waiting for balancer to be built") - } - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { - t.Fatal(err) - } - wantSCStates := []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - bm.backends[2].stop() - r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[2], addrs[3]}}) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[3]); err != nil { - t.Fatal(err) - } - wantSCStates = []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[2]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[3]}, State: connectivity.Ready}, - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - wantConnStateTransitions := []connectivity.State{ - connectivity.Connecting, - connectivity.Ready, - connectivity.Connecting, - connectivity.Ready, - } - if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { - t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) - } -} - -func (s) TestPickFirstLeaf_ResolverUpdates_ActiveBackendInUpdatedList(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - balCh := make(chan *stateStoringBalancer, 1) - balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) - cc, r, bm := setupPickFirstLeaf(t, 3, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) - addrs := bm.resolverAddrs() - stateSubscriber := &ccStateSubscriber{} - internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) - - bm.backends[0].stop() - r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[1]}}) - var bal *stateStoringBalancer - select { - case bal = <-balCh: - case <-ctx.Done(): - t.Fatal("Context expired while waiting for balancer to be built") - } - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { - t.Fatal(err) - } - wantSCStates := []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, - } - - if diff := 
cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - bm.backends[2].stop() - r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[2], addrs[1]}}) - - // Verify that the ClientConn stays in READY. - sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) - defer sCancel() - testutils.AwaitNoStateChange(sCtx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { - t.Fatal(err) - } - wantSCStates = []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - wantConnStateTransitions := []connectivity.State{ - connectivity.Connecting, - connectivity.Ready, - } - if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { - t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) - } -} - -func (s) TestPickFirstLeaf_ResolverUpdates_InActiveBackendInUpdatedList(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - balCh := make(chan *stateStoringBalancer, 1) - balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) - cc, r, bm := setupPickFirstLeaf(t, 3, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) - addrs := bm.resolverAddrs() - stateSubscriber := &ccStateSubscriber{} - internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) - - bm.backends[0].stop() - r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[1]}}) - var bal *stateStoringBalancer - select { - case bal = <-balCh: - case <-ctx.Done(): - t.Fatal("Context expired while waiting for balancer to be built") - } - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { - t.Fatal(err) - } - wantSCStates := []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - bm.backends[2].stop() - bm.backends[0].resume() - - r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[2]}}) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { - t.Fatal(err) - } - wantSCStates = []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Ready}, - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - wantConnStateTransitions := []connectivity.State{ - connectivity.Connecting, - connectivity.Ready, - connectivity.Connecting, - connectivity.Ready, - } - if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { - t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) - } -} - -func (s) TestPickFirstLeaf_ResolverUpdates_IdenticalLists(t *testing.T) { - ctx, 
cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - balCh := make(chan *stateStoringBalancer, 1) - balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) - cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) - addrs := bm.resolverAddrs() - stateSubscriber := &ccStateSubscriber{} - internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) - - bm.backends[0].stop() - r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[1]}}) - var bal *stateStoringBalancer - select { - case bal = <-balCh: - case <-ctx.Done(): - t.Fatal("Context expired while waiting for balancer to be built") - } - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { - t.Fatal(err) - } - wantSCStates := []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[1]}}) - - // Verify that the ClientConn stays in READY. - sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) - defer sCancel() - testutils.AwaitNoStateChange(sCtx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { - t.Fatal(err) - } - wantSCStates = []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - wantConnStateTransitions := []connectivity.State{ - connectivity.Connecting, - connectivity.Ready, - } - if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { - t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) - } -} - -// TestPickFirstLeaf_StopConnectedServer tests the behaviour of the pick first -// policy when the connected server is shut down. It carries out the following -// steps in order: -// 1. A list of addresses are given through the resolver. Only one -// of the servers is running. -// 2. The running server is stopped, causing the ClientConn to enter IDLE. -// 3. A (possibly different) server is started. -// 4. RPCs are made to kick the ClientConn out of IDLE. The test verifies that -// the RPCs reach the running server. -// -// The test verifies the ClientConn state transitions. -func (s) TestPickFirstLeaf_StopConnectedServer_FirstServerRestart(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - balCh := make(chan *stateStoringBalancer, 1) - balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) - cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) - addrs := bm.resolverAddrs() - stateSubscriber := &ccStateSubscriber{} - internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) - - // shutdown all active backends except the target. 
- bm.stopAllExcept(0) - - r.UpdateState(resolver.State{Addresses: addrs}) - var bal *stateStoringBalancer - select { - case bal = <-balCh: - case <-ctx.Done(): - t.Fatal("Context expired while waiting for balancer to be built") - } - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { - t.Fatal(err) - } - - wantSCStates := []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Ready}, - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - // Shut down the connected server. - bm.backends[0].stop() - testutils.AwaitState(ctx, t, cc, connectivity.Idle) - - // Start the new target server. - bm.backends[0].resume() - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { - t.Fatal(err) - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - wantConnStateTransitions := []connectivity.State{ - connectivity.Connecting, - connectivity.Ready, - connectivity.Idle, - connectivity.Connecting, - connectivity.Ready, - } - if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { - t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) - } -} - -func (s) TestPickFirstLeaf_StopConnectedServer_SecondServerRestart(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - balCh := make(chan *stateStoringBalancer, 1) - balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) - cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) - addrs := bm.resolverAddrs() - stateSubscriber := &ccStateSubscriber{} - internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) - - // shutdown all active backends except the target. - bm.stopAllExcept(1) - - r.UpdateState(resolver.State{Addresses: addrs}) - var bal *stateStoringBalancer - select { - case bal = <-balCh: - case <-ctx.Done(): - t.Fatal("Context expired while waiting for balancer to be built") - } - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { - t.Fatal(err) - } - - wantSCStates := []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - // Shut down the connected server. - bm.backends[1].stop() - testutils.AwaitState(ctx, t, cc, connectivity.Idle) - - // Start the new target server. 
- bm.backends[1].resume() - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { - t.Fatal(err) - } - - wantSCStates = []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - wantConnStateTransitions := []connectivity.State{ - connectivity.Connecting, - connectivity.Ready, - connectivity.Idle, - connectivity.Connecting, - connectivity.Ready, - } - if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { - t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) - } -} - -func (s) TestPickFirstLeaf_StopConnectedServer_SecondServerToFirst(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - balCh := make(chan *stateStoringBalancer, 1) - balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) - cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) - addrs := bm.resolverAddrs() - stateSubscriber := &ccStateSubscriber{} - internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) - - // shutdown all active backends except the target. - bm.stopAllExcept(1) - - r.UpdateState(resolver.State{Addresses: addrs}) - var bal *stateStoringBalancer - select { - case bal = <-balCh: - case <-ctx.Done(): - t.Fatal("Context expired while waiting for balancer to be built") - } - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { - t.Fatal(err) - } - - wantSCStates := []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - // Shut down the connected server. - bm.backends[1].stop() - testutils.AwaitState(ctx, t, cc, connectivity.Idle) - - // Start the new target server. 
- bm.backends[0].resume() - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { - t.Fatal(err) - } - - wantSCStates = []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Ready}, - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - wantConnStateTransitions := []connectivity.State{ - connectivity.Connecting, - connectivity.Ready, - connectivity.Idle, - connectivity.Connecting, - connectivity.Ready, - } - if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { - t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) - } -} - -func (s) TestPickFirstLeaf_StopConnectedServer_FirstServerToSecond(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - balCh := make(chan *stateStoringBalancer, 1) - balancer.Register(&stateStoringBalancerBuilder{balancer: balCh}) - cc, r, bm := setupPickFirstLeaf(t, 2, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) - addrs := bm.resolverAddrs() - stateSubscriber := &ccStateSubscriber{} - internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) - - // shutdown all active backends except the target. - bm.stopAllExcept(0) - - r.UpdateState(resolver.State{Addresses: addrs}) - var bal *stateStoringBalancer - select { - case bal = <-balCh: - case <-ctx.Done(): - t.Fatal("Context expired while waiting for balancer to be built") - } - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { - t.Fatal(err) - } - - wantSCStates := []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Ready}, - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - // Shut down the connected server. - bm.backends[0].stop() - testutils.AwaitState(ctx, t, cc, connectivity.Idle) - - // Start the new target server. - bm.backends[1].resume() - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { - t.Fatal(err) - } - - wantSCStates = []scState{ - {Addrs: []resolver.Address{addrs[0]}, State: connectivity.Shutdown}, - {Addrs: []resolver.Address{addrs[1]}, State: connectivity.Ready}, - } - - if diff := cmp.Diff(wantSCStates, bal.subConnStates(), ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn states mismatch (-want +got):\n%s", diff) - } - - wantConnStateTransitions := []connectivity.State{ - connectivity.Connecting, - connectivity.Ready, - connectivity.Idle, - connectivity.Connecting, - connectivity.Ready, - } - if diff := cmp.Diff(wantConnStateTransitions, stateSubscriber.transitions()); diff != "" { - t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) - } -} - -// TestPickFirstLeaf_EmptyAddressList carries out the following steps in order: -// 1. Send a resolver update with one running backend. -// 2. Send an empty address list causing the balancer to enter TRANSIENT_FAILURE. -// 3. Send a resolver update with one running backend. -// The test verifies the ClientConn state transitions. 
-func (s) TestPickFirstLeaf_EmptyAddressList(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - balChan := make(chan *stateStoringBalancer, 1) - balancer.Register(&stateStoringBalancerBuilder{balancer: balChan}) - cc, r, bm := setupPickFirstLeaf(t, 1, grpc.WithDefaultServiceConfig(stateStoringServiceConfig)) - addrs := bm.resolverAddrs() - - stateSubscriber := &ccStateSubscriber{} - internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, stateSubscriber) - - r.UpdateState(resolver.State{Addresses: addrs}) - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { - t.Fatal(err) - } - - r.UpdateState(resolver.State{}) - testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) - - // The balancer should have entered transient failure. - // It should transition to CONNECTING from TRANSIENT_FAILURE as sticky TF - // only applies when the initial TF is reported due to connection failures - // and not bad resolver states. - r.UpdateState(resolver.State{Addresses: addrs}) - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { - t.Fatal(err) - } - - wantTransitions := []connectivity.State{ - // From first resolver update. - connectivity.Connecting, - connectivity.Ready, - // From second update. - connectivity.TransientFailure, - // From third update. - connectivity.Connecting, - connectivity.Ready, - } - - if diff := cmp.Diff(wantTransitions, stateSubscriber.transitions()); diff != "" { - t.Errorf("ClientConn states mismatch (-want +got):\n%s", diff) - } -} - -// Test verifies that pickfirst correctly detects the end of the first happy -// eyeballs pass when the timer causes pickfirst to reach the end of the address -// list and failures are reported out of order. -func (s) TestPickFirstLeaf_HappyEyeballs_TF_AfterEndOfList(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - originalTimer := pfinternal.TimeAfterFunc - defer func() { - pfinternal.TimeAfterFunc = originalTimer - }() - triggerTimer, timeAfter := mockTimer() - pfinternal.TimeAfterFunc = timeAfter - - tmr := stats.NewTestMetricsRecorder() - dialer := testutils.NewBlockingDialer() - opts := []grpc.DialOption{ - grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pfbalancer.Name)), - grpc.WithContextDialer(dialer.DialContext), - grpc.WithStatsHandler(tmr), - } - cc, rb, bm := setupPickFirstLeaf(t, 3, opts...) - addrs := bm.resolverAddrs() - holds := bm.holds(dialer) - rb.UpdateState(resolver.State{Addresses: addrs}) - cc.Connect() - - testutils.AwaitState(ctx, t, cc, connectivity.Connecting) - - // Verify that only the first server is contacted. - if holds[0].Wait(ctx) != true { - t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 0, addrs[0]) - } - if holds[1].IsStarted() != false { - t.Fatalf("Server %d with address %q contacted unexpectedly", 1, addrs[1]) - } - if holds[2].IsStarted() != false { - t.Fatalf("Server %d with address %q contacted unexpectedly", 2, addrs[2]) - } - - // Make the happy eyeballs timer fire once and verify that the - // second server is contacted, but the third isn't. 
- triggerTimer() - if holds[1].Wait(ctx) != true { - t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 1, addrs[1]) - } - if holds[2].IsStarted() != false { - t.Fatalf("Server %d with address %q contacted unexpectedly", 2, addrs[2]) - } - - // Make the happy eyeballs timer fire once more and verify that the - // third server is contacted. - triggerTimer() - if holds[2].Wait(ctx) != true { - t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 2, addrs[2]) - } - - // First SubConn Fails. - holds[0].Fail(fmt.Errorf("test error")) - tmr.WaitForInt64CountIncr(ctx, 1) - - // No TF should be reported until the first pass is complete. - shortCtx, shortCancel := context.WithTimeout(ctx, defaultTestShortTimeout) - defer shortCancel() - testutils.AwaitNotState(shortCtx, t, cc, connectivity.TransientFailure) - - // Third SubConn fails. - shortCtx, shortCancel = context.WithTimeout(ctx, defaultTestShortTimeout) - defer shortCancel() - holds[2].Fail(fmt.Errorf("test error")) - tmr.WaitForInt64CountIncr(ctx, 1) - testutils.AwaitNotState(shortCtx, t, cc, connectivity.TransientFailure) - - // Last SubConn fails, this should result in a TF update. - holds[1].Fail(fmt.Errorf("test error")) - tmr.WaitForInt64CountIncr(ctx, 1) - testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) - - // Only connection attempt fails in this test. - if got, _ := tmr.Metric("grpc.lb.pick_first.connection_attempts_succeeded"); got != 0 { - t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.connection_attempts_succeeded", got, 0) - } - if got, _ := tmr.Metric("grpc.lb.pick_first.connection_attempts_failed"); got != 1 { - t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.connection_attempts_failed", got, 1) - } - if got, _ := tmr.Metric("grpc.lb.pick_first.disconnections"); got != 0 { - t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.disconnections", got, 0) - } -} - -// Test verifies that pickfirst attempts to connect to the second backend once -// the happy eyeballs timer expires. -func (s) TestPickFirstLeaf_HappyEyeballs_TriggerConnectionDelay(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - originalTimer := pfinternal.TimeAfterFunc - defer func() { - pfinternal.TimeAfterFunc = originalTimer - }() - triggerTimer, timeAfter := mockTimer() - pfinternal.TimeAfterFunc = timeAfter - - tmr := stats.NewTestMetricsRecorder() - dialer := testutils.NewBlockingDialer() - opts := []grpc.DialOption{ - grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pfbalancer.Name)), - grpc.WithContextDialer(dialer.DialContext), - grpc.WithStatsHandler(tmr), - } - cc, rb, bm := setupPickFirstLeaf(t, 2, opts...) - addrs := bm.resolverAddrs() - holds := bm.holds(dialer) - rb.UpdateState(resolver.State{Addresses: addrs}) - cc.Connect() - - testutils.AwaitState(ctx, t, cc, connectivity.Connecting) - - // Verify that only the first server is contacted. - if holds[0].Wait(ctx) != true { - t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 0, addrs[0]) - } - if holds[1].IsStarted() != false { - t.Fatalf("Server %d with address %q contacted unexpectedly", 1, addrs[1]) - } - - // Make the happy eyeballs timer fire once and verify that the - // second server is contacted. 
- triggerTimer() - if holds[1].Wait(ctx) != true { - t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 1, addrs[1]) - } - - // Get the connection attempt to the second server to succeed and verify - // that the channel becomes READY. - holds[1].Resume() - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - // Only connection attempt successes in this test. - if got, _ := tmr.Metric("grpc.lb.pick_first.connection_attempts_succeeded"); got != 1 { - t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.connection_attempts_succeeded", got, 1) - } - if got, _ := tmr.Metric("grpc.lb.pick_first.connection_attempts_failed"); got != 0 { - t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.connection_attempts_failed", got, 0) - } - if got, _ := tmr.Metric("grpc.lb.pick_first.disconnections"); got != 0 { - t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.disconnections", got, 0) - } -} - -// Test tests the pickfirst balancer by causing a SubConn to fail and then -// jumping to the 3rd SubConn after the happy eyeballs timer expires. -func (s) TestPickFirstLeaf_HappyEyeballs_TF_ThenTimerFires(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - originalTimer := pfinternal.TimeAfterFunc - defer func() { - pfinternal.TimeAfterFunc = originalTimer - }() - triggerTimer, timeAfter := mockTimer() - pfinternal.TimeAfterFunc = timeAfter - - tmr := stats.NewTestMetricsRecorder() - dialer := testutils.NewBlockingDialer() - opts := []grpc.DialOption{ - grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pfbalancer.Name)), - grpc.WithContextDialer(dialer.DialContext), - grpc.WithStatsHandler(tmr), - } - cc, rb, bm := setupPickFirstLeaf(t, 3, opts...) - addrs := bm.resolverAddrs() - holds := bm.holds(dialer) - rb.UpdateState(resolver.State{Addresses: addrs}) - cc.Connect() - - testutils.AwaitState(ctx, t, cc, connectivity.Connecting) - - // Verify that only the first server is contacted. - if holds[0].Wait(ctx) != true { - t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 0, addrs[0]) - } - if holds[1].IsStarted() != false { - t.Fatalf("Server %d with address %q contacted unexpectedly", 1, addrs[1]) - } - if holds[2].IsStarted() != false { - t.Fatalf("Server %d with address %q contacted unexpectedly", 2, addrs[2]) - } - - // First SubConn Fails. - holds[0].Fail(fmt.Errorf("test error")) - - // Verify that only the second server is contacted. - if holds[1].Wait(ctx) != true { - t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 1, addrs[1]) - } - if got, _ := tmr.Metric("grpc.lb.pick_first.connection_attempts_failed"); got != 1 { - t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.connection_attempts_failed", got, 1) - } - if holds[2].IsStarted() != false { - t.Fatalf("Server %d with address %q contacted unexpectedly", 2, addrs[2]) - } - - // The happy eyeballs timer expires, pickfirst should stop waiting for - // server[1] to report a failure/success and request the creation of a third - // SubConn. - triggerTimer() - if holds[2].Wait(ctx) != true { - t.Fatalf("Timeout waiting for server %d with address %q to be contacted", 2, addrs[2]) - } - - // Get the connection attempt to the second server to succeed and verify - // that the channel becomes READY. 
- holds[1].Resume() - testutils.AwaitState(ctx, t, cc, connectivity.Ready) - - if got, _ := tmr.Metric("grpc.lb.pick_first.connection_attempts_succeeded"); got != 1 { - t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.connection_attempts_succeeded", got, 1) - } - if got, _ := tmr.Metric("grpc.lb.pick_first.disconnections"); got != 0 { - t.Errorf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.pick_first.disconnections", got, 0) - } -} - -func (s) TestPickFirstLeaf_InterleavingIPV4Preffered(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - cc := testutils.NewBalancerClientConn(t) - bal := balancer.Get(pfbalancer.Name).Build(cc, balancer.BuildOptions{}) - defer bal.Close() - ccState := balancer.ClientConnState{ - ResolverState: resolver.State{ - Endpoints: []resolver.Endpoint{ - {Addresses: []resolver.Address{{Addr: "1.1.1.1:1111"}}}, - {Addresses: []resolver.Address{{Addr: "2.2.2.2:2"}}}, - {Addresses: []resolver.Address{{Addr: "3.3.3.3:3"}}}, - // IPv4-mapped IPv6 address, considered as an IPv4 for - // interleaving. - {Addresses: []resolver.Address{{Addr: "[::FFFF:192.168.0.1]:2222"}}}, - {Addresses: []resolver.Address{{Addr: "[0001:0001:0001:0001:0001:0001:0001:0001]:8080"}}}, - {Addresses: []resolver.Address{{Addr: "[0002:0002:0002:0002:0002:0002:0002:0002]:8080"}}}, - {Addresses: []resolver.Address{{Addr: "[fe80::1%eth0]:3333"}}}, - {Addresses: []resolver.Address{{Addr: "grpc.io:80"}}}, // not an IP. - }, - }, - } - if err := bal.UpdateClientConnState(ccState); err != nil { - t.Fatalf("UpdateClientConnState(%v) returned error: %v", ccState, err) - } - - wantAddrs := []resolver.Address{ - {Addr: "1.1.1.1:1111"}, - {Addr: "[0001:0001:0001:0001:0001:0001:0001:0001]:8080"}, - {Addr: "grpc.io:80"}, - {Addr: "2.2.2.2:2"}, - {Addr: "[0002:0002:0002:0002:0002:0002:0002:0002]:8080"}, - {Addr: "3.3.3.3:3"}, - {Addr: "[fe80::1%eth0]:3333"}, - {Addr: "[::FFFF:192.168.0.1]:2222"}, - } - - gotAddrs, err := subConnAddresses(ctx, cc, 8) - if err != nil { - t.Fatalf("%v", err) - } - if diff := cmp.Diff(wantAddrs, gotAddrs, ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn creation order mismatch (-want +got):\n%s", diff) - } -} - -func (s) TestPickFirstLeaf_InterleavingIPv6Preffered(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - cc := testutils.NewBalancerClientConn(t) - bal := balancer.Get(pfbalancer.Name).Build(cc, balancer.BuildOptions{}) - defer bal.Close() - ccState := balancer.ClientConnState{ - ResolverState: resolver.State{ - Endpoints: []resolver.Endpoint{ - {Addresses: []resolver.Address{{Addr: "[0001:0001:0001:0001:0001:0001:0001:0001]:8080"}}}, - {Addresses: []resolver.Address{{Addr: "[0001:0001:0001:0001:0001:0001:0001:0001]:8080"}}}, // duplicate, should be ignored. - {Addresses: []resolver.Address{{Addr: "1.1.1.1:1111"}}}, - {Addresses: []resolver.Address{{Addr: "2.2.2.2:2"}}}, - {Addresses: []resolver.Address{{Addr: "3.3.3.3:3"}}}, - {Addresses: []resolver.Address{{Addr: "[::FFFF:192.168.0.1]:2222"}}}, - {Addresses: []resolver.Address{{Addr: "[0002:0002:0002:0002:0002:0002:0002:0002]:2222"}}}, - {Addresses: []resolver.Address{{Addr: "[fe80::1%eth0]:3333"}}}, - {Addresses: []resolver.Address{{Addr: "grpc.io:80"}}}, // not an IP. 
- }, - }, - } - if err := bal.UpdateClientConnState(ccState); err != nil { - t.Fatalf("UpdateClientConnState(%v) returned error: %v", ccState, err) - } - - wantAddrs := []resolver.Address{ - {Addr: "[0001:0001:0001:0001:0001:0001:0001:0001]:8080"}, - {Addr: "1.1.1.1:1111"}, - {Addr: "grpc.io:80"}, - {Addr: "[0002:0002:0002:0002:0002:0002:0002:0002]:2222"}, - {Addr: "2.2.2.2:2"}, - {Addr: "[fe80::1%eth0]:3333"}, - {Addr: "3.3.3.3:3"}, - {Addr: "[::FFFF:192.168.0.1]:2222"}, - } - - gotAddrs, err := subConnAddresses(ctx, cc, 8) - if err != nil { - t.Fatalf("%v", err) - } - if diff := cmp.Diff(wantAddrs, gotAddrs, ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn creation order mismatch (-want +got):\n%s", diff) - } -} - -func (s) TestPickFirstLeaf_InterleavingUnknownPreffered(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - cc := testutils.NewBalancerClientConn(t) - bal := balancer.Get(pfbalancer.Name).Build(cc, balancer.BuildOptions{}) - defer bal.Close() - ccState := balancer.ClientConnState{ - ResolverState: resolver.State{ - Endpoints: []resolver.Endpoint{ - {Addresses: []resolver.Address{{Addr: "grpc.io:80"}}}, // not an IP. - {Addresses: []resolver.Address{{Addr: "1.1.1.1:1111"}}}, - {Addresses: []resolver.Address{{Addr: "2.2.2.2:2"}}}, - {Addresses: []resolver.Address{{Addr: "3.3.3.3:3"}}}, - {Addresses: []resolver.Address{{Addr: "[::FFFF:192.168.0.1]:2222"}}}, - {Addresses: []resolver.Address{{Addr: "[0001:0001:0001:0001:0001:0001:0001:0001]:8080"}}}, - {Addresses: []resolver.Address{{Addr: "[0002:0002:0002:0002:0002:0002:0002:0002]:8080"}}}, - {Addresses: []resolver.Address{{Addr: "[fe80::1%eth0]:3333"}}}, - {Addresses: []resolver.Address{{Addr: "example.com:80"}}}, // not an IP. - }, - }, - } - if err := bal.UpdateClientConnState(ccState); err != nil { - t.Fatalf("UpdateClientConnState(%v) returned error: %v", ccState, err) - } - - wantAddrs := []resolver.Address{ - {Addr: "grpc.io:80"}, - {Addr: "1.1.1.1:1111"}, - {Addr: "[0001:0001:0001:0001:0001:0001:0001:0001]:8080"}, - {Addr: "example.com:80"}, - {Addr: "2.2.2.2:2"}, - {Addr: "[0002:0002:0002:0002:0002:0002:0002:0002]:8080"}, - {Addr: "3.3.3.3:3"}, - {Addr: "[fe80::1%eth0]:3333"}, - {Addr: "[::FFFF:192.168.0.1]:2222"}, - } - - gotAddrs, err := subConnAddresses(ctx, cc, 9) - if err != nil { - t.Fatalf("%v", err) - } - if diff := cmp.Diff(wantAddrs, gotAddrs, ignoreBalAttributesOpt); diff != "" { - t.Errorf("SubConn creation order mismatch (-want +got):\n%s", diff) - } -} - -// Test verifies that pickfirst balancer transitions to READY when the health -// listener is enabled. Since client side health checking is not enabled in -// the service config, the health listener will send a health update for READY -// after registering the listener. 
-func (s) TestPickFirstLeaf_HealthListenerEnabled(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - bf := stub.BalancerFuncs{ - Init: func(bd *stub.BalancerData) { - bd.ChildBalancer = balancer.Get(pfbalancer.Name).Build(bd.ClientConn, bd.BuildOptions) - }, - Close: func(bd *stub.BalancerData) { - bd.ChildBalancer.Close() - }, - UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error { - ccs.ResolverState = pfbalancer.EnableHealthListener(ccs.ResolverState) - return bd.ChildBalancer.UpdateClientConnState(ccs) - }, - } - - stub.Register(t.Name(), bf) - svcCfg := fmt.Sprintf(`{ "loadBalancingConfig": [{%q: {}}] }`, t.Name()) - backend := stubserver.StartTestService(t, nil) - defer backend.Stop() - opts := []grpc.DialOption{ - grpc.WithTransportCredentials(insecure.NewCredentials()), - grpc.WithDefaultServiceConfig(svcCfg), - } - cc, err := grpc.NewClient(backend.Address, opts...) - if err != nil { - t.Fatalf("grpc.NewClient(%q) failed: %v", backend.Address, err) - - } - defer cc.Close() - - if err := pickfirst.CheckRPCsToBackend(ctx, cc, resolver.Address{Addr: backend.Address}); err != nil { - t.Fatal(err) - } -} - -// Test verifies that a health listener is not registered when pickfirst is not -// under a petiole policy. -func (s) TestPickFirstLeaf_HealthListenerNotEnabled(t *testing.T) { - // Wrap the clientconn to intercept NewSubConn. - // Capture the health list by wrapping the SC. - // Wrap the picker to unwrap the SC. - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - healthListenerCh := make(chan func(balancer.SubConnState)) - - bf := stub.BalancerFuncs{ - Init: func(bd *stub.BalancerData) { - ccw := &healthListenerCapturingCCWrapper{ - ClientConn: bd.ClientConn, - healthListenerCh: healthListenerCh, - subConnStateCh: make(chan balancer.SubConnState, 5), - } - bd.ChildBalancer = balancer.Get(pfbalancer.Name).Build(ccw, bd.BuildOptions) - }, - Close: func(bd *stub.BalancerData) { - bd.ChildBalancer.Close() - }, - UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error { - // Functions like a non-petiole policy by not configuring the use - // of health listeners. - return bd.ChildBalancer.UpdateClientConnState(ccs) - }, - } - - stub.Register(t.Name(), bf) - svcCfg := fmt.Sprintf(`{ "loadBalancingConfig": [{%q: {}}] }`, t.Name()) - backend := stubserver.StartTestService(t, nil) - defer backend.Stop() - opts := []grpc.DialOption{ - grpc.WithTransportCredentials(insecure.NewCredentials()), - grpc.WithDefaultServiceConfig(svcCfg), - } - cc, err := grpc.NewClient(backend.Address, opts...) - if err != nil { - t.Fatalf("grpc.NewClient(%q) failed: %v", backend.Address, err) - - } - defer cc.Close() - cc.Connect() - - select { - case <-healthListenerCh: - t.Fatal("Health listener registered when not enabled.") - case <-time.After(defaultTestShortTimeout): - } - - testutils.AwaitState(ctx, t, cc, connectivity.Ready) -} - -// Test mocks the updates sent to the health listener and verifies that the -// balancer correctly reports the health state once the SubConn's connectivity -// state becomes READY. -func (s) TestPickFirstLeaf_HealthUpdates(t *testing.T) { - // Wrap the clientconn to intercept NewSubConn. - // Capture the health list by wrapping the SC. - // Wrap the picker to unwrap the SC. 
- ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - healthListenerCh := make(chan func(balancer.SubConnState)) - scConnectivityStateCh := make(chan balancer.SubConnState, 5) - - bf := stub.BalancerFuncs{ - Init: func(bd *stub.BalancerData) { - ccw := &healthListenerCapturingCCWrapper{ - ClientConn: bd.ClientConn, - healthListenerCh: healthListenerCh, - subConnStateCh: scConnectivityStateCh, - } - bd.ChildBalancer = balancer.Get(pfbalancer.Name).Build(ccw, bd.BuildOptions) - }, - Close: func(bd *stub.BalancerData) { - bd.ChildBalancer.Close() - }, - UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error { - ccs.ResolverState = pfbalancer.EnableHealthListener(ccs.ResolverState) - return bd.ChildBalancer.UpdateClientConnState(ccs) - }, - } - - stub.Register(t.Name(), bf) - svcCfg := fmt.Sprintf(`{ "loadBalancingConfig": [{%q: {}}] }`, t.Name()) - backend := stubserver.StartTestService(t, nil) - defer backend.Stop() - opts := []grpc.DialOption{ - grpc.WithTransportCredentials(insecure.NewCredentials()), - grpc.WithDefaultServiceConfig(svcCfg), - } - cc, err := grpc.NewClient(backend.Address, opts...) - if err != nil { - t.Fatalf("grpc.NewClient(%q) failed: %v", backend.Address, err) - - } - defer cc.Close() - cc.Connect() - - var healthListener func(balancer.SubConnState) - select { - case healthListener = <-healthListenerCh: - case <-ctx.Done(): - t.Fatal("Context timed out waiting for health listener to be registered.") - } - - // Wait for the raw connectivity state to become READY. The LB policy should - // wait for the health updates before transitioning the channel to READY. - for { - var scs balancer.SubConnState - select { - case scs = <-scConnectivityStateCh: - case <-ctx.Done(): - t.Fatal("Context timed out waiting for the SubConn connectivity state to become READY.") - } - if scs.ConnectivityState == connectivity.Ready { - break - } - } - - shortCtx, cancel := context.WithTimeout(ctx, defaultTestShortTimeout) - defer cancel() - testutils.AwaitNoStateChange(shortCtx, t, cc, connectivity.Connecting) - - // The LB policy should update the channel state based on the health state. - healthListener(balancer.SubConnState{ - ConnectivityState: connectivity.TransientFailure, - ConnectionError: fmt.Errorf("test health check failure"), - }) - testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) - - healthListener(balancer.SubConnState{ - ConnectivityState: connectivity.Connecting, - ConnectionError: balancer.ErrNoSubConnAvailable, - }) - testutils.AwaitState(ctx, t, cc, connectivity.Connecting) - - healthListener(balancer.SubConnState{ - ConnectivityState: connectivity.Ready, - }) - if err := pickfirst.CheckRPCsToBackend(ctx, cc, resolver.Address{Addr: backend.Address}); err != nil { - t.Fatal(err) - } - - // When the health check fails, the channel should transition to TF. - healthListener(balancer.SubConnState{ - ConnectivityState: connectivity.TransientFailure, - ConnectionError: fmt.Errorf("test health check failure"), - }) - testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) -} - -// Tests the case where an address update received by the pick_first LB policy -// differs in metadata which should be ignored by the LB policy. In this case, -// the test verifies that new connections are not created when the address -// update only changes the metadata. 
-func (s) TestPickFirstLeaf_AddressUpdateWithMetadata(t *testing.T) {
-	dialer := testutils.NewBlockingDialer()
-	dopts := []grpc.DialOption{
-		grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, pfbalancer.Name)),
-		grpc.WithContextDialer(dialer.DialContext),
-	}
-	cc, r, backends := setupPickFirstLeaf(t, 2, dopts...)
-
-	// Add metadata to the addresses before pushing them to the pick_first LB
-	// policy through the manual resolver.
-	addrs := backends.resolverAddrs()
-	for i := range addrs {
-		addrs[i].Metadata = &metadata.MD{
-			"test-metadata-1": []string{fmt.Sprintf("%d", i)},
-		}
-	}
-	r.UpdateState(resolver.State{Addresses: addrs})
-
-	// Ensure that RPCs succeed to the expected backend.
-	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
-	defer cancel()
-	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
-		t.Fatal(err)
-	}
-
-	// Create holds for each backend. This will be used to verify the connection
-	// is not re-established.
-	holds := backends.holds(dialer)
-
-	// Add metadata to the addresses before pushing them to the pick_first LB
-	// policy through the manual resolver. Leave the order of the addresses
-	// unchanged.
-	for i := range addrs {
-		addrs[i].Metadata = &metadata.MD{
-			"test-metadata-2": []string{fmt.Sprintf("%d", i)},
-		}
-	}
-	r.UpdateState(resolver.State{Addresses: addrs})
-
-	// Ensure that no new connection is established.
-	for i := range holds {
-		sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
-		defer sCancel()
-		if holds[i].Wait(sCtx) {
-			t.Fatalf("Unexpected connection attempt to backend: %s", addrs[i])
-		}
-	}
-
-	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil {
-		t.Fatal(err)
-	}
-
-	// Add metadata to the addresses before pushing them to the pick_first LB
-	// policy through the manual resolver. Reverse the order of the addresses.
-	for i := range addrs {
-		addrs[i].Metadata = &metadata.MD{
-			"test-metadata-3": []string{fmt.Sprintf("%d", i)},
-		}
-	}
-	addrs[0], addrs[1] = addrs[1], addrs[0]
-	r.UpdateState(resolver.State{Addresses: addrs})
-
-	// Ensure that no new connection is established.
-	for i := range holds {
-		sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
-		defer sCancel()
-		if holds[i].Wait(sCtx) {
-			t.Fatalf("Unexpected connection attempt to backend: %s", addrs[i])
-		}
-	}
-	if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil {
-		t.Fatal(err)
-	}
-}
-
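The AddressUpdateWithMetadata test above relies on the balancer treating two addresses as the same backend when they differ only in fields that do not identify it. The comparison helper the balancer actually uses is not part of this diff; the sketch below only illustrates, under that assumption, which fields plausibly take part:

package example

import "google.golang.org/grpc/resolver"

// sameBackend is a hypothetical comparison in the spirit of the test above:
// Metadata is deliberately excluded (as are BalancerAttributes, per the
// addressList seekTo test later in this series), so updates touching only
// those fields must not tear down an existing connection.
func sameBackend(a, b resolver.Address) bool {
	return a.Addr == b.Addr &&
		a.ServerName == b.ServerName &&
		a.Attributes.Equal(b.Attributes)
}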
-// Tests the scenario where a connection is established and then breaks, leading
-// to a reconnection attempt. While the reconnection is in progress, a resolver
-// update with a new address is received. The test verifies that the balancer
-// creates a new SubConn for the new address and that the ClientConn eventually
-// becomes READY.
-func (s) TestPickFirstLeaf_Reconnection(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
-	defer cancel()
-	cc := testutils.NewBalancerClientConn(t)
-	bal := balancer.Get(pfbalancer.Name).Build(cc, balancer.BuildOptions{})
-	defer bal.Close()
-	ccState := balancer.ClientConnState{
-		ResolverState: resolver.State{
-			Endpoints: []resolver.Endpoint{
-				{Addresses: []resolver.Address{{Addr: "1.1.1.1:1"}}},
-			},
-		},
-	}
-	if err := bal.UpdateClientConnState(ccState); err != nil {
-		t.Fatalf("UpdateClientConnState(%v) returned error: %v", ccState, err)
-	}
-
-	select {
-	case state := <-cc.NewStateCh:
-		if got, want := state, connectivity.Connecting; got != want {
-			t.Fatalf("Received unexpected ClientConn state: got %v, want %v", got, want)
-		}
-	case <-ctx.Done():
-		t.Fatal("Context timed out waiting for ClientConn state update.")
-	}
-
-	sc1 := <-cc.NewSubConnCh
-	select {
-	case <-sc1.ConnectCh:
-	case <-ctx.Done():
-		t.Fatal("Context timed out waiting for Connect() to be called on sc1.")
-	}
-	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting})
-	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Ready})
-
-	if err := cc.WaitForConnectivityState(ctx, connectivity.Ready); err != nil {
-		t.Fatalf("Context timed out waiting for ClientConn to become READY.")
-	}
-
-	// Simulate a connection breakage; this should result in the channel
-	// transitioning to IDLE.
-	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Idle})
-	if err := cc.WaitForConnectivityState(ctx, connectivity.Idle); err != nil {
-		t.Fatalf("Context timed out waiting for ClientConn to enter IDLE.")
-	}
-
-	// Calling the idle picker should result in the SubConn being re-connected.
-	picker := <-cc.NewPickerCh
-	if _, err := picker.Pick(balancer.PickInfo{}); err != balancer.ErrNoSubConnAvailable {
-		t.Fatalf("picker.Pick() returned error: %v, want %v", err, balancer.ErrNoSubConnAvailable)
-	}
-
-	select {
-	case <-sc1.ConnectCh:
-	case <-ctx.Done():
-		t.Fatal("Context timed out waiting for Connect() to be called on sc1.")
-	}
-
-	// Send a resolver update, removing the existing SubConn. Since the balancer
-	// is connecting, it should create a new SubConn for the new backend
-	// address.
-	ccState = balancer.ClientConnState{
-		ResolverState: resolver.State{
-			Endpoints: []resolver.Endpoint{
-				{Addresses: []resolver.Address{{Addr: "2.2.2.2:2"}}},
-			},
-		},
-	}
-	if err := bal.UpdateClientConnState(ccState); err != nil {
-		t.Fatalf("UpdateClientConnState(%v) returned error: %v", ccState, err)
-	}
-
-	var sc2 *testutils.TestSubConn
-	select {
-	case sc2 = <-cc.NewSubConnCh:
-	case <-ctx.Done():
-		t.Fatal("Context timed out waiting for new SubConn to be created.")
-	}
-
-	select {
-	case <-sc2.ConnectCh:
-	case <-ctx.Done():
-		t.Fatal("Context timed out waiting for Connect() to be called on sc2.")
-	}
-	sc2.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting})
-	sc2.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Ready})
-	if err := cc.WaitForConnectivityState(ctx, connectivity.Ready); err != nil {
-		t.Fatalf("Context timed out waiting for ClientConn to become READY.")
-	}
-}
-
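The Reconnection test above depends on pick_first's idle-picker behaviour: once the connection breaks, the balancer reports IDLE with a picker whose first Pick kicks off a reconnect and returns ErrNoSubConnAvailable, queuing the RPC until a fresh picker is published. A sketch of that pattern, with illustrative names rather than the balancer's real internals:

package example

import (
	"sync"

	"google.golang.org/grpc/balancer"
)

// idlePicker triggers reconnection lazily: nothing happens until the first
// RPC is attempted against the idle channel.
type idlePicker struct {
	exitIdle func() // assumed to schedule the balancer's ExitIdle
	once     sync.Once
}

func (p *idlePicker) Pick(balancer.PickInfo) (balancer.PickResult, error) {
	p.once.Do(p.exitIdle) // reconnect at most once per idle period
	// Queue the RPC; the channel retries the pick when the balancer
	// publishes a new picker after the SubConn leaves IDLE.
	return balancer.PickResult{}, balancer.ErrNoSubConnAvailable
}

-// healthListenerCapturingCCWrapper is used to capture the health listener so
-// that health updates can be mocked for testing.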
-type healthListenerCapturingCCWrapper struct { - balancer.ClientConn - healthListenerCh chan func(balancer.SubConnState) - subConnStateCh chan balancer.SubConnState -} - -func (ccw *healthListenerCapturingCCWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) { - oldListener := opts.StateListener - opts.StateListener = func(scs balancer.SubConnState) { - ccw.subConnStateCh <- scs - if oldListener != nil { - oldListener(scs) - } - } - sc, err := ccw.ClientConn.NewSubConn(addrs, opts) - if err != nil { - return nil, err - } - return &healthListenerCapturingSCWrapper{ - SubConn: sc, - listenerCh: ccw.healthListenerCh, - }, nil -} - -func (ccw *healthListenerCapturingCCWrapper) UpdateState(state balancer.State) { - state.Picker = &unwrappingPicker{state.Picker} - ccw.ClientConn.UpdateState(state) -} - -type healthListenerCapturingSCWrapper struct { - balancer.SubConn - listenerCh chan func(balancer.SubConnState) -} - -func (scw *healthListenerCapturingSCWrapper) RegisterHealthListener(listener func(balancer.SubConnState)) { - scw.listenerCh <- listener -} - -// unwrappingPicker unwraps SubConns because the channel expects SubConns to be -// addrConns. -type unwrappingPicker struct { - balancer.Picker -} - -func (pw *unwrappingPicker) Pick(info balancer.PickInfo) (balancer.PickResult, error) { - pr, err := pw.Picker.Pick(info) - if pr.SubConn != nil { - pr.SubConn = pr.SubConn.(*healthListenerCapturingSCWrapper).SubConn - } - return pr, err -} - -// subConnAddresses makes the pickfirst balancer create the requested number of -// SubConns by triggering transient failures. The function returns the -// addresses of the created SubConns. -func subConnAddresses(ctx context.Context, cc *testutils.BalancerClientConn, subConnCount int) ([]resolver.Address, error) { - addresses := []resolver.Address{} - for i := 0; i < subConnCount; i++ { - select { - case <-ctx.Done(): - return nil, fmt.Errorf("test timed out after creating %d subchannels, want %d", i, subConnCount) - case sc := <-cc.NewSubConnCh: - if len(sc.Addresses) != 1 { - return nil, fmt.Errorf("new subchannel created with %d addresses, want 1", len(sc.Addresses)) - } - addresses = append(addresses, sc.Addresses[0]) - sc.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting}) - sc.UpdateState(balancer.SubConnState{ - ConnectivityState: connectivity.TransientFailure, - }) - } - } - return addresses, nil -} - -// stateStoringBalancer stores the state of the SubConns being created. 
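The healthListenerCapturingCCWrapper above and the stateStoringBalancer's ClientConn wrapper defined just below rely on the same interception idiom: wrap the ClientConn, splice an observer into NewSubConnOptions.StateListener, and forward each update to the original listener so the balancer under test still sees every transition. Stripped of the test-specific channels, the idiom looks roughly like this (the type name is illustrative):

package example

import (
	"google.golang.org/grpc/balancer"
	"google.golang.org/grpc/resolver"
)

type observingCC struct {
	balancer.ClientConn
	observe func(balancer.SubConnState) // test hook
}

func (cc *observingCC) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
	prev := opts.StateListener
	opts.StateListener = func(scs balancer.SubConnState) {
		cc.observe(scs) // record the update for the test ...
		if prev != nil {
			prev(scs) // ... then let the balancer react as usual
		}
	}
	return cc.ClientConn.NewSubConn(addrs, opts)
}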
-type stateStoringBalancer struct { - balancer.Balancer - mu sync.Mutex - scStates []*scState -} - -func (b *stateStoringBalancer) Close() { - b.Balancer.Close() -} - -type stateStoringBalancerBuilder struct { - balancer chan *stateStoringBalancer -} - -func (b *stateStoringBalancerBuilder) Name() string { - return stateStoringBalancerName -} - -func (b *stateStoringBalancerBuilder) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { - bal := &stateStoringBalancer{} - bal.Balancer = balancer.Get(pfbalancer.Name).Build(&stateStoringCCWrapper{cc, bal}, opts) - b.balancer <- bal - return bal -} - -func (b *stateStoringBalancer) subConnStates() []scState { - b.mu.Lock() - defer b.mu.Unlock() - ret := []scState{} - for _, s := range b.scStates { - ret = append(ret, *s) - } - return ret -} - -func (b *stateStoringBalancer) addSCState(state *scState) { - b.mu.Lock() - b.scStates = append(b.scStates, state) - b.mu.Unlock() -} - -type stateStoringCCWrapper struct { - balancer.ClientConn - b *stateStoringBalancer -} - -func (ccw *stateStoringCCWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) { - oldListener := opts.StateListener - scs := &scState{ - State: connectivity.Idle, - Addrs: addrs, - } - ccw.b.addSCState(scs) - opts.StateListener = func(s balancer.SubConnState) { - ccw.b.mu.Lock() - scs.State = s.ConnectivityState - ccw.b.mu.Unlock() - oldListener(s) - } - return ccw.ClientConn.NewSubConn(addrs, opts) -} - -type scState struct { - State connectivity.State - Addrs []resolver.Address -} - -type backendManager struct { - backends []*testServer -} - -func (b *backendManager) stopAllExcept(index int) { - for idx, b := range b.backends { - if idx != index { - b.stop() - } - } -} - -// resolverAddrs returns a list of resolver addresses for the stub server -// backends. Useful when pushing addresses to the manual resolver. -func (b *backendManager) resolverAddrs() []resolver.Address { - addrs := make([]resolver.Address, len(b.backends)) - for i, backend := range b.backends { - addrs[i] = resolver.Address{Addr: backend.Address} - } - return addrs -} - -func (b *backendManager) holds(dialer *testutils.BlockingDialer) []*testutils.Hold { - holds := []*testutils.Hold{} - for _, addr := range b.resolverAddrs() { - holds = append(holds, dialer.Hold(addr.Addr)) - } - return holds -} - -type ccStateSubscriber struct { - mu sync.Mutex - states []connectivity.State -} - -// transitions returns all the states that ccStateSubscriber recorded. -// Without this a race condition occurs when the test compares the states -// and the subscriber at the same time receives a connectivity.Shutdown. -func (c *ccStateSubscriber) transitions() []connectivity.State { - c.mu.Lock() - defer c.mu.Unlock() - return c.states -} - -func (c *ccStateSubscriber) OnMessage(msg any) { - c.mu.Lock() - defer c.mu.Unlock() - c.states = append(c.states, msg.(connectivity.State)) -} - -// mockTimer returns a fake timeAfterFunc that will not trigger automatically. -// It returns a function that can be called to manually trigger the execution -// of the scheduled callback. 
-func mockTimer() (triggerFunc func(), timerFunc func(_ time.Duration, f func()) func()) { - timerCh := make(chan struct{}) - triggerFunc = func() { - timerCh <- struct{}{} - } - return triggerFunc, func(_ time.Duration, f func()) func() { - stopCh := make(chan struct{}) - go func() { - select { - case <-timerCh: - f() - case <-stopCh: - } - }() - return sync.OnceFunc(func() { - close(stopCh) - }) - } -} diff --git a/balancer/pickfirst/pickfirstleaf_test.go b/balancer/pickfirst/pickfirstleaf_test.go deleted file mode 100644 index 05fa18c6d064..000000000000 --- a/balancer/pickfirst/pickfirstleaf_test.go +++ /dev/null @@ -1,275 +0,0 @@ -/* - * - * Copyright 2024 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package pickfirst - -import ( - "context" - "fmt" - "testing" - "time" - - "google.golang.org/grpc/attributes" - "google.golang.org/grpc/balancer" - "google.golang.org/grpc/connectivity" - "google.golang.org/grpc/internal/testutils" - "google.golang.org/grpc/resolver" -) - -// TestAddressList_Iteration verifies the behaviour of the addressList while -// iterating through the entries. -func (s) TestAddressList_Iteration(t *testing.T) { - addrs := []resolver.Address{ - { - Addr: "192.168.1.1", - ServerName: "test-host-1", - Attributes: attributes.New("key-1", "val-1"), - BalancerAttributes: attributes.New("bal-key-1", "bal-val-1"), - }, - { - Addr: "192.168.1.2", - ServerName: "test-host-2", - Attributes: attributes.New("key-2", "val-2"), - BalancerAttributes: attributes.New("bal-key-2", "bal-val-2"), - }, - { - Addr: "192.168.1.3", - ServerName: "test-host-3", - Attributes: attributes.New("key-3", "val-3"), - BalancerAttributes: attributes.New("bal-key-3", "bal-val-3"), - }, - } - - addressList := addressList{} - addressList.updateAddrs(addrs) - - for i := 0; i < len(addrs); i++ { - if got, want := addressList.isValid(), true; got != want { - t.Fatalf("addressList.isValid() = %t, want %t", got, want) - } - if got, want := addressList.currentAddress(), addrs[i]; !want.Equal(got) { - t.Errorf("addressList.currentAddress() = %v, want %v", got, want) - } - if got, want := addressList.increment(), i+1 < len(addrs); got != want { - t.Fatalf("addressList.increment() = %t, want %t", got, want) - } - } - - if got, want := addressList.isValid(), false; got != want { - t.Fatalf("addressList.isValid() = %t, want %t", got, want) - } - - // increment an invalid address list. 
-	if got, want := addressList.increment(), false; got != want {
-		t.Errorf("addressList.increment() = %t, want %t", got, want)
-	}
-
-	if got, want := addressList.isValid(), false; got != want {
-		t.Errorf("addressList.isValid() = %t, want %t", got, want)
-	}
-
-	addressList.reset()
-	for i := 0; i < len(addrs); i++ {
-		if got, want := addressList.isValid(), true; got != want {
-			t.Fatalf("addressList.isValid() = %t, want %t", got, want)
-		}
-		if got, want := addressList.currentAddress(), addrs[i]; !want.Equal(got) {
-			t.Errorf("addressList.currentAddress() = %v, want %v", got, want)
-		}
-		if got, want := addressList.increment(), i+1 < len(addrs); got != want {
-			t.Fatalf("addressList.increment() = %t, want %t", got, want)
-		}
-	}
-}
-
-// TestAddressList_SeekTo verifies the behaviour of addressList.seekTo.
-func (s) TestAddressList_SeekTo(t *testing.T) {
-	addrs := []resolver.Address{
-		{
-			Addr:               "192.168.1.1",
-			ServerName:         "test-host-1",
-			Attributes:         attributes.New("key-1", "val-1"),
-			BalancerAttributes: attributes.New("bal-key-1", "bal-val-1"),
-		},
-		{
-			Addr:               "192.168.1.2",
-			ServerName:         "test-host-2",
-			Attributes:         attributes.New("key-2", "val-2"),
-			BalancerAttributes: attributes.New("bal-key-2", "bal-val-2"),
-		},
-		{
-			Addr:               "192.168.1.3",
-			ServerName:         "test-host-3",
-			Attributes:         attributes.New("key-3", "val-3"),
-			BalancerAttributes: attributes.New("bal-key-3", "bal-val-3"),
-		},
-	}
-
-	addressList := addressList{}
-	addressList.updateAddrs(addrs)
-
-	// Try finding an address in the list.
-	key := resolver.Address{
-		Addr:               "192.168.1.2",
-		ServerName:         "test-host-2",
-		Attributes:         attributes.New("key-2", "val-2"),
-		BalancerAttributes: attributes.New("ignored", "bal-val-2"),
-	}
-
-	if got, want := addressList.seekTo(key), true; got != want {
-		t.Errorf("addressList.seekTo(%v) = %t, want %t", key, got, want)
-	}
-
-	// It should be possible to increment once more now that the pointer has advanced.
-	if got, want := addressList.increment(), true; got != want {
-		t.Errorf("addressList.increment() = %t, want %t", got, want)
-	}
-
-	if got, want := addressList.increment(), false; got != want {
-		t.Errorf("addressList.increment() = %t, want %t", got, want)
-	}
-
-	// Seek to the key again; it is behind the pointer now.
-	if got, want := addressList.seekTo(key), true; got != want {
-		t.Errorf("addressList.seekTo(%v) = %t, want %t", key, got, want)
-	}
-
-	// Seek to a key not in the list.
-	key = resolver.Address{
-		Addr:               "192.168.1.5",
-		ServerName:         "test-host-5",
-		Attributes:         attributes.New("key-5", "val-5"),
-		BalancerAttributes: attributes.New("ignored", "bal-val-5"),
-	}
-
-	if got, want := addressList.seekTo(key), false; got != want {
-		t.Errorf("addressList.seekTo(%v) = %t, want %t", key, got, want)
-	}
-
-	// It should be possible to increment once more since the pointer has not advanced.
-	if got, want := addressList.increment(), true; got != want {
-		t.Errorf("addressList.increment() = %t, want %t", got, want)
-	}
-
-	if got, want := addressList.increment(), false; got != want {
-		t.Errorf("addressList.increment() = %t, want %t", got, want)
-	}
-}
-
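Together, TestAddressList_Iteration and TestAddressList_SeekTo pin down a small iterator contract: isValid reports whether the cursor points at an entry, increment advances the cursor and returns false once it moves past the last entry, seekTo positions the cursor at a matching address while ignoring BalancerAttributes, and reset rewinds to the start. Written as if inside the balancer package (addressList is unexported), the canonical consumption loop implied by that contract is the following sketch:

package pickfirst

import "google.golang.org/grpc/resolver"

// visitAll walks the list from the start, relying only on the behaviour the
// tests above verify; it is an illustration, not part of the balancer.
func visitAll(al *addressList) []resolver.Address {
	al.reset()
	var visited []resolver.Address
	for al.isValid() {
		visited = append(visited, al.currentAddress())
		if !al.increment() {
			break // the cursor has moved past the last entry
		}
	}
	return visited
}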
-// TestPickFirstLeaf_TFPickerUpdate sends TRANSIENT_FAILURE SubConn state updates
-// for each SubConn managed by a pickfirst balancer. It verifies that the picker
-// is updated with the expected frequency.
-func (s) TestPickFirstLeaf_TFPickerUpdate(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
-	defer cancel()
-	cc := testutils.NewBalancerClientConn(t)
-	bal := pickfirstBuilder{}.Build(cc, balancer.BuildOptions{})
-	defer bal.Close()
-	ccState := balancer.ClientConnState{
-		ResolverState: resolver.State{
-			Endpoints: []resolver.Endpoint{
-				{Addresses: []resolver.Address{{Addr: "1.1.1.1:1"}}},
-				{Addresses: []resolver.Address{{Addr: "1.1.1.1:1"}}}, // duplicate, should be ignored.
-				{Addresses: []resolver.Address{{Addr: "2.2.2.2:2"}}},
-				{Addresses: []resolver.Address{{Addr: "1.1.1.1:1"}}}, // duplicate, should be ignored.
-			},
-		},
-	}
-	if err := bal.UpdateClientConnState(ccState); err != nil {
-		t.Fatalf("UpdateClientConnState(%v) returned error: %v", ccState, err)
-	}
-
-	// PF should report TRANSIENT_FAILURE only once all the SubConns have failed
-	// once.
-	tfErr := fmt.Errorf("test err: connection refused")
-	sc1 := <-cc.NewSubConnCh
-	select {
-	case <-sc1.ConnectCh:
-	case <-ctx.Done():
-		t.Fatal("Context timed out waiting for Connect() to be called on sc1.")
-	}
-	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting})
-	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.TransientFailure, ConnectionError: tfErr})
-
-	// Move the SubConn back to IDLE; it should not be re-connected until the
-	// first pass is complete.
-	shortCtx, shortCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
-	defer shortCancel()
-	sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Idle})
-	select {
-	case <-sc1.ConnectCh:
-		t.Fatal("Connect() unexpectedly called on sc1.")
-	case <-shortCtx.Done():
-	}
-
-	if err := cc.WaitForPickerWithErr(ctx, balancer.ErrNoSubConnAvailable); err != nil {
-		t.Fatalf("cc.WaitForPickerWithErr(%v) returned error: %v", balancer.ErrNoSubConnAvailable, err)
-	}
-
-	sc2 := <-cc.NewSubConnCh
-	select {
-	case <-sc2.ConnectCh:
-	case <-ctx.Done():
-		t.Fatal("Context timed out waiting for Connect() to be called on sc2.")
-	}
-	sc2.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Connecting})
-	sc2.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.TransientFailure, ConnectionError: tfErr})
-
-	if err := cc.WaitForPickerWithErr(ctx, tfErr); err != nil {
-		t.Fatalf("cc.WaitForPickerWithErr(%v) returned error: %v", tfErr, err)
-	}
-
-	// Subsequent TRANSIENT_FAILUREs should be reported only after seeing "# of SubConns"
-	// TRANSIENT_FAILUREs.
-
-	// Both SubConns should attempt to connect in parallel.
- select { - case <-sc1.ConnectCh: - case <-ctx.Done(): - t.Fatal("Context timed out waiting for Connect() to be called on sc1.") - } - - shortCtx, shortCancel = context.WithTimeout(ctx, defaultTestShortTimeout) - defer shortCancel() - select { - case <-sc2.ConnectCh: - t.Fatal("Connect() called on sc2 before it completed backing-off.") - case <-shortCtx.Done(): - } - - sc2.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Idle}) - select { - case <-sc2.ConnectCh: - case <-ctx.Done(): - t.Fatal("Context timed out waiting for Connect() to be called on sc2.") - } - - newTfErr := fmt.Errorf("test err: unreachable") - sc2.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.TransientFailure, ConnectionError: newTfErr}) - select { - case <-time.After(defaultTestShortTimeout): - case p := <-cc.NewPickerCh: - sc, err := p.Pick(balancer.PickInfo{}) - t.Fatalf("Unexpected picker update: %v, %v", sc, err) - } - - sc2.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.TransientFailure, ConnectionError: newTfErr}) - if err := cc.WaitForPickerWithErr(ctx, newTfErr); err != nil { - t.Fatalf("cc.WaitForPickerWithErr(%v) returned error: %v", newTfErr, err) - } -} From 10c3169965c430723466d309becbb92753e50bcb Mon Sep 17 00:00:00 2001 From: Arjan Bal Date: Mon, 27 Oct 2025 12:16:54 +0530 Subject: [PATCH 3/8] Update github workflows --- .github/workflows/coverage.yml | 3 --- .github/workflows/testing.yml | 5 ----- 2 files changed, 8 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 08a206929b44..970a48ff2cc4 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -19,9 +19,6 @@ jobs: - name: Run coverage run: go test -coverprofile=coverage.out -coverpkg=./... ./... - - name: Run coverage with old pickfirst - run: GRPC_EXPERIMENTAL_ENABLE_NEW_PICK_FIRST=false go test -coverprofile=coverage_old_pickfirst.out -coverpkg=./... ./... - - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 with: diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 7440e36c35b7..9c4dcae0348d 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -69,11 +69,6 @@ jobs: - type: tests goversion: '1.24' - - type: tests - goversion: '1.25' - testflags: -race - grpcenv: 'GRPC_EXPERIMENTAL_ENABLE_NEW_PICK_FIRST=false' - steps: # Setup the environment. - name: Setup GOARCH From e66f8171618ecaaff84022ead941bb3327c17d17 Mon Sep 17 00:00:00 2001 From: Arjan Bal Date: Mon, 27 Oct 2025 12:20:34 +0530 Subject: [PATCH 4/8] keep exported function with deprecated notice --- balancer/pickfirst/pickfirstleaf/pickfirstleaf.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/balancer/pickfirst/pickfirstleaf/pickfirstleaf.go b/balancer/pickfirst/pickfirstleaf/pickfirstleaf.go index 824b854c66ba..b3a48837daab 100644 --- a/balancer/pickfirst/pickfirstleaf/pickfirstleaf.go +++ b/balancer/pickfirst/pickfirstleaf/pickfirstleaf.go @@ -25,6 +25,19 @@ // later release. package pickfirstleaf +import ( + "google.golang.org/grpc/balancer/pickfirst" + "google.golang.org/grpc/resolver" +) + // Name is the name of the pick_first_leaf balancer. // Deprecated: Use the balancer/pickfirst package's Name instead. const Name = "pick_first" + +// EnableHealthListener updates the state to configure pickfirst for using a +// generic health listener. +// Deprecated: Use the balancer/pickfirst package's EnableHealthListener +// instead. 
+func EnableHealthListener(state resolver.State) resolver.State {
+	return pickfirst.EnableHealthListener(state)
+}

From 15b6c1c5538a446c7b6113dea9830c6ed6408fe8 Mon Sep 17 00:00:00 2001
From: Arjan Bal 
Date: Mon, 27 Oct 2025 12:26:55 +0530
Subject: [PATCH 5/8] Add experimental notice on newly exported function

---
 balancer/pickfirst/pickfirst.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/balancer/pickfirst/pickfirst.go b/balancer/pickfirst/pickfirst.go
index b9edb68fa568..480c187bc835 100644
--- a/balancer/pickfirst/pickfirst.go
+++ b/balancer/pickfirst/pickfirst.go
@@ -124,6 +124,11 @@ func (pickfirstBuilder) ParseConfig(js json.RawMessage) (serviceconfig.LoadBalan
 
 // EnableHealthListener updates the state to configure pickfirst for using a
 // generic health listener.
+//
+// # Experimental
+//
+// Notice: This API is EXPERIMENTAL and may be changed or removed in a later
+// release.
 func EnableHealthListener(state resolver.State) resolver.State {
 	state.Attributes = state.Attributes.WithValue(enableHealthListenerKeyType{}, true)
 	return state

From b93b3fba52739fe21a033f5f735ff7194c98dd9d Mon Sep 17 00:00:00 2001
From: Arjan Bal 
Date: Mon, 27 Oct 2025 12:29:34 +0530
Subject: [PATCH 6/8] revert copyright date

---
 balancer/pickfirst/pickfirst.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/balancer/pickfirst/pickfirst.go b/balancer/pickfirst/pickfirst.go
index 480c187bc835..b4bc3a2bf368 100644
--- a/balancer/pickfirst/pickfirst.go
+++ b/balancer/pickfirst/pickfirst.go
@@ -1,6 +1,6 @@
 /*
  *
- * Copyright 2024 gRPC authors.
+ * Copyright 2017 gRPC authors.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.

From d8099910d4c785f5e73172fdf8a01f7ea40569db Mon Sep 17 00:00:00 2001
From: Arjan Bal 
Date: Mon, 27 Oct 2025 14:01:33 +0530
Subject: [PATCH 7/8] Address #8561

---
 xds/googledirectpath/googlec2p_test.go | 64 +++++++++++++++----------
 1 file changed, 36 insertions(+), 28 deletions(-)

diff --git a/xds/googledirectpath/googlec2p_test.go b/xds/googledirectpath/googlec2p_test.go
index 04ff61f0e29f..a692291cf714 100644
--- a/xds/googledirectpath/googlec2p_test.go
+++ b/xds/googledirectpath/googlec2p_test.go
@@ -103,29 +103,6 @@ func useCleanUniverseDomain(t *testing.T) {
 	})
 }
 
-// TODO(https://github.com/grpc/grpc-go/issues/8561): this content can be hardcoded directly
-// in wanted bootstraps again after old pick first is removed.
-func expectedNodeJSON(ipv6Capable bool) []byte {
-	if !ipv6Capable {
-		return []byte(`{
-		  "id": "C2P-666",
-		  "locality": {
-		    "zone": "test-zone"
-		  }
-		}`)
-	}
-	// Otherwise, return the node metadata including the IPv6 capability flag.
-	return []byte(`{
-		  "id": "C2P-666",
-		  "locality": {
-		    "zone": "test-zone"
-		  },
-		  "metadata": {
-		    "TRAFFICDIRECTOR_DIRECTPATH_C2P_IPV6_CAPABLE": true
-		  }
-		}`)
-}
-
 // Tests the scenario where the bootstrap env vars are set and we're running on
The test builds a google-c2p resolver and verifies that an xDS resolver // is built and that we don't fallback to DNS (because federation is enabled by @@ -241,7 +218,12 @@ func (s) TestBuildXDS(t *testing.T) { ] }`), }, - Node: expectedNodeJSON(false), + Node: []byte(`{ + "id": "C2P-666", + "locality": { + "zone": "test-zone" + } + }`), }), }, { @@ -264,7 +246,15 @@ func (s) TestBuildXDS(t *testing.T) { ] }`), }, - Node: expectedNodeJSON(true), + Node: []byte(`{ + "id": "C2P-666", + "locality": { + "zone": "test-zone" + }, + "metadata": { + "TRAFFICDIRECTOR_DIRECTPATH_C2P_IPV6_CAPABLE": true + } + }`), }), }, { @@ -288,7 +278,15 @@ func (s) TestBuildXDS(t *testing.T) { ] }`), }, - Node: expectedNodeJSON(true), + Node: []byte(`{ + "id": "C2P-666", + "locality": { + "zone": "test-zone" + }, + "metadata": { + "TRAFFICDIRECTOR_DIRECTPATH_C2P_IPV6_CAPABLE": true + } + }`), }), }, } { @@ -450,7 +448,12 @@ func (s) TestSetUniverseDomainNonDefault(t *testing.T) { ] }`), }, - Node: expectedNodeJSON(false), + Node: []byte(`{ + "id": "C2P-666", + "locality": { + "zone": "test-zone" + } + }`), }) if diff := cmp.Diff(wantBootstrapConfig, gotConfig); diff != "" { t.Fatalf("Unexpected diff in bootstrap config (-want +got):\n%s", diff) @@ -518,7 +521,12 @@ func (s) TestDefaultUniverseDomain(t *testing.T) { ] }`), }, - Node: expectedNodeJSON(false), + Node: []byte(`{ + "id": "C2P-666", + "locality": { + "zone": "test-zone" + } + }`), }) if diff := cmp.Diff(wantBootstrapConfig, gotConfig); diff != "" { t.Fatalf("Unexpected diff in bootstrap config (-want +got):\n%s", diff) From 0b30392994211f46e6d1df6e666c2d735ae0a782 Mon Sep 17 00:00:00 2001 From: Arjan Bal Date: Tue, 28 Oct 2025 10:51:55 +0530 Subject: [PATCH 8/8] Add deprecation note on package --- balancer/pickfirst/pickfirstleaf/pickfirstleaf.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/balancer/pickfirst/pickfirstleaf/pickfirstleaf.go b/balancer/pickfirst/pickfirstleaf/pickfirstleaf.go index b3a48837daab..63b94afb2895 100644 --- a/balancer/pickfirst/pickfirstleaf/pickfirstleaf.go +++ b/balancer/pickfirst/pickfirstleaf/pickfirstleaf.go @@ -19,10 +19,8 @@ // Package pickfirstleaf contains the pick_first load balancing policy which // will be the universal leaf policy after dualstack changes are implemented. // -// # Experimental -// -// Notice: This package is EXPERIMENTAL and may be changed or removed in a -// later release. +// Deprecated: This package is deprecated. Please use the balancer/pickfirst +// package instead. package pickfirstleaf import (
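With the deprecation notice on the package itself, migration for downstream callers is mechanical: both exported symbols now forward to balancer/pickfirst, so switching the import changes no behaviour. A minimal before/after sketch (the configure function is illustrative, not part of this series):

package example

import (
	"google.golang.org/grpc/balancer/pickfirst"
	"google.golang.org/grpc/resolver"
)

// configure previously returned pickfirstleaf.Name and called
// pickfirstleaf.EnableHealthListener; the forwarding shims added in this
// series make the two forms equivalent.
func configure(state resolver.State) (string, resolver.State) {
	return pickfirst.Name, pickfirst.EnableHealthListener(state)
}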