Skip to content

Commit 99aca4f

Browse files
authored
feat: add keepalive feature to tear down streams in their absence (#2605)
* feat: add keepalive feature to tear down streams in their absence * fix: reset timer each time stream is opened * fix: update timings for server monitor to meet stream close time policy * fix: update initial delay vs period in fake scheduled executor and fix test cases
1 parent e92debc commit 99aca4f

File tree

4 files changed

+342
-4
lines changed

4 files changed

+342
-4
lines changed

google-cloud-pubsub/src/main/java/com/google/cloud/pubsub/v1/StreamingSubscriberConnection.java

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
import java.util.concurrent.ConcurrentHashMap;
6464
import java.util.concurrent.Executor;
6565
import java.util.concurrent.ScheduledExecutorService;
66+
import java.util.concurrent.ScheduledFuture;
6667
import java.util.concurrent.TimeUnit;
6768
import java.util.concurrent.atomic.AtomicBoolean;
6869
import java.util.concurrent.atomic.AtomicLong;
@@ -95,6 +96,7 @@ final class StreamingSubscriberConnection extends AbstractApiService implements
9596

9697
private final SubscriberStub subscriberStub;
9798
private final int channelAffinity;
99+
private final long protocolVersion;
98100
private final String subscription;
99101
private final SubscriptionName subscriptionNameObject;
100102
private final ScheduledExecutorService systemExecutor;
@@ -127,6 +129,17 @@ final class StreamingSubscriberConnection extends AbstractApiService implements
127129
private OpenTelemetryPubsubTracer tracer = new OpenTelemetryPubsubTracer(null, false);
128130
private final SubscriberShutdownSettings subscriberShutdownSettings;
129131

132+
private final boolean enableKeepalive;
133+
private static final long KEEP_ALIVE_SUPPORT_VERSION = 1;
134+
private static final Duration CLIENT_PING_INTERVAL = Duration.ofSeconds(30);
135+
private ScheduledFuture<?> pingSchedulerHandle;
136+
137+
private static final Duration SERVER_MONITOR_INTERVAL = Duration.ofSeconds(10);
138+
private static final Duration SERVER_PING_TIMEOUT_DURATION = Duration.ofSeconds(15);
139+
private final AtomicLong lastServerResponseTime;
140+
private final AtomicLong lastClientPingTime;
141+
private ScheduledFuture<?> serverMonitorHandle;
142+
130143
private StreamingSubscriberConnection(Builder builder) {
131144
subscription = builder.subscription;
132145
subscriptionNameObject = SubscriptionName.parse(builder.subscription);
@@ -154,6 +167,7 @@ private StreamingSubscriberConnection(Builder builder) {
154167

155168
subscriberStub = builder.subscriberStub;
156169
channelAffinity = builder.channelAffinity;
170+
protocolVersion = builder.protocolVersion;
157171

158172
MessageDispatcher.Builder messageDispatcherBuilder;
159173
if (builder.receiver != null) {
@@ -190,6 +204,9 @@ private StreamingSubscriberConnection(Builder builder) {
190204

191205
flowControlSettings = builder.flowControlSettings;
192206
useLegacyFlowControl = builder.useLegacyFlowControl;
207+
enableKeepalive = protocolVersion >= KEEP_ALIVE_SUPPORT_VERSION;
208+
lastServerResponseTime = new AtomicLong(clock.nanoTime());
209+
lastClientPingTime = new AtomicLong(-1L);
193210
}
194211

195212
public StreamingSubscriberConnection setExactlyOnceDeliveryEnabled(
@@ -218,6 +235,12 @@ protected void doStop() {
218235
} finally {
219236
lock.unlock();
220237
}
238+
239+
if (enableKeepalive) {
240+
stopClientPinger();
241+
stopServerMonitor();
242+
}
243+
221244
runShutdown();
222245
notifyStopped();
223246
}
@@ -266,6 +289,10 @@ public void onStart(StreamController controller) {
266289

267290
@Override
268291
public void onResponse(StreamingPullResponse response) {
292+
if (enableKeepalive) {
293+
lastServerResponseTime.set(clock.nanoTime());
294+
}
295+
269296
channelReconnectBackoffMillis.set(INITIAL_CHANNEL_RECONNECT_BACKOFF.toMillis());
270297

271298
boolean exactlyOnceDeliveryEnabledResponse =
@@ -295,11 +322,19 @@ public void onResponse(StreamingPullResponse response) {
295322

296323
@Override
297324
public void onError(Throwable t) {
325+
if (enableKeepalive) {
326+
stopClientPinger();
327+
stopServerMonitor();
328+
}
298329
errorFuture.setException(t);
299330
}
300331

301332
@Override
302333
public void onComplete() {
334+
if (enableKeepalive) {
335+
stopClientPinger();
336+
stopServerMonitor();
337+
}
303338
logger.fine("Streaming pull terminated successfully!");
304339
errorFuture.set(null);
305340
}
@@ -336,6 +371,7 @@ private void initialize() {
336371
this.useLegacyFlowControl
337372
? 0
338373
: valueOrZero(flowControlSettings.getMaxOutstandingRequestBytes()))
374+
.setProtocolVersion(protocolVersion)
339375
.build());
340376

341377
/**
@@ -350,6 +386,13 @@ private void initialize() {
350386
lock.unlock();
351387
}
352388

389+
if (enableKeepalive) {
390+
lastServerResponseTime.set(clock.nanoTime());
391+
lastClientPingTime.set(-1L);
392+
startClientPinger();
393+
startServerMonitor();
394+
}
395+
353396
ApiFutures.addCallback(
354397
errorFuture,
355398
new ApiFutureCallback<Void>() {
@@ -366,6 +409,10 @@ public void onSuccess(@Nullable Void result) {
366409

367410
@Override
368411
public void onFailure(Throwable cause) {
412+
if (enableKeepalive) {
413+
stopClientPinger();
414+
stopServerMonitor();
415+
}
369416
if (!isAlive()) {
370417
// we don't care about subscription failures when we're no longer running.
371418
logger.log(Level.FINE, "pull failure after service no longer running", cause);
@@ -410,6 +457,100 @@ private boolean isAlive() {
410457
return state == State.RUNNING || state == State.STARTING;
411458
}
412459

460+
private void startClientPinger() {
461+
if (pingSchedulerHandle != null) {
462+
pingSchedulerHandle.cancel(false);
463+
}
464+
465+
pingSchedulerHandle =
466+
systemExecutor.scheduleAtFixedRate(
467+
() -> {
468+
try {
469+
lock.lock();
470+
try {
471+
if (clientStream != null && isAlive()) {
472+
clientStream.send(StreamingPullRequest.newBuilder().build());
473+
lastClientPingTime.set(clock.nanoTime());
474+
logger.log(Level.FINEST, "Sent client keepalive ping");
475+
}
476+
} finally {
477+
lock.unlock();
478+
}
479+
} catch (Exception e) {
480+
logger.log(Level.FINE, "Error sending client keepalive ping", e);
481+
}
482+
},
483+
0,
484+
CLIENT_PING_INTERVAL.getSeconds(),
485+
TimeUnit.SECONDS);
486+
}
487+
488+
private void stopClientPinger() {
489+
if (pingSchedulerHandle != null) {
490+
pingSchedulerHandle.cancel(false);
491+
pingSchedulerHandle = null;
492+
}
493+
}
494+
495+
private void startServerMonitor() {
496+
if (serverMonitorHandle != null) {
497+
serverMonitorHandle.cancel(false);
498+
}
499+
500+
serverMonitorHandle =
501+
systemExecutor.scheduleAtFixedRate(
502+
() -> {
503+
try {
504+
if (!isAlive()) {
505+
return;
506+
}
507+
508+
long now = clock.nanoTime();
509+
long lastResponse = lastServerResponseTime.get();
510+
long lastPing = lastClientPingTime.get();
511+
512+
if (lastPing <= lastResponse) {
513+
return;
514+
}
515+
516+
Duration elapsedSincePing = Duration.ofNanos(now - lastPing);
517+
if (elapsedSincePing.compareTo(SERVER_PING_TIMEOUT_DURATION) < 0) {
518+
return;
519+
}
520+
521+
logger.log(
522+
Level.WARNING,
523+
"No response from server for {0} seconds since last ping. Closing stream.",
524+
elapsedSincePing.getSeconds());
525+
526+
lock.lock();
527+
try {
528+
if (clientStream != null) {
529+
clientStream.closeSendWithError(
530+
Status.UNAVAILABLE
531+
.withDescription("Keepalive timeout with server")
532+
.asException());
533+
}
534+
} finally {
535+
lock.unlock();
536+
}
537+
stopServerMonitor();
538+
} catch (Exception e) {
539+
logger.log(Level.FINE, "Error in server keepalive monitor", e);
540+
}
541+
},
542+
SERVER_MONITOR_INTERVAL.getSeconds(),
543+
SERVER_MONITOR_INTERVAL.getSeconds(),
544+
TimeUnit.SECONDS);
545+
}
546+
547+
private void stopServerMonitor() {
548+
if (serverMonitorHandle != null) {
549+
serverMonitorHandle.cancel(false);
550+
serverMonitorHandle = null;
551+
}
552+
}
553+
413554
public void setResponseOutstandingMessages(AckResponse ackResponse) {
414555
// We will close the futures with ackResponse - if there are multiple references to the same
415556
// future they will be handled appropriately
@@ -769,6 +910,7 @@ public static final class Builder {
769910
private Distribution ackLatencyDistribution;
770911
private SubscriberStub subscriberStub;
771912
private int channelAffinity;
913+
private long protocolVersion;
772914
private FlowController flowController;
773915
private FlowControlSettings flowControlSettings;
774916
private boolean useLegacyFlowControl;
@@ -840,6 +982,11 @@ public Builder setChannelAffinity(int channelAffinity) {
840982
return this;
841983
}
842984

985+
public Builder setProtocolVersion(long protocolVersion) {
986+
this.protocolVersion = protocolVersion;
987+
return this;
988+
}
989+
843990
public Builder setFlowController(FlowController flowController) {
844991
this.flowController = flowController;
845992
return this;

google-cloud-pubsub/src/main/java/com/google/cloud/pubsub/v1/Subscriber.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ public class Subscriber extends AbstractApiService implements SubscriberInterfac
144144
private final boolean maxDurationPerAckExtensionDefaultUsed;
145145
private final java.time.Duration minDurationPerAckExtension;
146146
private final boolean minDurationPerAckExtensionDefaultUsed;
147+
private final long protocolVersion;
147148

148149
// The ExecutorProvider used to generate executors for processing messages.
149150
private final ExecutorProvider executorProvider;
@@ -182,6 +183,7 @@ private Subscriber(Builder builder) {
182183
maxDurationPerAckExtensionDefaultUsed = builder.maxDurationPerAckExtensionDefaultUsed;
183184
minDurationPerAckExtension = builder.minDurationPerAckExtension;
184185
minDurationPerAckExtensionDefaultUsed = builder.minDurationPerAckExtensionDefaultUsed;
186+
protocolVersion = builder.protocolVersion;
185187

186188
clock = builder.clock.isPresent() ? builder.clock.get() : CurrentMillisClock.getDefaultClock();
187189

@@ -428,6 +430,7 @@ private void startStreamingConnections() {
428430
.setEnableOpenTelemetryTracing(enableOpenTelemetryTracing)
429431
.setTracer(tracer)
430432
.setSubscriberShutdownSettings(subscriberShutdownSettings)
433+
.setProtocolVersion(protocolVersion)
431434
.build();
432435

433436
streamingSubscriberConnections.add(streamingSubscriberConnection);
@@ -548,6 +551,8 @@ public static final class Builder {
548551
private boolean enableOpenTelemetryTracing = false;
549552
private OpenTelemetry openTelemetry = null;
550553

554+
private long protocolVersion = 0L;
555+
551556
private SubscriberShutdownSettings subscriberShutdownSettings =
552557
SubscriberShutdownSettings.newBuilder().build();
553558

@@ -771,6 +776,12 @@ Builder setClock(ApiClock clock) {
771776
return this;
772777
}
773778

779+
/** Gives the ability to override the protocol version */
780+
public Builder setProtocolVersion(long protocolVersion) {
781+
this.protocolVersion = protocolVersion;
782+
return this;
783+
}
784+
774785
/**
775786
* OpenTelemetry will be enabled if setEnableOpenTelemetry is true and and instance of
776787
* OpenTelemetry has been provied. Warning: traces are subject to change. The name and

google-cloud-pubsub/src/test/java/com/google/cloud/pubsub/v1/FakeScheduledExecutorService.java

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,14 @@ public FakeClock getClock() {
5555
public ScheduledFuture<?> schedule(Runnable command, long delay, TimeUnit unit) {
5656
return schedulePendingCallable(
5757
new PendingCallable<>(
58-
Duration.ofMillis(unit.toMillis(delay)), command, PendingCallableType.NORMAL));
58+
Duration.ofMillis(unit.toMillis(delay)), command, null, PendingCallableType.NORMAL));
5959
}
6060

6161
@Override
6262
public <V> ScheduledFuture<V> schedule(Callable<V> callable, long delay, TimeUnit unit) {
6363
return schedulePendingCallable(
6464
new PendingCallable<>(
65-
Duration.ofMillis(unit.toMillis(delay)), callable, PendingCallableType.NORMAL));
65+
Duration.ofMillis(unit.toMillis(delay)), callable, null, PendingCallableType.NORMAL));
6666
}
6767

6868
@Override
@@ -72,6 +72,7 @@ public ScheduledFuture<?> scheduleAtFixedRate(
7272
new PendingCallable<>(
7373
Duration.ofMillis(unit.toMillis(initialDelay)),
7474
command,
75+
Duration.ofMillis(unit.toMillis(period)),
7576
PendingCallableType.FIXED_RATE));
7677
}
7778

@@ -82,6 +83,7 @@ public ScheduledFuture<?> scheduleWithFixedDelay(
8283
new PendingCallable<>(
8384
Duration.ofMillis(unit.toMillis(initialDelay)),
8485
command,
86+
Duration.ofMillis(unit.toMillis(delay)),
8587
PendingCallableType.FIXED_DELAY));
8688
}
8789

@@ -212,13 +214,15 @@ enum PendingCallableType {
212214
class PendingCallable<T> implements Comparable<PendingCallable<T>> {
213215
Instant creationTime = Instant.ofEpochMilli(clock.millisTime());
214216
Duration delay;
217+
Duration period;
215218
Callable<T> pendingCallable;
216219
SettableFuture<T> future = SettableFuture.create();
217220
AtomicBoolean cancelled = new AtomicBoolean(false);
218221
AtomicBoolean done = new AtomicBoolean(false);
219222
PendingCallableType type;
220223

221-
PendingCallable(Duration delay, final Runnable runnable, PendingCallableType type) {
224+
PendingCallable(
225+
Duration delay, final Runnable runnable, Duration period, PendingCallableType type) {
222226
pendingCallable =
223227
new Callable<T>() {
224228
@Override
@@ -229,12 +233,15 @@ public T call() {
229233
};
230234
this.type = type;
231235
this.delay = delay;
236+
this.period = period;
232237
}
233238

234-
PendingCallable(Duration delay, Callable<T> callable, PendingCallableType type) {
239+
PendingCallable(
240+
Duration delay, Callable<T> callable, Duration period, PendingCallableType type) {
235241
pendingCallable = callable;
236242
this.type = type;
237243
this.delay = delay;
244+
this.period = period;
238245
}
239246

240247
private Instant getScheduledTime() {
@@ -305,10 +312,12 @@ T call() {
305312
break;
306313
case FIXED_DELAY:
307314
this.creationTime = Instant.ofEpochMilli(clock.millisTime());
315+
this.delay = period;
308316
schedulePendingCallable(this);
309317
break;
310318
case FIXED_RATE:
311319
this.creationTime = this.creationTime.plus(delay);
320+
this.delay = period;
312321
schedulePendingCallable(this);
313322
break;
314323
default:

0 commit comments

Comments
 (0)