From b5fb02574e912bb7e3a1cc9e0ecd5b86fa047de6 Mon Sep 17 00:00:00 2001 From: nishkarsh-db Date: Thu, 19 Feb 2026 14:07:02 +0530 Subject: [PATCH 1/5] [1/4] Add retry foundation: RequestType, RetryTimeoutManager, and RetryUtils - Add RequestType enum with retry policy classification (IDEMPOTENT/NON_IDEMPOTENT) - Add RetryPolicy enum for categorizing request types - Add RetryTimeoutManager for managing timeout budgets per status code - Add RetryUtils with extractRetryException() method - Add RetryTimeoutManagerTest with 6 comprehensive tests This PR establishes the foundation for unified retry handling across HTTP and Thrift operations. Components work together to make intelligent retry decisions based on request type, status codes, and timeout budgets. Co-authored-by: Cursor --- .../databricks/jdbc/common/RequestType.java | 31 ++++++ .../databricks/jdbc/common/RetryPolicy.java | 15 +++ .../impl/http/RetryTimeoutManager.java | 73 +++++++++++++ .../jdbc/dbclient/impl/http/RetryUtils.java | 29 +++++ .../impl/http/RetryTimeoutManagerTest.java | 101 ++++++++++++++++++ 5 files changed, 249 insertions(+) create mode 100644 src/main/java/com/databricks/jdbc/common/RequestType.java create mode 100644 src/main/java/com/databricks/jdbc/common/RetryPolicy.java create mode 100644 src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java create mode 100644 src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java create mode 100644 src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java diff --git a/src/main/java/com/databricks/jdbc/common/RequestType.java b/src/main/java/com/databricks/jdbc/common/RequestType.java new file mode 100644 index 0000000000..fa04d5fbf0 --- /dev/null +++ b/src/main/java/com/databricks/jdbc/common/RequestType.java @@ -0,0 +1,31 @@ +package com.databricks.jdbc.common; + +public enum RequestType { + UNKNOWN(RetryPolicy.NON_IDEMPOTENT), + FETCH_FEATURE_FLAGS(RetryPolicy.IDEMPOTENT), + THRIFT_OPEN_SESSION(RetryPolicy.IDEMPOTENT), + THRIFT_CLOSE_SESSION(RetryPolicy.IDEMPOTENT), + THRIFT_METADATA(RetryPolicy.IDEMPOTENT), + THRIFT_CLOSE_OPERATION(RetryPolicy.IDEMPOTENT), + THRIFT_CANCEL_OPERATION(RetryPolicy.IDEMPOTENT), + THRIFT_EXECUTE_STATEMENT(RetryPolicy.NON_IDEMPOTENT), + THRIFT_FETCH_RESULTS(RetryPolicy.NON_IDEMPOTENT), + CLOUD_FETCH(RetryPolicy.IDEMPOTENT), + VOLUME_LIST(RetryPolicy.IDEMPOTENT), + VOLUME_SHOW_VOLUMES(RetryPolicy.IDEMPOTENT), + VOLUME_GET(RetryPolicy.IDEMPOTENT), + VOLUME_PUT(RetryPolicy.NON_IDEMPOTENT), + VOLUME_DELETE(RetryPolicy.IDEMPOTENT), + AUTH(RetryPolicy.IDEMPOTENT), + TELEMETRY_PUSH(RetryPolicy.IDEMPOTENT); + + private final RetryPolicy retryPolicy; + + RequestType(RetryPolicy retryPolicy) { + this.retryPolicy = retryPolicy; + } + + public RetryPolicy getRetryPolicy() { + return retryPolicy; + } +} diff --git a/src/main/java/com/databricks/jdbc/common/RetryPolicy.java b/src/main/java/com/databricks/jdbc/common/RetryPolicy.java new file mode 100644 index 0000000000..301bba6c89 --- /dev/null +++ b/src/main/java/com/databricks/jdbc/common/RetryPolicy.java @@ -0,0 +1,15 @@ +package com.databricks.jdbc.common; + +public enum RetryPolicy { + /** + * Idempotent requests can be safely retried multiple times without side effects. Examples: + * Metadata Queries. + */ + IDEMPOTENT, + + /** + * Non-idempotent requests may have side effects and should be retried carefully. Example: Execute + * Statement + */ + NON_IDEMPOTENT +} diff --git a/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java new file mode 100644 index 0000000000..2dcedb22e1 --- /dev/null +++ b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java @@ -0,0 +1,73 @@ +package com.databricks.jdbc.dbclient.impl.http; + +import com.databricks.jdbc.api.internal.IDatabricksConnectionContext; +import org.apache.http.HttpStatus; + +/** + * Manages retry decisions and timeout updates based on HTTP responses and exceptions. Coordinates + * with retry strategies to determine whether requests should be retried and updates request + * timeouts accordingly. + */ +public class RetryTimeoutManager { + private long tempUnavailableTimeoutMillis; + private long rateLimitTimeoutMillis; + private long otherErrorCodesTimeoutMillis; + private long exceptionTimeoutMillis; + + /** + * Creates a new RetryTimeoutManager with connection context. + * + * @param connectionContext the connection context for timeout configurations + */ + public RetryTimeoutManager(IDatabricksConnectionContext connectionContext) { + // Initialize timeouts + this.tempUnavailableTimeoutMillis = + connectionContext.getTemporarilyUnavailableRetryTimeout() * 1000L; + this.rateLimitTimeoutMillis = connectionContext.getRateLimitRetryTimeout() * 1000L; + this.otherErrorCodesTimeoutMillis = RetryUtils.REQUEST_TIMEOUT_SECONDS * 1000L; + this.exceptionTimeoutMillis = RetryUtils.REQUEST_EXCEPTION_TIMEOUT_SECONDS * 1000L; + } + + /** + * Evaluates retry decision based on HTTP status code and updates timeout accordingly. Uses the + * Retry-After header value when provided by the strategy. + * + * @param statusCode the HTTP status code from the response + * @param retryDelayMillis the retry delay in milliseconds to subtract from timeout + * @return true if the request should be retried, false otherwise + */ + public boolean evaluateRetryTimeoutForResponse(int statusCode, int retryDelayMillis) { + // Update the appropriate timeout based on status code + switch (statusCode) { + case HttpStatus.SC_SERVICE_UNAVAILABLE: + tempUnavailableTimeoutMillis -= retryDelayMillis; + break; + case HttpStatus.SC_TOO_MANY_REQUESTS: + rateLimitTimeoutMillis -= retryDelayMillis; + break; + default: + otherErrorCodesTimeoutMillis -= retryDelayMillis; + break; + } + + // Check if any timeout has been exceeded + return tempUnavailableTimeoutMillis > 0 + && rateLimitTimeoutMillis > 0 + && otherErrorCodesTimeoutMillis > 0; + } + + /** + * Evaluates retry decision based on an exception and updates timeout accordingly. + * + * @param exception the exception that occurred during request execution + * @param retryDelayMillis the retry delay in milliseconds to subtract from timeout + * @return true if the request should be retried, false otherwise + */ + public boolean evaluateRetryTimeoutForException(int retryDelayMillis) { + // Update exception timeout by subtracting the retry delay + exceptionTimeoutMillis -= retryDelayMillis; + + // Check if exception timeout has been exceeded + return exceptionTimeoutMillis > 0; + } +} diff --git a/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java new file mode 100644 index 0000000000..e483fbd2c1 --- /dev/null +++ b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java @@ -0,0 +1,29 @@ +package com.databricks.jdbc.dbclient.impl.http; + +import com.databricks.jdbc.exception.DatabricksRetryHandlerException; + +/** + * Utility class containing common retry handling helper functions used across different retry + * strategies and handlers. + */ +public class RetryUtils { + public static final long REQUEST_TIMEOUT_SECONDS = 10; + public static final long REQUEST_EXCEPTION_TIMEOUT_SECONDS = 10; + + /** + * Extracts DatabricksRetryHandlerException from the exception cause chain. + * + * @param e the exception to search through + * @return the DatabricksRetryHandlerException if found, null otherwise + */ + public static DatabricksRetryHandlerException extractRetryException(Throwable e) { + Throwable cause = e.getCause(); + while (cause != null) { + if (cause instanceof DatabricksRetryHandlerException) { + return (DatabricksRetryHandlerException) cause; + } + cause = cause.getCause(); + } + return null; + } +} diff --git a/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java b/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java new file mode 100644 index 0000000000..e379091eac --- /dev/null +++ b/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java @@ -0,0 +1,101 @@ +package com.databricks.jdbc.dbclient.impl.http; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.*; + +import com.databricks.jdbc.api.internal.IDatabricksConnectionContext; +import org.apache.http.HttpStatus; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class RetryTimeoutManagerTest { + + private IDatabricksConnectionContext mockContext; + private RetryTimeoutManager timeoutManager; + + @BeforeEach + void setUp() { + mockContext = mock(IDatabricksConnectionContext.class); + when(mockContext.getTemporarilyUnavailableRetryTimeout()).thenReturn(120); // 120 seconds + when(mockContext.getRateLimitRetryTimeout()).thenReturn(300); // 300 seconds + timeoutManager = new RetryTimeoutManager(mockContext); + } + + @Test + void testServiceUnavailableTimeoutExhausted() { + // Make multiple retries that exhaust the service unavailable timeout (120 seconds) + // Each retry delays for 30 seconds + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_SERVICE_UNAVAILABLE, 30000)); + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_SERVICE_UNAVAILABLE, 30000)); + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_SERVICE_UNAVAILABLE, 30000)); + // This should exhaust the timeout (30+30+30+31 > 120 seconds) + assertFalse( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_SERVICE_UNAVAILABLE, 31000)); + } + + @Test + void testRateLimitTimeoutExhausted() { + // Make multiple retries that exhaust the rate limit timeout (300 seconds) + // Each retry delays for 80 seconds + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_TOO_MANY_REQUESTS, 80000)); + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_TOO_MANY_REQUESTS, 80000)); + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_TOO_MANY_REQUESTS, 80000)); + // This should exhaust the timeout (80+80+80+61 > 300 seconds) + assertFalse( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_TOO_MANY_REQUESTS, 61000)); + } + + @Test + void testOtherErrorCodesTimeout() { + // Test other error codes (e.g., 500) using the default 10-second timeout + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_INTERNAL_SERVER_ERROR, 3000)); + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_INTERNAL_SERVER_ERROR, 3000)); + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_INTERNAL_SERVER_ERROR, 3000)); + // This should exhaust the timeout (3+3+3+2 > 10 seconds) + assertFalse( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_INTERNAL_SERVER_ERROR, 2000)); + } + + @Test + void testExceptionTimeout() { + // Test exception timeout (default 10 seconds) + assertTrue(timeoutManager.evaluateRetryTimeoutForException(3000)); + assertTrue(timeoutManager.evaluateRetryTimeoutForException(3000)); + assertTrue(timeoutManager.evaluateRetryTimeoutForException(3000)); + // This should exhaust the timeout (3+3+3+2 > 10 seconds) + assertFalse(timeoutManager.evaluateRetryTimeoutForException(2000)); + } + + @Test + void testMixedRetries() { + // Test combination of different status codes + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_SERVICE_UNAVAILABLE, 20000)); + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_TOO_MANY_REQUESTS, 50000)); + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_INTERNAL_SERVER_ERROR, 2000)); + assertTrue(timeoutManager.evaluateRetryTimeoutForException(2000)); + + // All timeouts should still have capacity + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_SERVICE_UNAVAILABLE, 10000)); + } + + @Test + void testImmediateTimeoutExhaustion() { + // A single large delay can exhaust the timeout + assertFalse( + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_SERVICE_UNAVAILABLE, 121000)); // > 120 seconds + } +} From 82a654c0555a21dfff909c1d19f26cadd6cf6be9 Mon Sep 17 00:00:00 2001 From: nishkarsh-db Date: Thu, 19 Feb 2026 15:22:33 +0530 Subject: [PATCH 2/5] Add API retriable codes support to RetryTimeoutManager - Add apiRetriableCodesTimeoutMillis field with independent timeout budget - Add boolean parameter to evaluateRetryTimeoutForResponse() to distinguish API codes - When isApiRetriableCode=true, only API codes timeout is deducted - Add 2 new tests: testApiRetriableCodesTimeout and testApiRetriableCodesIndependentTimeout - Total 8 tests pass (6 original + 2 new) This allows custom API retriable codes (e.g. 404, 400) to have their own separate timeout budget (300s default) independent from standard retry codes. Strategies will pass true when status code is in custom ApiRetriableHttpCodes list. Co-authored-by: Cursor --- .../impl/http/RetryTimeoutManager.java | 14 +++- .../impl/http/RetryTimeoutManagerTest.java | 81 +++++++++++++++---- 2 files changed, 76 insertions(+), 19 deletions(-) diff --git a/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java index 2dcedb22e1..14eacd143f 100644 --- a/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java +++ b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java @@ -13,6 +13,7 @@ public class RetryTimeoutManager { private long rateLimitTimeoutMillis; private long otherErrorCodesTimeoutMillis; private long exceptionTimeoutMillis; + private long apiRetriableCodesTimeoutMillis; /** * Creates a new RetryTimeoutManager with connection context. @@ -26,6 +27,7 @@ public RetryTimeoutManager(IDatabricksConnectionContext connectionContext) { this.rateLimitTimeoutMillis = connectionContext.getRateLimitRetryTimeout() * 1000L; this.otherErrorCodesTimeoutMillis = RetryUtils.REQUEST_TIMEOUT_SECONDS * 1000L; this.exceptionTimeoutMillis = RetryUtils.REQUEST_EXCEPTION_TIMEOUT_SECONDS * 1000L; + this.apiRetriableCodesTimeoutMillis = connectionContext.getApiRetryTimeout() * 1000L; } /** @@ -34,10 +36,18 @@ public RetryTimeoutManager(IDatabricksConnectionContext connectionContext) { * * @param statusCode the HTTP status code from the response * @param retryDelayMillis the retry delay in milliseconds to subtract from timeout + * @param isApiRetriableCode true if this is a custom API retriable code, false otherwise * @return true if the request should be retried, false otherwise */ - public boolean evaluateRetryTimeoutForResponse(int statusCode, int retryDelayMillis) { - // Update the appropriate timeout based on status code + public boolean evaluateRetryTimeoutForResponse( + int statusCode, int retryDelayMillis, boolean isApiRetriableCode) { + // If this is a custom API retriable code, only deduct from API codes timeout + if (isApiRetriableCode) { + apiRetriableCodesTimeoutMillis -= retryDelayMillis; + return apiRetriableCodesTimeoutMillis > 0; + } + + // Otherwise, update the appropriate timeout based on status code switch (statusCode) { case HttpStatus.SC_SERVICE_UNAVAILABLE: tempUnavailableTimeoutMillis -= retryDelayMillis; diff --git a/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java b/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java index e379091eac..20a2063a84 100644 --- a/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java +++ b/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java @@ -18,6 +18,7 @@ void setUp() { mockContext = mock(IDatabricksConnectionContext.class); when(mockContext.getTemporarilyUnavailableRetryTimeout()).thenReturn(120); // 120 seconds when(mockContext.getRateLimitRetryTimeout()).thenReturn(300); // 300 seconds + when(mockContext.getApiRetryTimeout()).thenReturn(300); // 300 seconds for API retriable codes timeoutManager = new RetryTimeoutManager(mockContext); } @@ -26,14 +27,18 @@ void testServiceUnavailableTimeoutExhausted() { // Make multiple retries that exhaust the service unavailable timeout (120 seconds) // Each retry delays for 30 seconds assertTrue( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_SERVICE_UNAVAILABLE, 30000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_SERVICE_UNAVAILABLE, 30000, false)); assertTrue( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_SERVICE_UNAVAILABLE, 30000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_SERVICE_UNAVAILABLE, 30000, false)); assertTrue( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_SERVICE_UNAVAILABLE, 30000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_SERVICE_UNAVAILABLE, 30000, false)); // This should exhaust the timeout (30+30+30+31 > 120 seconds) assertFalse( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_SERVICE_UNAVAILABLE, 31000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_SERVICE_UNAVAILABLE, 31000, false)); } @Test @@ -41,28 +46,36 @@ void testRateLimitTimeoutExhausted() { // Make multiple retries that exhaust the rate limit timeout (300 seconds) // Each retry delays for 80 seconds assertTrue( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_TOO_MANY_REQUESTS, 80000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_TOO_MANY_REQUESTS, 80000, false)); assertTrue( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_TOO_MANY_REQUESTS, 80000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_TOO_MANY_REQUESTS, 80000, false)); assertTrue( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_TOO_MANY_REQUESTS, 80000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_TOO_MANY_REQUESTS, 80000, false)); // This should exhaust the timeout (80+80+80+61 > 300 seconds) assertFalse( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_TOO_MANY_REQUESTS, 61000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_TOO_MANY_REQUESTS, 61000, false)); } @Test void testOtherErrorCodesTimeout() { // Test other error codes (e.g., 500) using the default 10-second timeout assertTrue( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_INTERNAL_SERVER_ERROR, 3000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_INTERNAL_SERVER_ERROR, 3000, false)); assertTrue( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_INTERNAL_SERVER_ERROR, 3000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_INTERNAL_SERVER_ERROR, 3000, false)); assertTrue( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_INTERNAL_SERVER_ERROR, 3000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_INTERNAL_SERVER_ERROR, 3000, false)); // This should exhaust the timeout (3+3+3+2 > 10 seconds) assertFalse( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_INTERNAL_SERVER_ERROR, 2000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_INTERNAL_SERVER_ERROR, 2000, false)); } @Test @@ -79,16 +92,20 @@ void testExceptionTimeout() { void testMixedRetries() { // Test combination of different status codes assertTrue( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_SERVICE_UNAVAILABLE, 20000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_SERVICE_UNAVAILABLE, 20000, false)); assertTrue( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_TOO_MANY_REQUESTS, 50000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_TOO_MANY_REQUESTS, 50000, false)); assertTrue( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_INTERNAL_SERVER_ERROR, 2000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_INTERNAL_SERVER_ERROR, 2000, false)); assertTrue(timeoutManager.evaluateRetryTimeoutForException(2000)); // All timeouts should still have capacity assertTrue( - timeoutManager.evaluateRetryTimeoutForResponse(HttpStatus.SC_SERVICE_UNAVAILABLE, 10000)); + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_SERVICE_UNAVAILABLE, 10000, false)); } @Test @@ -96,6 +113,36 @@ void testImmediateTimeoutExhaustion() { // A single large delay can exhaust the timeout assertFalse( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_SERVICE_UNAVAILABLE, 121000)); // > 120 seconds + HttpStatus.SC_SERVICE_UNAVAILABLE, 121000, false)); // > 120 seconds + } + + @Test + void testApiRetriableCodesTimeout() { + // Test custom API retriable codes (e.g., 404) using the API retry timeout (300 seconds) + // When isApiRetriableCode=true, only API codes timeout is used + assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, 99000, true)); // 99s + assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, 99000, true)); // 198s total + assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, 99000, true)); // 297s total + // This should exhaust the timeout (99+99+99+4 > 300 seconds) + assertFalse(timeoutManager.evaluateRetryTimeoutForResponse(404, 4000, true)); + } + + @Test + void testApiRetriableCodesIndependentTimeout() { + // Test that API retriable codes have independent timeout from standard codes + // Exhaust standard 503 timeout (120 seconds) + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_SERVICE_UNAVAILABLE, 60000, false)); + assertTrue( + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_SERVICE_UNAVAILABLE, 59000, false)); // 119 seconds total + assertFalse( + timeoutManager.evaluateRetryTimeoutForResponse( + HttpStatus.SC_SERVICE_UNAVAILABLE, 2000, false)); // Would exceed (119+2 > 120) + + // API retriable codes should still work (300 second timeout) + assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, 99000, true)); + assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, 99000, true)); } } From 308431bc672ed4fe1f6ef346343e2a3e9b963e78 Mon Sep 17 00:00:00 2001 From: nishkarsh-db Date: Thu, 19 Feb 2026 15:26:46 +0530 Subject: [PATCH 3/5] Refactor RetryTimeoutManagerTest to use constants instead of magic numbers - Extract timeout values as class constants (TEMP_UNAVAILABLE_TIMEOUT_SECONDS, etc.) - Calculate test delays as expressions (timeout/4, timeout/3) instead of hardcoded values - Use (constant + 1) pattern for exhaustion tests instead of magic numbers - Improves maintainability and makes test intent clearer - All 8 tests pass Co-authored-by: Cursor --- .../impl/http/RetryTimeoutManagerTest.java | 97 +++++++++++-------- 1 file changed, 56 insertions(+), 41 deletions(-) diff --git a/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java b/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java index 20a2063a84..3cb172ae81 100644 --- a/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java +++ b/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java @@ -10,82 +10,91 @@ class RetryTimeoutManagerTest { + private static final int TEMP_UNAVAILABLE_TIMEOUT_SECONDS = 120; + private static final int RATE_LIMIT_TIMEOUT_SECONDS = 300; + private static final int API_RETRIABLE_TIMEOUT_SECONDS = 300; + private IDatabricksConnectionContext mockContext; private RetryTimeoutManager timeoutManager; @BeforeEach void setUp() { mockContext = mock(IDatabricksConnectionContext.class); - when(mockContext.getTemporarilyUnavailableRetryTimeout()).thenReturn(120); // 120 seconds - when(mockContext.getRateLimitRetryTimeout()).thenReturn(300); // 300 seconds - when(mockContext.getApiRetryTimeout()).thenReturn(300); // 300 seconds for API retriable codes + when(mockContext.getTemporarilyUnavailableRetryTimeout()) + .thenReturn(TEMP_UNAVAILABLE_TIMEOUT_SECONDS); + when(mockContext.getRateLimitRetryTimeout()).thenReturn(RATE_LIMIT_TIMEOUT_SECONDS); + when(mockContext.getApiRetryTimeout()).thenReturn(API_RETRIABLE_TIMEOUT_SECONDS); timeoutManager = new RetryTimeoutManager(mockContext); } @Test void testServiceUnavailableTimeoutExhausted() { - // Make multiple retries that exhaust the service unavailable timeout (120 seconds) - // Each retry delays for 30 seconds + // Make multiple retries that exhaust the service unavailable timeout + int delaySeconds = TEMP_UNAVAILABLE_TIMEOUT_SECONDS / 4; assertTrue( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_SERVICE_UNAVAILABLE, 30000, false)); + HttpStatus.SC_SERVICE_UNAVAILABLE, delaySeconds * 1000, false)); assertTrue( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_SERVICE_UNAVAILABLE, 30000, false)); + HttpStatus.SC_SERVICE_UNAVAILABLE, delaySeconds * 1000, false)); assertTrue( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_SERVICE_UNAVAILABLE, 30000, false)); - // This should exhaust the timeout (30+30+30+31 > 120 seconds) + HttpStatus.SC_SERVICE_UNAVAILABLE, delaySeconds * 1000, false)); + // This should exhaust the timeout assertFalse( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_SERVICE_UNAVAILABLE, 31000, false)); + HttpStatus.SC_SERVICE_UNAVAILABLE, (delaySeconds + 1) * 1000, false)); } @Test void testRateLimitTimeoutExhausted() { - // Make multiple retries that exhaust the rate limit timeout (300 seconds) - // Each retry delays for 80 seconds + // Make multiple retries that exhaust the rate limit timeout + int delaySeconds = RATE_LIMIT_TIMEOUT_SECONDS / 4; assertTrue( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_TOO_MANY_REQUESTS, 80000, false)); + HttpStatus.SC_TOO_MANY_REQUESTS, delaySeconds * 1000, false)); assertTrue( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_TOO_MANY_REQUESTS, 80000, false)); + HttpStatus.SC_TOO_MANY_REQUESTS, delaySeconds * 1000, false)); assertTrue( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_TOO_MANY_REQUESTS, 80000, false)); - // This should exhaust the timeout (80+80+80+61 > 300 seconds) + HttpStatus.SC_TOO_MANY_REQUESTS, delaySeconds * 1000, false)); + // This should exhaust the timeout assertFalse( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_TOO_MANY_REQUESTS, 61000, false)); + HttpStatus.SC_TOO_MANY_REQUESTS, (delaySeconds + 1) * 1000, false)); } @Test void testOtherErrorCodesTimeout() { // Test other error codes (e.g., 500) using the default 10-second timeout + int otherErrorsTimeout = 10; // RetryUtils.REQUEST_TIMEOUT_SECONDS + int delaySeconds = otherErrorsTimeout / 3; assertTrue( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_INTERNAL_SERVER_ERROR, 3000, false)); + HttpStatus.SC_INTERNAL_SERVER_ERROR, delaySeconds * 1000, false)); assertTrue( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_INTERNAL_SERVER_ERROR, 3000, false)); + HttpStatus.SC_INTERNAL_SERVER_ERROR, delaySeconds * 1000, false)); assertTrue( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_INTERNAL_SERVER_ERROR, 3000, false)); - // This should exhaust the timeout (3+3+3+2 > 10 seconds) + HttpStatus.SC_INTERNAL_SERVER_ERROR, delaySeconds * 1000, false)); + // This should exhaust the timeout assertFalse( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_INTERNAL_SERVER_ERROR, 2000, false)); + HttpStatus.SC_INTERNAL_SERVER_ERROR, (delaySeconds + 1) * 1000, false)); } @Test void testExceptionTimeout() { // Test exception timeout (default 10 seconds) - assertTrue(timeoutManager.evaluateRetryTimeoutForException(3000)); - assertTrue(timeoutManager.evaluateRetryTimeoutForException(3000)); - assertTrue(timeoutManager.evaluateRetryTimeoutForException(3000)); - // This should exhaust the timeout (3+3+3+2 > 10 seconds) - assertFalse(timeoutManager.evaluateRetryTimeoutForException(2000)); + int exceptionTimeout = 10; // RetryUtils.REQUEST_EXCEPTION_TIMEOUT_SECONDS + int delaySeconds = exceptionTimeout / 3; + assertTrue(timeoutManager.evaluateRetryTimeoutForException(delaySeconds * 1000)); + assertTrue(timeoutManager.evaluateRetryTimeoutForException(delaySeconds * 1000)); + assertTrue(timeoutManager.evaluateRetryTimeoutForException(delaySeconds * 1000)); + // This should exhaust the timeout + assertFalse(timeoutManager.evaluateRetryTimeoutForException((delaySeconds + 1) * 1000)); } @Test @@ -113,36 +122,42 @@ void testImmediateTimeoutExhaustion() { // A single large delay can exhaust the timeout assertFalse( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_SERVICE_UNAVAILABLE, 121000, false)); // > 120 seconds + HttpStatus.SC_SERVICE_UNAVAILABLE, + (TEMP_UNAVAILABLE_TIMEOUT_SECONDS + 1) * 1000, + false)); } @Test void testApiRetriableCodesTimeout() { - // Test custom API retriable codes (e.g., 404) using the API retry timeout (300 seconds) + // Test custom API retriable codes (e.g., 404) using the API retry timeout // When isApiRetriableCode=true, only API codes timeout is used - assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, 99000, true)); // 99s - assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, 99000, true)); // 198s total - assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, 99000, true)); // 297s total - // This should exhaust the timeout (99+99+99+4 > 300 seconds) - assertFalse(timeoutManager.evaluateRetryTimeoutForResponse(404, 4000, true)); + int delaySeconds = API_RETRIABLE_TIMEOUT_SECONDS / 4; + assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, delaySeconds * 1000, true)); + assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, delaySeconds * 1000, true)); + assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, delaySeconds * 1000, true)); + // This should exhaust the timeout + assertFalse( + timeoutManager.evaluateRetryTimeoutForResponse(404, (delaySeconds + 1) * 1000, true)); } @Test void testApiRetriableCodesIndependentTimeout() { // Test that API retriable codes have independent timeout from standard codes - // Exhaust standard 503 timeout (120 seconds) + // Exhaust standard 503 timeout + int delaySeconds = TEMP_UNAVAILABLE_TIMEOUT_SECONDS / 2; assertTrue( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_SERVICE_UNAVAILABLE, 60000, false)); + HttpStatus.SC_SERVICE_UNAVAILABLE, delaySeconds * 1000, false)); assertTrue( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_SERVICE_UNAVAILABLE, 59000, false)); // 119 seconds total + HttpStatus.SC_SERVICE_UNAVAILABLE, (delaySeconds - 1) * 1000, false)); assertFalse( timeoutManager.evaluateRetryTimeoutForResponse( - HttpStatus.SC_SERVICE_UNAVAILABLE, 2000, false)); // Would exceed (119+2 > 120) + HttpStatus.SC_SERVICE_UNAVAILABLE, 2000, false)); - // API retriable codes should still work (300 second timeout) - assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, 99000, true)); - assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, 99000, true)); + // API retriable codes should still work + int apiDelaySeconds = API_RETRIABLE_TIMEOUT_SECONDS / 3; + assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, apiDelaySeconds * 1000, true)); + assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, apiDelaySeconds * 1000, true)); } } From 98a6f6d8d1c38e8a7b52f5573727aed99e68abd4 Mon Sep 17 00:00:00 2001 From: nishkarsh-db Date: Thu, 19 Feb 2026 15:37:09 +0530 Subject: [PATCH 4/5] Change default retry timeouts from 10s to 120s - Update REQUEST_TIMEOUT_SECONDS from 10 to 120 - Update REQUEST_EXCEPTION_TIMEOUT_SECONDS from 10 to 120 - Update tests to reflect new 120-second timeout values - All 8 tests pass Co-authored-by: Cursor --- .../jdbc/dbclient/impl/http/RetryUtils.java | 4 ++-- .../dbclient/impl/http/RetryTimeoutManagerTest.java | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java index e483fbd2c1..b3e05c5b59 100644 --- a/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java +++ b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java @@ -7,8 +7,8 @@ * strategies and handlers. */ public class RetryUtils { - public static final long REQUEST_TIMEOUT_SECONDS = 10; - public static final long REQUEST_EXCEPTION_TIMEOUT_SECONDS = 10; + public static final long REQUEST_TIMEOUT_SECONDS = 120; + public static final long REQUEST_EXCEPTION_TIMEOUT_SECONDS = 120; /** * Extracts DatabricksRetryHandlerException from the exception cause chain. diff --git a/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java b/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java index 3cb172ae81..9f18d44476 100644 --- a/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java +++ b/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java @@ -67,9 +67,9 @@ void testRateLimitTimeoutExhausted() { @Test void testOtherErrorCodesTimeout() { - // Test other error codes (e.g., 500) using the default 10-second timeout - int otherErrorsTimeout = 10; // RetryUtils.REQUEST_TIMEOUT_SECONDS - int delaySeconds = otherErrorsTimeout / 3; + // Test other error codes (e.g., 500) using the default 120-second timeout + int otherErrorsTimeout = 120; // RetryUtils.REQUEST_TIMEOUT_SECONDS + int delaySeconds = otherErrorsTimeout / 4; assertTrue( timeoutManager.evaluateRetryTimeoutForResponse( HttpStatus.SC_INTERNAL_SERVER_ERROR, delaySeconds * 1000, false)); @@ -87,9 +87,9 @@ void testOtherErrorCodesTimeout() { @Test void testExceptionTimeout() { - // Test exception timeout (default 10 seconds) - int exceptionTimeout = 10; // RetryUtils.REQUEST_EXCEPTION_TIMEOUT_SECONDS - int delaySeconds = exceptionTimeout / 3; + // Test exception timeout (default 120 seconds) + int exceptionTimeout = 120; // RetryUtils.REQUEST_EXCEPTION_TIMEOUT_SECONDS + int delaySeconds = exceptionTimeout / 4; assertTrue(timeoutManager.evaluateRetryTimeoutForException(delaySeconds * 1000)); assertTrue(timeoutManager.evaluateRetryTimeoutForException(delaySeconds * 1000)); assertTrue(timeoutManager.evaluateRetryTimeoutForException(delaySeconds * 1000)); From e0615cc0e6003fe7219335fa5cd088d04e356a66 Mon Sep 17 00:00:00 2001 From: nishkarsh-db Date: Mon, 23 Feb 2026 15:07:46 +0530 Subject: [PATCH 5/5] Address PR review comments - Add debug logs to RetryTimeoutManager explaining why retry was stopped - Move timeout exhaustion checks into switch statement cases - Add comment explaining why extractRetryException skips top-level exception - Rename timeout constants to DEFAULT_REQUEST_TIMEOUT_SECONDS and DEFAULT_REQUEST_EXCEPTION_TIMEOUT_SECONDS - Fix Javadoc to remove unused @param exception Changes address review feedback from @vikrantpuppala: - Debug logs make retry stop reason abundantly clear - Timeout checks moved to individual switch cases for clarity - Comment added to extractRetryException about TTransportException wrapper - Constants renamed with DEFAULT_ prefix for clarity Co-authored-by: Cursor --- .../impl/http/RetryTimeoutManager.java | 47 +++++++++++++++---- .../jdbc/dbclient/impl/http/RetryUtils.java | 8 ++-- 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java index 14eacd143f..676491a92e 100644 --- a/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java +++ b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java @@ -1,6 +1,8 @@ package com.databricks.jdbc.dbclient.impl.http; import com.databricks.jdbc.api.internal.IDatabricksConnectionContext; +import com.databricks.jdbc.log.JdbcLogger; +import com.databricks.jdbc.log.JdbcLoggerFactory; import org.apache.http.HttpStatus; /** @@ -9,6 +11,7 @@ * timeouts accordingly. */ public class RetryTimeoutManager { + private static final JdbcLogger LOGGER = JdbcLoggerFactory.getLogger(RetryTimeoutManager.class); private long tempUnavailableTimeoutMillis; private long rateLimitTimeoutMillis; private long otherErrorCodesTimeoutMillis; @@ -25,8 +28,8 @@ public RetryTimeoutManager(IDatabricksConnectionContext connectionContext) { this.tempUnavailableTimeoutMillis = connectionContext.getTemporarilyUnavailableRetryTimeout() * 1000L; this.rateLimitTimeoutMillis = connectionContext.getRateLimitRetryTimeout() * 1000L; - this.otherErrorCodesTimeoutMillis = RetryUtils.REQUEST_TIMEOUT_SECONDS * 1000L; - this.exceptionTimeoutMillis = RetryUtils.REQUEST_EXCEPTION_TIMEOUT_SECONDS * 1000L; + this.otherErrorCodesTimeoutMillis = RetryUtils.DEFAULT_REQUEST_TIMEOUT_SECONDS * 1000L; + this.exceptionTimeoutMillis = RetryUtils.DEFAULT_REQUEST_EXCEPTION_TIMEOUT_SECONDS * 1000L; this.apiRetriableCodesTimeoutMillis = connectionContext.getApiRetryTimeout() * 1000L; } @@ -44,32 +47,53 @@ public boolean evaluateRetryTimeoutForResponse( // If this is a custom API retriable code, only deduct from API codes timeout if (isApiRetriableCode) { apiRetriableCodesTimeoutMillis -= retryDelayMillis; - return apiRetriableCodesTimeoutMillis > 0; + if (apiRetriableCodesTimeoutMillis <= 0) { + LOGGER.debug( + "Retry stopped: API retriable codes timeout exhausted. Remaining: {}ms", + apiRetriableCodesTimeoutMillis); + return false; + } + return true; } // Otherwise, update the appropriate timeout based on status code switch (statusCode) { case HttpStatus.SC_SERVICE_UNAVAILABLE: tempUnavailableTimeoutMillis -= retryDelayMillis; + if (tempUnavailableTimeoutMillis <= 0) { + LOGGER.debug( + "Retry stopped: Service unavailable (503) timeout exhausted. Remaining: {}ms", + tempUnavailableTimeoutMillis); + return false; + } break; case HttpStatus.SC_TOO_MANY_REQUESTS: rateLimitTimeoutMillis -= retryDelayMillis; + if (rateLimitTimeoutMillis <= 0) { + LOGGER.debug( + "Retry stopped: Rate limit (429) timeout exhausted. Remaining: {}ms", + rateLimitTimeoutMillis); + return false; + } break; default: otherErrorCodesTimeoutMillis -= retryDelayMillis; + if (otherErrorCodesTimeoutMillis <= 0) { + LOGGER.debug( + "Retry stopped: Other error codes timeout exhausted for status {}. Remaining: {}ms", + statusCode, + otherErrorCodesTimeoutMillis); + return false; + } break; } - // Check if any timeout has been exceeded - return tempUnavailableTimeoutMillis > 0 - && rateLimitTimeoutMillis > 0 - && otherErrorCodesTimeoutMillis > 0; + return true; } /** * Evaluates retry decision based on an exception and updates timeout accordingly. * - * @param exception the exception that occurred during request execution * @param retryDelayMillis the retry delay in milliseconds to subtract from timeout * @return true if the request should be retried, false otherwise */ @@ -78,6 +102,11 @@ public boolean evaluateRetryTimeoutForException(int retryDelayMillis) { exceptionTimeoutMillis -= retryDelayMillis; // Check if exception timeout has been exceeded - return exceptionTimeoutMillis > 0; + if (exceptionTimeoutMillis <= 0) { + LOGGER.debug( + "Retry stopped: Exception timeout exhausted. Remaining: {}ms", exceptionTimeoutMillis); + return false; + } + return true; } } diff --git a/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java index b3e05c5b59..2adab17101 100644 --- a/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java +++ b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java @@ -7,16 +7,18 @@ * strategies and handlers. */ public class RetryUtils { - public static final long REQUEST_TIMEOUT_SECONDS = 120; - public static final long REQUEST_EXCEPTION_TIMEOUT_SECONDS = 120; + public static final long DEFAULT_REQUEST_TIMEOUT_SECONDS = 120; + public static final long DEFAULT_REQUEST_EXCEPTION_TIMEOUT_SECONDS = 120; /** - * Extracts DatabricksRetryHandlerException from the exception cause chain. + * Extracts DatabricksRetryHandlerException from the exception cause chain. Skips the top-level + * exception as it's typically a TTransportException wrapper. * * @param e the exception to search through * @return the DatabricksRetryHandlerException if found, null otherwise */ public static DatabricksRetryHandlerException extractRetryException(Throwable e) { + // Start with cause to skip the top-level TTransportException wrapper Throwable cause = e.getCause(); while (cause != null) { if (cause instanceof DatabricksRetryHandlerException) {