diff --git a/src/main/java/com/databricks/jdbc/common/RequestType.java b/src/main/java/com/databricks/jdbc/common/RequestType.java
new file mode 100644
index 0000000000..fa04d5fbf0
--- /dev/null
+++ b/src/main/java/com/databricks/jdbc/common/RequestType.java
@@ -0,0 +1,37 @@
+package com.databricks.jdbc.common;
+
+/** Enumerates request kinds and pairs each one with the {@link RetryPolicy} assigned to it. */
+public enum RequestType {
+  UNKNOWN(RetryPolicy.NON_IDEMPOTENT),
+  FETCH_FEATURE_FLAGS(RetryPolicy.IDEMPOTENT),
+  THRIFT_OPEN_SESSION(RetryPolicy.IDEMPOTENT),
+  THRIFT_CLOSE_SESSION(RetryPolicy.IDEMPOTENT),
+  THRIFT_METADATA(RetryPolicy.IDEMPOTENT),
+  THRIFT_CLOSE_OPERATION(RetryPolicy.IDEMPOTENT),
+  THRIFT_CANCEL_OPERATION(RetryPolicy.IDEMPOTENT),
+  THRIFT_EXECUTE_STATEMENT(RetryPolicy.NON_IDEMPOTENT),
+  THRIFT_FETCH_RESULTS(RetryPolicy.NON_IDEMPOTENT),
+  CLOUD_FETCH(RetryPolicy.IDEMPOTENT),
+  VOLUME_LIST(RetryPolicy.IDEMPOTENT),
+  VOLUME_SHOW_VOLUMES(RetryPolicy.IDEMPOTENT),
+  VOLUME_GET(RetryPolicy.IDEMPOTENT),
+  VOLUME_PUT(RetryPolicy.NON_IDEMPOTENT),
+  VOLUME_DELETE(RetryPolicy.IDEMPOTENT),
+  AUTH(RetryPolicy.IDEMPOTENT),
+  TELEMETRY_PUSH(RetryPolicy.IDEMPOTENT);
+
+  private final RetryPolicy retryPolicy;
+
+  RequestType(RetryPolicy retryPolicy) {
+    this.retryPolicy = retryPolicy;
+  }
+
+  /**
+   * Returns the retry policy associated with this request type.
+   *
+   * @return the retry policy assigned to this constant
+   */
+  public RetryPolicy getRetryPolicy() {
+    return retryPolicy;
+  }
+}
diff --git a/src/main/java/com/databricks/jdbc/common/RetryPolicy.java b/src/main/java/com/databricks/jdbc/common/RetryPolicy.java
new file mode 100644
index 0000000000..301bba6c89
--- /dev/null
+++ b/src/main/java/com/databricks/jdbc/common/RetryPolicy.java
@@ -0,0 +1,16 @@
+package com.databricks.jdbc.common;
+
+/** Describes whether a request can be repeated safely when it is retried. */
+public enum RetryPolicy {
+  /**
+   * Idempotent requests can be safely retried multiple times without side effects. Example:
+   * metadata queries.
+   */
+  IDEMPOTENT,
+
+  /**
+   * Non-idempotent requests may have side effects and should be retried carefully. Example:
+   * execute statement.
+   */
+  NON_IDEMPOTENT
+}
diff --git a/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java
new file mode 100644
index 0000000000..676491a92e
--- /dev/null
+++ b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManager.java
@@ -0,0 +1,115 @@
+package com.databricks.jdbc.dbclient.impl.http;
+
+import com.databricks.jdbc.api.internal.IDatabricksConnectionContext;
+import com.databricks.jdbc.log.JdbcLogger;
+import com.databricks.jdbc.log.JdbcLoggerFactory;
+import org.apache.http.HttpStatus;
+
+/**
+ * Manages retry decisions and timeout updates based on HTTP responses and exceptions. Coordinates
+ * with retry strategies to determine whether requests should be retried and updates request
+ * timeouts accordingly.
+ *
+ * <p>Each instance keeps mutable remaining-time budgets, one of which is decremented per
+ * evaluation call. The class performs no synchronization of its own.
+ */
+public class RetryTimeoutManager {
+  private static final JdbcLogger LOGGER = JdbcLoggerFactory.getLogger(RetryTimeoutManager.class);
+  private long tempUnavailableTimeoutMillis;
+  private long rateLimitTimeoutMillis;
+  private long otherErrorCodesTimeoutMillis;
+  private long exceptionTimeoutMillis;
+  private long apiRetriableCodesTimeoutMillis;
+
+  /**
+   * Creates a new RetryTimeoutManager with connection context.
+   *
+   * @param connectionContext the connection context for timeout configurations
+   */
+  public RetryTimeoutManager(IDatabricksConnectionContext connectionContext) {
+    // Initialize timeouts
+    this.tempUnavailableTimeoutMillis =
+        connectionContext.getTemporarilyUnavailableRetryTimeout() * 1000L;
+    this.rateLimitTimeoutMillis = connectionContext.getRateLimitRetryTimeout() * 1000L;
+    this.otherErrorCodesTimeoutMillis = RetryUtils.DEFAULT_REQUEST_TIMEOUT_SECONDS * 1000L;
+    this.exceptionTimeoutMillis = RetryUtils.DEFAULT_REQUEST_EXCEPTION_TIMEOUT_SECONDS * 1000L;
+    this.apiRetriableCodesTimeoutMillis = connectionContext.getApiRetryTimeout() * 1000L;
+  }
+
+  /**
+   * Evaluates retry decision based on HTTP status code and updates timeout accordingly. The delay
+   * is supplied by the caller (for example, one derived from a Retry-After header).
+   *
+   * @param statusCode the HTTP status code from the response
+   * @param retryDelayMillis the retry delay in milliseconds to subtract from timeout
+   * @param isApiRetriableCode true if this is a custom API retriable code, false otherwise
+   * @return true if the request should be retried, false otherwise
+   */
+  public boolean evaluateRetryTimeoutForResponse(
+      int statusCode, int retryDelayMillis, boolean isApiRetriableCode) {
+    // If this is a custom API retriable code, only deduct from API codes timeout
+    if (isApiRetriableCode) {
+      apiRetriableCodesTimeoutMillis -= retryDelayMillis;
+      if (apiRetriableCodesTimeoutMillis <= 0) {
+        LOGGER.debug(
+            "Retry stopped: API retriable codes timeout exhausted. Remaining: {}ms",
+            apiRetriableCodesTimeoutMillis);
+        return false;
+      }
+      return true;
+    }
+
+    // Otherwise, update the appropriate timeout based on status code
+    switch (statusCode) {
+      case HttpStatus.SC_SERVICE_UNAVAILABLE:
+        tempUnavailableTimeoutMillis -= retryDelayMillis;
+        if (tempUnavailableTimeoutMillis <= 0) {
+          LOGGER.debug(
+              "Retry stopped: Service unavailable (503) timeout exhausted. Remaining: {}ms",
+              tempUnavailableTimeoutMillis);
+          return false;
+        }
+        break;
+      case HttpStatus.SC_TOO_MANY_REQUESTS:
+        rateLimitTimeoutMillis -= retryDelayMillis;
+        if (rateLimitTimeoutMillis <= 0) {
+          LOGGER.debug(
+              "Retry stopped: Rate limit (429) timeout exhausted. Remaining: {}ms",
+              rateLimitTimeoutMillis);
+          return false;
+        }
+        break;
+      default:
+        otherErrorCodesTimeoutMillis -= retryDelayMillis;
+        if (otherErrorCodesTimeoutMillis <= 0) {
+          LOGGER.debug(
+              "Retry stopped: Other error codes timeout exhausted for status {}. Remaining: {}ms",
+              statusCode,
+              otherErrorCodesTimeoutMillis);
+          return false;
+        }
+        break;
+    }
+
+    return true;
+  }
+
+  /**
+   * Evaluates retry decision based on an exception and updates timeout accordingly.
+   *
+   * @param retryDelayMillis the retry delay in milliseconds to subtract from timeout
+   * @return true if the request should be retried, false otherwise
+   */
+  public boolean evaluateRetryTimeoutForException(int retryDelayMillis) {
+    // Update exception timeout by subtracting the retry delay
+    exceptionTimeoutMillis -= retryDelayMillis;
+
+    // Check if exception timeout has been exceeded
+    if (exceptionTimeoutMillis <= 0) {
+      LOGGER.debug(
+          "Retry stopped: Exception timeout exhausted. Remaining: {}ms", exceptionTimeoutMillis);
+      return false;
+    }
+    return true;
+  }
+}
diff --git a/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java
new file mode 100644
index 0000000000..2adab17101
--- /dev/null
+++ b/src/main/java/com/databricks/jdbc/dbclient/impl/http/RetryUtils.java
@@ -0,0 +1,38 @@
+package com.databricks.jdbc.dbclient.impl.http;
+
+import com.databricks.jdbc.exception.DatabricksRetryHandlerException;
+
+/**
+ * Utility class containing common retry handling helper functions used across different retry
+ * strategies and handlers.
+ */
+public class RetryUtils {
+  /** Default retry budget, in seconds, applied to status codes without a dedicated timeout. */
+  public static final long DEFAULT_REQUEST_TIMEOUT_SECONDS = 120;
+
+  /** Default retry budget, in seconds, applied to retries triggered by exceptions. */
+  public static final long DEFAULT_REQUEST_EXCEPTION_TIMEOUT_SECONDS = 120;
+
+  /**
+   * Extracts DatabricksRetryHandlerException from the exception cause chain. Skips the top-level
+   * exception as it's typically a TTransportException wrapper.
+   *
+   * @param e the exception to search through; a null argument yields a null result
+   * @return the DatabricksRetryHandlerException if found, null otherwise
+   */
+  public static DatabricksRetryHandlerException extractRetryException(Throwable e) {
+    // Nothing to inspect when no exception was supplied
+    if (e == null) {
+      return null;
+    }
+    // Start with cause to skip the top-level TTransportException wrapper
+    Throwable cause = e.getCause();
+    while (cause != null) {
+      if (cause instanceof DatabricksRetryHandlerException) {
+        return (DatabricksRetryHandlerException) cause;
+      }
+      cause = cause.getCause();
+    }
+    return null;
+  }
+}
diff --git a/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java b/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java
new file mode 100644
index 0000000000..9f18d44476
--- /dev/null
+++ b/src/test/java/com/databricks/jdbc/dbclient/impl/http/RetryTimeoutManagerTest.java
@@ -0,0 +1,163 @@
+package com.databricks.jdbc.dbclient.impl.http;
+
+import static org.junit.jupiter.api.Assertions.*;
+import static org.mockito.Mockito.*;
+
+import com.databricks.jdbc.api.internal.IDatabricksConnectionContext;
+import org.apache.http.HttpStatus;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+class RetryTimeoutManagerTest {
+
+  private static final int TEMP_UNAVAILABLE_TIMEOUT_SECONDS = 120;
+  private static final int RATE_LIMIT_TIMEOUT_SECONDS = 300;
+  private static final int API_RETRIABLE_TIMEOUT_SECONDS = 300;
+
+  private IDatabricksConnectionContext mockContext;
+  private RetryTimeoutManager timeoutManager;
+
+  @BeforeEach
+  void setUp() {
+    mockContext = mock(IDatabricksConnectionContext.class);
+    when(mockContext.getTemporarilyUnavailableRetryTimeout())
+        .thenReturn(TEMP_UNAVAILABLE_TIMEOUT_SECONDS);
+    when(mockContext.getRateLimitRetryTimeout()).thenReturn(RATE_LIMIT_TIMEOUT_SECONDS);
+    when(mockContext.getApiRetryTimeout()).thenReturn(API_RETRIABLE_TIMEOUT_SECONDS);
+    timeoutManager = new RetryTimeoutManager(mockContext);
+  }
+
+  @Test
+  void testServiceUnavailableTimeoutExhausted() {
+    // Make multiple retries that exhaust the service unavailable timeout
+    int delaySeconds = TEMP_UNAVAILABLE_TIMEOUT_SECONDS / 4;
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_SERVICE_UNAVAILABLE, delaySeconds * 1000, false));
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_SERVICE_UNAVAILABLE, delaySeconds * 1000, false));
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_SERVICE_UNAVAILABLE, delaySeconds * 1000, false));
+    // This should exhaust the timeout
+    assertFalse(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_SERVICE_UNAVAILABLE, (delaySeconds + 1) * 1000, false));
+  }
+
+  @Test
+  void testRateLimitTimeoutExhausted() {
+    // Make multiple retries that exhaust the rate limit timeout
+    int delaySeconds = RATE_LIMIT_TIMEOUT_SECONDS / 4;
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_TOO_MANY_REQUESTS, delaySeconds * 1000, false));
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_TOO_MANY_REQUESTS, delaySeconds * 1000, false));
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_TOO_MANY_REQUESTS, delaySeconds * 1000, false));
+    // This should exhaust the timeout
+    assertFalse(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_TOO_MANY_REQUESTS, (delaySeconds + 1) * 1000, false));
+  }
+
+  @Test
+  void testOtherErrorCodesTimeout() {
+    // Test other error codes (e.g., 500) using the default request timeout
+    int otherErrorsTimeout = (int) RetryUtils.DEFAULT_REQUEST_TIMEOUT_SECONDS;
+    int delaySeconds = otherErrorsTimeout / 4;
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_INTERNAL_SERVER_ERROR, delaySeconds * 1000, false));
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_INTERNAL_SERVER_ERROR, delaySeconds * 1000, false));
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_INTERNAL_SERVER_ERROR, delaySeconds * 1000, false));
+    // This should exhaust the timeout
+    assertFalse(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_INTERNAL_SERVER_ERROR, (delaySeconds + 1) * 1000, false));
+  }
+
+  @Test
+  void testExceptionTimeout() {
+    // Test exception retries using the default exception timeout
+    int exceptionTimeout = (int) RetryUtils.DEFAULT_REQUEST_EXCEPTION_TIMEOUT_SECONDS;
+    int delaySeconds = exceptionTimeout / 4;
+    assertTrue(timeoutManager.evaluateRetryTimeoutForException(delaySeconds * 1000));
+    assertTrue(timeoutManager.evaluateRetryTimeoutForException(delaySeconds * 1000));
+    assertTrue(timeoutManager.evaluateRetryTimeoutForException(delaySeconds * 1000));
+    // This should exhaust the timeout
+    assertFalse(timeoutManager.evaluateRetryTimeoutForException((delaySeconds + 1) * 1000));
+  }
+
+  @Test
+  void testMixedRetries() {
+    // Test combination of different status codes
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_SERVICE_UNAVAILABLE, 20000, false));
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_TOO_MANY_REQUESTS, 50000, false));
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_INTERNAL_SERVER_ERROR, 2000, false));
+    assertTrue(timeoutManager.evaluateRetryTimeoutForException(2000));
+
+    // All timeouts should still have capacity
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_SERVICE_UNAVAILABLE, 10000, false));
+  }
+
+  @Test
+  void testImmediateTimeoutExhaustion() {
+    // A single large delay can exhaust the timeout
+    assertFalse(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_SERVICE_UNAVAILABLE,
+            (TEMP_UNAVAILABLE_TIMEOUT_SECONDS + 1) * 1000,
+            false));
+  }
+
+  @Test
+  void testApiRetriableCodesTimeout() {
+    // Test custom API retriable codes (e.g., 404) using the API retry timeout
+    // When isApiRetriableCode=true, only API codes timeout is used
+    int delaySeconds = API_RETRIABLE_TIMEOUT_SECONDS / 4;
+    assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, delaySeconds * 1000, true));
+    assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, delaySeconds * 1000, true));
+    assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, delaySeconds * 1000, true));
+    // This should exhaust the timeout
+    assertFalse(
+        timeoutManager.evaluateRetryTimeoutForResponse(404, (delaySeconds + 1) * 1000, true));
+  }
+
+  @Test
+  void testApiRetriableCodesIndependentTimeout() {
+    // Test that API retriable codes have independent timeout from standard codes
+    // Exhaust standard 503 timeout
+    int delaySeconds = TEMP_UNAVAILABLE_TIMEOUT_SECONDS / 2;
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_SERVICE_UNAVAILABLE, delaySeconds * 1000, false));
+    assertTrue(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_SERVICE_UNAVAILABLE, (delaySeconds - 1) * 1000, false));
+    assertFalse(
+        timeoutManager.evaluateRetryTimeoutForResponse(
+            HttpStatus.SC_SERVICE_UNAVAILABLE, 2000, false));
+
+    // API retriable codes should still work
+    int apiDelaySeconds = API_RETRIABLE_TIMEOUT_SECONDS / 3;
+    assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, apiDelaySeconds * 1000, true));
+    assertTrue(timeoutManager.evaluateRetryTimeoutForResponse(404, apiDelaySeconds * 1000, true));
+  }
+}