From 5a92f3f33bb2b21b1f4a1516de8b549bc1cd9fd8 Mon Sep 17 00:00:00 2001 From: samikshya-chand_data Date: Mon, 2 Mar 2026 17:45:08 +0530 Subject: [PATCH] Fix TIMEDOUT_STATE not recognized as error on interactive clusters When using interactive clusters with enableDirectResults=true, the server can return TIMEDOUT_STATE directly in directResults.operationStatus when the cluster's own query timeout fires before the client's polling loop starts. Because TIMEDOUT_STATE was not included in isErrorOperationState, the driver silently fell through to executeFetchRequest and threw DatabricksHttpException instead of DatabricksTimeoutException. Fix isErrorOperationState to include TIMEDOUT_STATE, and update checkOperationStatusForErrors to throw DatabricksTimeoutException for TIMEDOUT_STATE regardless of whether sqlState is set, since interactive clusters do not always populate the SQL state field. Add tests covering: - TIMEDOUT_STATE in directResults (server timeout fires before polling starts) - TIMEDOUT_STATE returned during polling Signed-off-by: Samikshya Chand Signed-off-by: samikshya-chand_data --- .../impl/thrift/DatabricksThriftAccessor.java | 7 ++- .../thrift/DatabricksThriftAccessorTest.java | 63 +++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/databricks/jdbc/dbclient/impl/thrift/DatabricksThriftAccessor.java b/src/main/java/com/databricks/jdbc/dbclient/impl/thrift/DatabricksThriftAccessor.java index 14af79bcb..b28a7a770 100644 --- a/src/main/java/com/databricks/jdbc/dbclient/impl/thrift/DatabricksThriftAccessor.java +++ b/src/main/java/com/databricks/jdbc/dbclient/impl/thrift/DatabricksThriftAccessor.java @@ -768,7 +768,8 @@ && isErrorOperationState(statusResp.getOperationState())) { LOGGER.error(errorMsg); String sqlState = statusResp.getSqlState(); - if (QUERY_EXECUTION_TIMEOUT_SQLSTATE.equals(sqlState)) { + if (QUERY_EXECUTION_TIMEOUT_SQLSTATE.equals(sqlState) + || statusResp.getOperationState() == TOperationState.TIMEDOUT_STATE) { throw new DatabricksTimeoutException( errorMsg, null, DatabricksDriverErrorCode.OPERATION_TIMEOUT_ERROR); } @@ -805,7 +806,9 @@ private boolean isErrorStatusCode(TStatus status) { } private boolean isErrorOperationState(TOperationState state) { - return state == TOperationState.ERROR_STATE || state == TOperationState.CLOSED_STATE; + return state == TOperationState.ERROR_STATE + || state == TOperationState.CLOSED_STATE + || state == TOperationState.TIMEDOUT_STATE; } private boolean isPendingOperationState(TOperationState state) { diff --git a/src/test/java/com/databricks/jdbc/dbclient/impl/thrift/DatabricksThriftAccessorTest.java b/src/test/java/com/databricks/jdbc/dbclient/impl/thrift/DatabricksThriftAccessorTest.java index f67e0e497..1d17e8a5c 100644 --- a/src/test/java/com/databricks/jdbc/dbclient/impl/thrift/DatabricksThriftAccessorTest.java +++ b/src/test/java/com/databricks/jdbc/dbclient/impl/thrift/DatabricksThriftAccessorTest.java @@ -893,6 +893,69 @@ void testServerSideTimeoutThrowsTimeoutException() throws TException, SQLExcepti () -> accessor.execute(request, parentStatement, session, StatementType.SQL)); } + @Test + void testTimedOutStateInDirectResultsThrowsTimeoutException() + throws TException, SQLException, DatabricksValidationException { + // Reproduces the interactive cluster Case B: server enforces query timeout and returns + // TIMEDOUT_STATE directly in directResults before the client polling loop starts. + // Previously isErrorOperationState excluded TIMEDOUT_STATE, causing the driver to fall + // through to executeFetchRequest and throw DatabricksHttpException instead. + setup(true); + + TExecuteStatementReq request = new TExecuteStatementReq(); + TSparkDirectResults timedOutDirectResults = + new TSparkDirectResults() + .setOperationStatus( + new TGetOperationStatusResp() + .setStatus(new TStatus().setStatusCode(TStatusCode.SUCCESS_STATUS)) + .setOperationState(TOperationState.TIMEDOUT_STATE) + .setErrorMessage("Query timed out after 1 seconds")); + TExecuteStatementResp tExecuteStatementResp = + new TExecuteStatementResp() + .setOperationHandle(tOperationHandle) + .setStatus(new TStatus().setStatusCode(TStatusCode.SUCCESS_STATUS)) + .setDirectResults(timedOutDirectResults); + when(thriftClient.ExecuteStatement(request)).thenReturn(tExecuteStatementResp); + + Statement statement = mock(Statement.class); + when(parentStatement.getStatement()).thenReturn(statement); + when(statement.getQueryTimeout()).thenReturn(300); // Long client timeout — server fires first + + assertThrows( + DatabricksTimeoutException.class, + () -> accessor.execute(request, parentStatement, session, StatementType.SQL)); + } + + @Test + void testTimedOutStateDuringPollingThrowsTimeoutException() + throws TException, SQLException, DatabricksValidationException { + // Server returns RUNNING_STATE initially, then TIMEDOUT_STATE during polling — + // e.g. cluster enforces its own max query duration while client timeout is longer. + setup(true); + + TExecuteStatementReq request = new TExecuteStatementReq(); + TExecuteStatementResp tExecuteStatementResp = + new TExecuteStatementResp() + .setOperationHandle(tOperationHandle) + .setStatus(new TStatus().setStatusCode(TStatusCode.SUCCESS_STATUS)); + when(thriftClient.ExecuteStatement(request)).thenReturn(tExecuteStatementResp); + + TGetOperationStatusResp timedOutStatusResp = + new TGetOperationStatusResp() + .setStatus(new TStatus().setStatusCode(TStatusCode.SUCCESS_STATUS)) + .setOperationState(TOperationState.TIMEDOUT_STATE) + .setErrorMessage("Query timed out after 1 seconds"); + when(thriftClient.GetOperationStatus(operationStatusReq)).thenReturn(timedOutStatusResp); + + Statement statement = mock(Statement.class); + when(parentStatement.getStatement()).thenReturn(statement); + when(statement.getQueryTimeout()).thenReturn(300); // Long client timeout — server fires first + + assertThrows( + DatabricksTimeoutException.class, + () -> accessor.execute(request, parentStatement, session, StatementType.SQL)); + } + @Test void testFetchResultsWithCustomMaxRowsPerBlock() throws TException, SQLException, DatabricksValidationException {