Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## [Unreleased]

### Added
- Added AI coding agent detection to the User-Agent header. When the driver is invoked by a known AI coding agent (e.g. Claude Code, Cursor, Gemini CLI), `agent/<product>` is appended to the User-Agent string.
- Added streaming prefetch mode for Thrift inline results (columnar and Arrow) with background batch prefetching and configurable sliding window for improved throughput.
- Added `EnableInlineStreaming` connection parameter to enable/disable streaming mode (default: enabled).
- Added `ThriftMaxBatchesInMemory` connection parameter to control the sliding window size for streaming (default: 3).
Expand Down
78 changes: 78 additions & 0 deletions src/main/java/com/databricks/jdbc/common/util/AgentDetector.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package com.databricks.jdbc.common.util;

import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;

/**
* Detects whether the JDBC driver is being invoked by an AI coding agent by checking for well-known
* environment variables that agents set in their spawned shell processes.
*
* <p>Detection only succeeds when exactly one agent environment variable is present, to avoid
* ambiguous attribution when multiple agent environments overlap.
*
* <p>Adding a new agent requires only a new constant and a new entry in {@link #KNOWN_AGENTS}.
*
* <p>References for each environment variable:
*
* <ul>
* <li>ANTIGRAVITY_AGENT: Closed source. Google Antigravity sets this variable.
* <li>CLAUDECODE: https://github.com/anthropics/claude-code (sets CLAUDECODE=1)
* <li>CLINE_ACTIVE: https://github.com/cline/cline (shipped in v3.24.0)
* <li>CODEX_CI: https://github.com/openai/codex (part of UNIFIED_EXEC_ENV array in codex-rs)
* <li>CURSOR_AGENT: Closed source. Referenced in a gist by johnlindquist.
* <li>GEMINI_CLI: https://google-gemini.github.io/gemini-cli/docs/tools/shell.html (sets
* GEMINI_CLI=1)
* <li>OPENCODE: https://github.com/opencode-ai/opencode (sets OPENCODE=1)
* </ul>
*/
public class AgentDetector {

public static final String ANTIGRAVITY = "antigravity";
public static final String CLAUDE_CODE = "claude-code";
public static final String CLINE = "cline";
public static final String CODEX = "codex";
public static final String CURSOR = "cursor";
public static final String GEMINI_CLI = "gemini-cli";
public static final String OPEN_CODE = "opencode";

static final String[][] KNOWN_AGENTS = {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a 1-1 mapping instead of a simple array or list of strings? Is there any backend logic that processes these strings and requires them to be in this particular format?

{"ANTIGRAVITY_AGENT", ANTIGRAVITY},
{"CLAUDECODE", CLAUDE_CODE},
{"CLINE_ACTIVE", CLINE},
{"CODEX_CI", CODEX},
{"CURSOR_AGENT", CURSOR},
{"GEMINI_CLI", GEMINI_CLI},
{"OPENCODE", OPEN_CODE},
};

/**
* Detects which AI coding agent (if any) is driving the current process.
*
* @return the agent product string if exactly one agent is detected, or an empty string otherwise
*/
public static String detect() {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The general convention is to use null or, better yet, java.util.Optional to signify a missing value. An empty string can be misleading.

return detect(System::getenv);
}

/**
 * Resolves the AI coding agent (if any) by probing each environment variable listed in {@link
 * #KNOWN_AGENTS} through the supplied lookup function. Taking the lookup as a parameter lets
 * tests substitute a fake environment.
 *
 * <p>A variable counts as set only when the lookup yields a non-null, non-empty value. Every
 * known variable is probed, in table order, before a result is chosen.
 *
 * @param envLookup function that returns the value of an environment variable, or null if unset
 * @return the product string of the single matching agent, or an empty string when zero or more
 *     than one agent variable is set
 */
static String detect(Function<String, String> envLookup) {
  List<String> matches = new ArrayList<>();
  for (int i = 0; i < KNOWN_AGENTS.length; i++) {
    String envValue = envLookup.apply(KNOWN_AGENTS[i][0]);
    if (envValue == null || envValue.isEmpty()) {
      // Unset or blank-by-emptiness variables do not count as a detection.
      continue;
    }
    matches.add(KNOWN_AGENTS[i][1]);
  }
  // Anything other than exactly one match is treated as "no agent" to avoid ambiguity.
  return matches.size() == 1 ? matches.get(0) : "";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ public class UserAgentManager {
public static final String USER_AGENT_SEA_CLIENT = "SQLExecHttpClient";
public static final String USER_AGENT_THRIFT_CLIENT = "THttpClient";
private static final String VERSION_FILLER = "version";
private static final String AGENT_KEY = "agent";

/**
* Parse custom user agent string into name and version components.
Expand Down Expand Up @@ -62,6 +63,12 @@ public static void setUserAgent(IDatabricksConnectionContext connectionContext)
}
}
}

// Detect AI coding agent and append to user agent
String agentProduct = AgentDetector.detect();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hope this does not break the user-agent parsing functionality in https://github.com/databricks-eng/universe/commit/d74e4f8861d34f726b035e15653110ba1879583c

As long as that is the case, I am good

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same for other drivers too.

if (!agentProduct.isEmpty()) {
UserAgent.withOtherInfo(AGENT_KEY, agentProduct);
}
}

/**
Expand Down Expand Up @@ -106,6 +113,12 @@ public static String buildUserAgentForConnectorService(
}
}

// Detect AI coding agent and append to user agent
String agentProduct = AgentDetector.detect();
if (!agentProduct.isEmpty()) {
userAgent.append(" ").append(AGENT_KEY).append("/").append(agentProduct);
}

return userAgent.toString();
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package com.databricks.jdbc.common.util;

import static org.junit.jupiter.api.Assertions.assertEquals;

import java.util.HashMap;
import java.util.Map;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

public class AgentDetectorTest {

/** Creates an env lookup function that returns values from the given map. */
private static java.util.function.Function<String, String> envWith(Map<String, String> env) {
return env::get;
}

@ParameterizedTest
@MethodSource("singleAgentCases")
void testDetectsSingleAgent(String envVar, String expectedProduct) {
Map<String, String> env = new HashMap<>();
env.put(envVar, "1");
assertEquals(expectedProduct, AgentDetector.detect(envWith(env)));
}

static Stream<Arguments> singleAgentCases() {
return Stream.of(
Arguments.of("ANTIGRAVITY_AGENT", AgentDetector.ANTIGRAVITY),
Arguments.of("CLAUDECODE", AgentDetector.CLAUDE_CODE),
Arguments.of("CLINE_ACTIVE", AgentDetector.CLINE),
Arguments.of("CODEX_CI", AgentDetector.CODEX),
Arguments.of("CURSOR_AGENT", AgentDetector.CURSOR),
Arguments.of("GEMINI_CLI", AgentDetector.GEMINI_CLI),
Arguments.of("OPENCODE", AgentDetector.OPEN_CODE));
}

@Test
void testReturnsEmptyWhenNoAgentDetected() {
Map<String, String> env = new HashMap<>();
assertEquals("", AgentDetector.detect(envWith(env)));
}

@Test
void testReturnsEmptyWhenMultipleAgentsDetected() {
Map<String, String> env = new HashMap<>();
env.put("CLAUDECODE", "1");
env.put("CURSOR_AGENT", "1");
assertEquals("", AgentDetector.detect(envWith(env)));
}

@Test
void testIgnoresEmptyEnvVarValues() {
Map<String, String> env = new HashMap<>();
env.put("CLAUDECODE", "");
assertEquals("", AgentDetector.detect(envWith(env)));
}

@Test
void testIgnoresNullEnvVarValues() {
Map<String, String> env = new HashMap<>();
env.put("CLAUDECODE", null);
assertEquals("", AgentDetector.detect(envWith(env)));
}

@Test
void testAllKnownAgentsAreCovered() {
// Verify every entry in KNOWN_AGENTS can be detected individually
for (String[] entry : AgentDetector.KNOWN_AGENTS) {
Map<String, String> env = new HashMap<>();
env.put(entry[0], "1");
assertEquals(
entry[1],
AgentDetector.detect(envWith(env)),
"Agent with env var " + entry[0] + " should be detected as " + entry[1]);
}
}
}
Loading