-
Notifications
You must be signed in to change notification settings - Fork 33
[PECOBLR-1928] Add AI coding agent detection to User-Agent header #1230
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
| package com.databricks.jdbc.common.util; | ||
|
|
||
| import java.util.ArrayList; | ||
| import java.util.List; | ||
| import java.util.function.Function; | ||
|
|
||
/**
 * Detects whether the JDBC driver is being invoked by an AI coding agent by checking for well-known
 * environment variables that agents set in their spawned shell processes.
 *
 * <p>Detection only succeeds when exactly one agent environment variable is present, to avoid
 * ambiguous attribution when multiple agent environments overlap.
 *
 * <p>Adding a new agent requires only a new constant and a new entry in {@link #KNOWN_AGENTS}.
 *
 * <p>This class is a holder for static members only: it is {@code final} and cannot be
 * instantiated.
 *
 * <p>References for each environment variable:
 *
 * <ul>
 *   <li>ANTIGRAVITY_AGENT: Closed source. Google Antigravity sets this variable.
 *   <li>CLAUDECODE: https://github.com/anthropics/claude-code (sets CLAUDECODE=1)
 *   <li>CLINE_ACTIVE: https://github.com/cline/cline (shipped in v3.24.0)
 *   <li>CODEX_CI: https://github.com/openai/codex (part of UNIFIED_EXEC_ENV array in codex-rs)
 *   <li>CURSOR_AGENT: Closed source. Referenced in a gist by johnlindquist.
 *   <li>GEMINI_CLI: https://google-gemini.github.io/gemini-cli/docs/tools/shell.html (sets
 *       GEMINI_CLI=1)
 *   <li>OPENCODE: https://github.com/opencode-ai/opencode (sets OPENCODE=1)
 * </ul>
 */
public final class AgentDetector {

  public static final String ANTIGRAVITY = "antigravity";
  public static final String CLAUDE_CODE = "claude-code";
  public static final String CLINE = "cline";
  public static final String CODEX = "codex";
  public static final String CURSOR = "cursor";
  public static final String GEMINI_CLI = "gemini-cli";
  public static final String OPEN_CODE = "opencode";

  /**
   * Lookup table consulted by {@link #detect(Function)}. Each row is a two-element array: index 0
   * is the environment variable name to query, index 1 is the agent product string returned when
   * that variable is the only one set.
   */
  static final String[][] KNOWN_AGENTS = {
    {"ANTIGRAVITY_AGENT", ANTIGRAVITY},
    {"CLAUDECODE", CLAUDE_CODE},
    {"CLINE_ACTIVE", CLINE},
    {"CODEX_CI", CODEX},
    {"CURSOR_AGENT", CURSOR},
    {"GEMINI_CLI", GEMINI_CLI},
    {"OPENCODE", OPEN_CODE},
  };

  /** Not instantiable: every member of this class is static. */
  private AgentDetector() {}

  /**
   * Detects which AI coding agent (if any) is driving the current process, reading the real
   * process environment via {@link System#getenv(String)}.
   *
   * @return the agent product string if exactly one agent is detected, or an empty string otherwise
   */
  // NOTE(review): a collaborator comment attached to this method is cut off in the captured page
  // ("General convention is to use ..."); the convention it refers to cannot be recovered here.
  public static String detect() {
    return detect(System::getenv);
  }

  /**
   * Detects which AI coding agent (if any) is present, using the provided function to look up
   * environment variables. This overload exists for testability.
   *
   * @param envLookup function that returns the value of an environment variable, or null if unset
   * @return the agent product string if exactly one agent is detected, or an empty string otherwise
   */
  static String detect(Function<String, String> envLookup) {
    List<String> detected = new ArrayList<>();
    for (String[] entry : KNOWN_AGENTS) {
      String value = envLookup.apply(entry[0]);
      // A variable that is unset (null) or set to the empty string does not count as a signal.
      if (value != null && !value.isEmpty()) {
        detected.add(entry[1]);
      }
    }
    // Zero matches means no agent; two or more is ambiguous, so nothing is attributed either way.
    return detected.size() == 1 ? detected.get(0) : "";
  }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,7 @@ public class UserAgentManager { | |
| public static final String USER_AGENT_SEA_CLIENT = "SQLExecHttpClient"; | ||
| public static final String USER_AGENT_THRIFT_CLIENT = "THttpClient"; | ||
| private static final String VERSION_FILLER = "version"; | ||
| private static final String AGENT_KEY = "agent"; | ||
|
|
||
| /** | ||
| * Parse custom user agent string into name and version components. | ||
|
|
@@ -62,6 +63,12 @@ public static void setUserAgent(IDatabricksConnectionContext connectionContext) | |
| } | ||
| } | ||
| } | ||
|
|
||
| // Detect AI coding agent and append to user agent | ||
| String agentProduct = AgentDetector.detect(); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I hope this does not break the user-agent parsing functionality in https://github.com/databricks-eng/universe/commit/d74e4f8861d34f726b035e15653110ba1879583c. As long as that is the case, I am good.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The same applies to the other drivers too. |
||
| if (!agentProduct.isEmpty()) { | ||
| UserAgent.withOtherInfo(AGENT_KEY, agentProduct); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -106,6 +113,12 @@ public static String buildUserAgentForConnectorService( | |
| } | ||
| } | ||
|
|
||
| // Detect AI coding agent and append to user agent | ||
| String agentProduct = AgentDetector.detect(); | ||
| if (!agentProduct.isEmpty()) { | ||
| userAgent.append(" ").append(AGENT_KEY).append("/").append(agentProduct); | ||
| } | ||
|
|
||
| return userAgent.toString(); | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| package com.databricks.jdbc.common.util; | ||
|
|
||
| import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
|
||
| import java.util.HashMap; | ||
| import java.util.Map; | ||
| import java.util.stream.Stream; | ||
| import org.junit.jupiter.api.Test; | ||
| import org.junit.jupiter.params.ParameterizedTest; | ||
| import org.junit.jupiter.params.provider.Arguments; | ||
| import org.junit.jupiter.params.provider.MethodSource; | ||
|
|
||
| public class AgentDetectorTest { | ||
|
|
||
| /** Creates an env lookup function that returns values from the given map. */ | ||
| private static java.util.function.Function<String, String> envWith(Map<String, String> env) { | ||
| return env::get; | ||
| } | ||
|
|
||
| @ParameterizedTest | ||
| @MethodSource("singleAgentCases") | ||
| void testDetectsSingleAgent(String envVar, String expectedProduct) { | ||
| Map<String, String> env = new HashMap<>(); | ||
| env.put(envVar, "1"); | ||
| assertEquals(expectedProduct, AgentDetector.detect(envWith(env))); | ||
| } | ||
|
|
||
| static Stream<Arguments> singleAgentCases() { | ||
| return Stream.of( | ||
| Arguments.of("ANTIGRAVITY_AGENT", AgentDetector.ANTIGRAVITY), | ||
| Arguments.of("CLAUDECODE", AgentDetector.CLAUDE_CODE), | ||
| Arguments.of("CLINE_ACTIVE", AgentDetector.CLINE), | ||
| Arguments.of("CODEX_CI", AgentDetector.CODEX), | ||
| Arguments.of("CURSOR_AGENT", AgentDetector.CURSOR), | ||
| Arguments.of("GEMINI_CLI", AgentDetector.GEMINI_CLI), | ||
| Arguments.of("OPENCODE", AgentDetector.OPEN_CODE)); | ||
| } | ||
|
|
||
| @Test | ||
| void testReturnsEmptyWhenNoAgentDetected() { | ||
| Map<String, String> env = new HashMap<>(); | ||
| assertEquals("", AgentDetector.detect(envWith(env))); | ||
| } | ||
|
|
||
| @Test | ||
| void testReturnsEmptyWhenMultipleAgentsDetected() { | ||
| Map<String, String> env = new HashMap<>(); | ||
| env.put("CLAUDECODE", "1"); | ||
| env.put("CURSOR_AGENT", "1"); | ||
| assertEquals("", AgentDetector.detect(envWith(env))); | ||
| } | ||
|
|
||
| @Test | ||
| void testIgnoresEmptyEnvVarValues() { | ||
| Map<String, String> env = new HashMap<>(); | ||
| env.put("CLAUDECODE", ""); | ||
| assertEquals("", AgentDetector.detect(envWith(env))); | ||
| } | ||
|
|
||
| @Test | ||
| void testIgnoresNullEnvVarValues() { | ||
| Map<String, String> env = new HashMap<>(); | ||
| env.put("CLAUDECODE", null); | ||
| assertEquals("", AgentDetector.detect(envWith(env))); | ||
| } | ||
|
|
||
| @Test | ||
| void testAllKnownAgentsAreCovered() { | ||
| // Verify every entry in KNOWN_AGENTS can be detected individually | ||
| for (String[] entry : AgentDetector.KNOWN_AGENTS) { | ||
| Map<String, String> env = new HashMap<>(); | ||
| env.put(entry[0], "1"); | ||
| assertEquals( | ||
| entry[1], | ||
| AgentDetector.detect(envWith(env)), | ||
| "Agent with env var " + entry[0] + " should be detected as " + entry[1]); | ||
| } | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need a 1-1 mapping instead of a simple array or list of strings? Is there any backend logic that processes these strings and requires them to be in this particular format?