From 22ad11b5d833be0c30ba0dd648a79a98c416472b Mon Sep 17 00:00:00 2001 From: Ben Tin <35538464+bentcc@users.noreply.github.com> Date: Sat, 28 Feb 2026 21:19:03 +0800 Subject: [PATCH] feat: add image generation models with per-model validation, prompt flags, and nanobanana output - Add antigravity-gemini-3.1-flash-image model definition (gemini-3-pro-image is no longer available via Antigravity) - Per-model aspect ratio validation (flash additionally supports 4:1 and 8:1; other image models do not) - Per-model imageSize validation (flash supports 0.5K; pro does not) - Parse --resolution and --aspect-ratio flags from prompt text (stripped before sending to Gemini) - Prompt flag overrides take priority over env vars (OPENCODE_IMAGE_SIZE, OPENCODE_IMAGE_ASPECT_RATIO) - Change image output directory from ~/.opencode/generated-images/ to ./nanobanana/ - Flash-image thinking support (minimal/high) - Update README with image model config and usage docs --- README.md | 34 +++ package-lock.json | 4 +- src/plugin/accounts.test.ts | 6 +- src/plugin/config/models.test.ts | 1 + src/plugin/config/models.ts | 12 ++ src/plugin/image-saver.ts | 7 +- src/plugin/request.ts | 66 +++++- src/plugin/transform/gemini.test.ts | 226 +++++++++++++++++++- src/plugin/transform/gemini.ts | 112 ++++++++-- src/plugin/transform/index.ts | 12 +- src/plugin/transform/model-resolver.test.ts | 38 +++- src/plugin/transform/model-resolver.ts | 35 ++- src/plugin/transform/prompt-flags.test.ts | 196 +++++++++++++++++ src/plugin/transform/prompt-flags.ts | 109 ++++++++++ 14 files changed, 805 insertions(+), 53 deletions(-) create mode 100644 src/plugin/transform/prompt-flags.test.ts create mode 100644 src/plugin/transform/prompt-flags.ts diff --git a/README.md b/README.md index e87552ea..b868a2d0 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,7 @@ opencode run "Hello" --model=google/antigravity-claude-opus-4-6-thinking --varia | `antigravity-gemini-3-flash` | minimal, low, medium, high | Gemini 3 Flash with thinking | | 
`antigravity-claude-sonnet-4-6` | — | Claude Sonnet 4.6 | | `antigravity-claude-opus-4-6-thinking` | low, max | Claude Opus 4.6 with extended thinking | +| `antigravity-gemini-3.1-flash-image` | minimal, high | Gemini 3.1 Flash image generation | **Gemini CLI quota** (separate from Antigravity; used when `cli_first` is true or as fallback): @@ -148,6 +149,30 @@ opencode run "Hello" --model=google/antigravity-claude-opus-4-6-thinking --varia For details on variant configuration and thinking levels, see [docs/MODEL-VARIANTS.md](docs/MODEL-VARIANTS.md). +**Image generation:** + +Select an image model and include your prompt. Images are saved to `./nanobanana/` in your project directory. + +```bash +opencode run "a realistic animal sitting by a window, soft streetlight, bokeh" --model=google/antigravity-gemini-3.1-flash-image +``` + +Use `--resolution` and `--aspect-ratio` flags inline to override defaults: + +```bash +# 4K resolution with 16:9 aspect ratio +opencode run "mountain landscape --resolution=4K --aspect-ratio=16:9" --model=google/antigravity-gemini-3.1-flash-image +``` + +Flags are stripped from the prompt before sending to Gemini. Defaults can also be set via environment variables: + +| Flag / Env Var | Values | Default | +|----------------|--------|---------| +| `--resolution` / `OPENCODE_IMAGE_SIZE` | `0.5K`\*, `1K`, `2K`, `4K` | `1K` | +| `--aspect-ratio` / `OPENCODE_IMAGE_ASPECT_RATIO` | `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`, `4:1`\*, `8:1`\* | `1:1` | + +\* `0.5K` and extended aspect ratios (`4:1`, `8:1`) are only supported by `gemini-3.1-flash-image`. +
Full models configuration (copy-paste ready) @@ -203,6 +228,15 @@ Add this to your `~/.config/opencode/opencode.json`: "max": { "thinkingConfig": { "thinkingBudget": 32768 } } } }, + "antigravity-gemini-3.1-flash-image": { + "name": "Gemini 3.1 Flash Image (Antigravity)", + "limit": { "context": 131072, "output": 32768 }, + "modalities": { "input": ["text", "image", "pdf"], "output": ["text", "image"] }, + "variants": { + "minimal": { "thinkingLevel": "minimal" }, + "high": { "thinkingLevel": "high" } + } + }, "gemini-2.5-flash": { "name": "Gemini 2.5 Flash (Gemini CLI)", "limit": { "context": 1048576, "output": 65536 }, diff --git a/package-lock.json b/package-lock.json index e4f8a6b1..9f14b09d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "opencode-antigravity-auth", - "version": "1.3.3-beta.2", + "version": "1.6.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "opencode-antigravity-auth", - "version": "1.3.3-beta.2", + "version": "1.6.0", "license": "MIT", "dependencies": { "@openauthjs/openauth": "^0.4.3", diff --git a/src/plugin/accounts.test.ts b/src/plugin/accounts.test.ts index ad0b62b7..2370f24f 100644 --- a/src/plugin/accounts.test.ts +++ b/src/plugin/accounts.test.ts @@ -1163,18 +1163,18 @@ describe("AccountManager", () => { const manager = new AccountManager(undefined, stored); const account = manager.getCurrentOrNextForFamily("gemini"); - manager.markRateLimited(account!, 30000, "gemini", "antigravity", "gemini-3-pro-image"); + manager.markRateLimited(account!, 30000, "gemini", "antigravity", "gemini-3.1-flash-image"); expect( manager.getMinWaitTimeForFamily( "gemini", - "gemini-3-pro-image", + "gemini-3.1-flash-image", "antigravity", true, ), ).toBe(30000); - expect(manager.getMinWaitTimeForFamily("gemini", "gemini-3-pro-image")).toBe(0); + expect(manager.getMinWaitTimeForFamily("gemini", "gemini-3.1-flash-image")).toBe(0); }); describe("parseRateLimitReason", () => { diff --git 
a/src/plugin/config/models.test.ts b/src/plugin/config/models.test.ts index fd5aecce..26f6074e 100644 --- a/src/plugin/config/models.test.ts +++ b/src/plugin/config/models.test.ts @@ -19,6 +19,7 @@ describe("OPENCODE_MODEL_DEFINITIONS", () => { "antigravity-claude-sonnet-4-6", "antigravity-gemini-3-flash", "antigravity-gemini-3-pro", + "antigravity-gemini-3.1-flash-image", "antigravity-gemini-3.1-pro", "gemini-2.5-flash", "gemini-2.5-pro", diff --git a/src/plugin/config/models.ts b/src/plugin/config/models.ts index 641d2e14..94d5b13d 100644 --- a/src/plugin/config/models.ts +++ b/src/plugin/config/models.ts @@ -67,6 +67,18 @@ export const OPENCODE_MODEL_DEFINITIONS: OpencodeModelDefinitions = { high: { thinkingLevel: "high" }, }, }, + "antigravity-gemini-3.1-flash-image": { + name: "Gemini 3.1 Flash Image (Antigravity)", + limit: { context: 131072, output: 32768 }, + modalities: { + input: ["text", "image", "pdf"], + output: ["text", "image"], + }, + variants: { + minimal: { thinkingLevel: "minimal" }, + high: { thinkingLevel: "high" }, + }, + }, "antigravity-claude-sonnet-4-6": { name: "Claude Sonnet 4.6 (Antigravity)", limit: { context: 200000, output: 64000 }, diff --git a/src/plugin/image-saver.ts b/src/plugin/image-saver.ts index 2b298b5f..05ef429f 100644 --- a/src/plugin/image-saver.ts +++ b/src/plugin/image-saver.ts @@ -2,19 +2,18 @@ * Image Saving Utility * * Handles saving generated images to disk and returning file paths. + * Images are saved to ./nanobanana/ relative to the current working directory. */ import * as fs from 'fs'; import * as path from 'path'; -import * as os from 'os'; /** * Default directory for saving generated images. - * Uses ~/.opencode/generated-images/ + * Uses ./nanobanana/ relative to the current working directory (process.cwd()). 
*/ function getImageOutputDir(): string { - const homeDir = os.homedir(); - const outputDir = path.join(homeDir, '.opencode', 'generated-images'); + const outputDir = path.join(process.cwd(), 'nanobanana'); // Create directory if it doesn't exist if (!fs.existsSync(outputDir)) { diff --git a/src/plugin/request.ts b/src/plugin/request.ts index dc989738..13c4ccd4 100644 --- a/src/plugin/request.ts +++ b/src/plugin/request.ts @@ -58,7 +58,8 @@ import { needsThinkingRecovery, } from "./thinking-recovery"; import { sanitizeCrossModelPayloadInPlace } from "./transform/cross-model-sanitizer"; -import { isGemini3Model, isImageGenerationModel, buildImageGenerationConfig, applyGeminiTransforms } from "./transform"; +import { isGemini3Model, isImageGenerationModel, isFlashImageModel, buildImageGenerationConfig, applyGeminiTransforms } from "./transform"; +import { parsePromptFlags, extractLastUserPrompt } from "./transform/prompt-flags"; import { resolveModelWithTier, resolveModelWithVariant, @@ -956,9 +957,11 @@ export function prepareAntigravityRequest( } // Resolve thinking configuration based on user settings and model capabilities - // Image generation models don't support thinking - skip thinking config entirely + // Pro image models don't support thinking. Flash image models support minimal/high. const isImageModel = isImageGenerationModel(effectiveModel); - const userThinkingConfig = isImageModel ? undefined : extractThinkingConfig(requestPayload, rawGenerationConfig, extraBody); + const isFlashImage = isImageModel && isFlashImageModel(effectiveModel); + const skipThinkingForImage = isImageModel && !isFlashImage; + const userThinkingConfig = skipThinkingForImage ? 
undefined : extractThinkingConfig(requestPayload, rawGenerationConfig, extraBody); const hasAssistantHistory = Array.isArray(requestPayload.contents) && requestPayload.contents.some((c: any) => c?.role === "model" || c?.role === "assistant"); @@ -966,15 +969,46 @@ export function prepareAntigravityRequest( // Ignore any client-provided thinkingConfig for this model. const lowerEffective = effectiveModel.toLowerCase(); const isClaudeSonnetNonThinking = lowerEffective === "claude-sonnet-4-6"; - const effectiveUserThinkingConfig = (isClaudeSonnetNonThinking || isImageModel) ? undefined : userThinkingConfig; + const effectiveUserThinkingConfig = (isClaudeSonnetNonThinking || skipThinkingForImage) ? undefined : userThinkingConfig; - // For image models, add imageConfig instead of thinkingConfig + // For image models, add imageConfig (and optionally thinkingConfig for flash-image) if (isImageModel) { - const imageConfig = buildImageGenerationConfig(); + // Parse --resolution and --aspect-ratio flags from the last user prompt + let imageConfigOverrides: { aspectRatio?: string; imageSize?: string } | undefined; + if (Array.isArray(requestPayload.contents)) { + const lastPrompt = extractLastUserPrompt(requestPayload.contents as unknown[]); + if (lastPrompt) { + const parsed = parsePromptFlags(lastPrompt.text); + if (parsed.resolution || parsed.aspectRatio) { + imageConfigOverrides = { + imageSize: parsed.resolution, + aspectRatio: parsed.aspectRatio, + }; + // Replace prompt text with flags stripped out + const content = (requestPayload.contents as any[])[lastPrompt.contentIndex]; + if (content?.parts?.[lastPrompt.partIndex]) { + content.parts[lastPrompt.partIndex].text = parsed.cleanedPrompt; + } + log.debug(`[image] Parsed prompt flags: resolution=${parsed.resolution ?? "default"}, aspectRatio=${parsed.aspectRatio ?? 
"default"}`); + } + } + } + + const imageConfig = buildImageGenerationConfig(effectiveModel, imageConfigOverrides); const generationConfig = (rawGenerationConfig ?? {}) as Record; generationConfig.imageConfig = imageConfig; - // Remove any thinkingConfig that might have been set - delete generationConfig.thinkingConfig; + + // Flash image models support thinking (minimal/high) + if (isFlashImage && resolved.isThinkingModel && resolved.thinkingLevel) { + generationConfig.thinkingConfig = { + includeThoughts: true, + thinkingLevel: resolved.thinkingLevel, + }; + } else { + // Remove any thinkingConfig for non-thinking image models + delete generationConfig.thinkingConfig; + } + // Set reasonable defaults for image generation if (!generationConfig.candidateCount) { generationConfig.candidateCount = 1; @@ -1750,6 +1784,22 @@ export async function transformAntigravityResponse( headers.set("x-antigravity-context-error", "tool_pairing"); } + // Detect imageSize / imageConfig errors from Antigravity API + // If the endpoint doesn't support imageSize, strip it and let the user know + if ( + response.status === 400 && + (errorMessage.includes("imagesize") || + errorMessage.includes("image_size") || + (errorMessage.includes("imageconfig") && errorMessage.includes("invalid"))) + ) { + headers.set("x-antigravity-image-error", "imagesize_unsupported"); + console.warn( + `[image] imageSize rejected by API (model: ${effectiveModel || "unknown"}). ` + + `The Antigravity endpoint may not support imageSize for this model. 
` + + `Unset OPENCODE_IMAGE_SIZE to use the default 1K resolution.` + ); + } + return new Response(JSON.stringify(errorBody), { status: response.status, statusText: response.statusText, diff --git a/src/plugin/transform/gemini.test.ts b/src/plugin/transform/gemini.test.ts index 4479444c..45a550f0 100644 --- a/src/plugin/transform/gemini.test.ts +++ b/src/plugin/transform/gemini.test.ts @@ -4,9 +4,12 @@ import { isGemini3Model, isGemini25Model, isImageGenerationModel, + isFlashImageModel, buildGemini3ThinkingConfig, buildGemini25ThinkingConfig, buildImageGenerationConfig, + getValidAspectRatios, + getValidImageSizes, normalizeGeminiTools, applyGeminiTransforms, toGeminiSchema, @@ -584,12 +587,12 @@ describe("transform/gemini", () => { }); describe("isImageGenerationModel", () => { - it("returns true for gemini-3-pro-image", () => { - expect(isImageGenerationModel("gemini-3-pro-image")).toBe(true); + it("returns true for gemini-3.1-flash-image", () => { + expect(isImageGenerationModel("gemini-3.1-flash-image")).toBe(true); }); - it("returns true for gemini-3-pro-image-preview", () => { - expect(isImageGenerationModel("gemini-3-pro-image-preview")).toBe(true); + it("returns true for gemini-3.1-flash-image-preview", () => { + expect(isImageGenerationModel("gemini-3.1-flash-image-preview")).toBe(true); }); it("returns true for gemini-2.5-flash-image", () => { @@ -600,8 +603,8 @@ describe("transform/gemini", () => { expect(isImageGenerationModel("imagen-3")).toBe(true); }); - it("returns true for uppercase GEMINI-3-PRO-IMAGE", () => { - expect(isImageGenerationModel("GEMINI-3-PRO-IMAGE")).toBe(true); + it("returns true for uppercase GEMINI-3.1-FLASH-IMAGE", () => { + expect(isImageGenerationModel("GEMINI-3.1-FLASH-IMAGE")).toBe(true); }); it("returns false for gemini-3-pro", () => { @@ -617,6 +620,37 @@ describe("transform/gemini", () => { }); }); + describe("isFlashImageModel", () => { + it("returns true for gemini-3.1-flash-image", () => { + 
expect(isFlashImageModel("gemini-3.1-flash-image")).toBe(true); + }); + + it("returns true for gemini-3-flash-image", () => { + expect(isFlashImageModel("gemini-3-flash-image")).toBe(true); + }); + + it("returns true for gemini-3.1-flash-image-preview", () => { + expect(isFlashImageModel("gemini-3.1-flash-image-preview")).toBe(true); + }); + + it("returns true for uppercase GEMINI-3.1-FLASH-IMAGE", () => { + expect(isFlashImageModel("GEMINI-3.1-FLASH-IMAGE")).toBe(true); + }); + + it("returns false for gemini-3-pro (non-image)", () => { + expect(isFlashImageModel("gemini-3-pro")).toBe(false); + }); + + it("returns false for gemini-3-flash (non-image)", () => { + expect(isFlashImageModel("gemini-3-flash")).toBe(false); + }); + + it("returns true for gemini-2.5-flash-image", () => { + // The flash-image pattern is version-agnostic, so 2.5 flash image also matches + expect(isFlashImageModel("gemini-2.5-flash-image")).toBe(true); + }); + }); + describe("buildImageGenerationConfig", () => { const originalEnv = process.env; @@ -651,6 +685,24 @@ describe("transform/gemini", () => { } }); + it("accepts extended aspect ratios for flash image models", () => { + const flashOnlyRatios = ["4:1", "8:1"]; + for (const ratio of flashOnlyRatios) { + process.env.OPENCODE_IMAGE_ASPECT_RATIO = ratio; + const config = buildImageGenerationConfig("gemini-3.1-flash-image"); + expect(config.aspectRatio).toBe(ratio); + } + }); + + it("rejects extended aspect ratios for non-flash image models", () => { + const flashOnlyRatios = ["4:1", "8:1"]; + for (const ratio of flashOnlyRatios) { + process.env.OPENCODE_IMAGE_ASPECT_RATIO = ratio; + const config = buildImageGenerationConfig("some-image-model"); + expect(config.aspectRatio).toBe("1:1"); + } + }); + it("falls back to 1:1 for invalid aspect ratio", () => { process.env.OPENCODE_IMAGE_ASPECT_RATIO = "invalid"; const config = buildImageGenerationConfig(); @@ -662,6 +714,168 @@ describe("transform/gemini", () => { const config = buildImageGenerationConfig(); 
expect(config).toEqual({ aspectRatio: "1:1" }); }); + + it("includes imageSize when OPENCODE_IMAGE_SIZE is set to valid value", () => { + delete process.env.OPENCODE_IMAGE_ASPECT_RATIO; + process.env.OPENCODE_IMAGE_SIZE = "2K"; + const config = buildImageGenerationConfig(); + expect(config).toEqual({ aspectRatio: "1:1", imageSize: "2K" }); + }); + + it("accepts all standard image sizes (1K, 2K, 4K)", () => { + delete process.env.OPENCODE_IMAGE_ASPECT_RATIO; + for (const size of ["1K", "2K", "4K"]) { + process.env.OPENCODE_IMAGE_SIZE = size; + const config = buildImageGenerationConfig(); + expect(config.imageSize).toBe(size); + } + }); + + it("auto-corrects lowercase k to uppercase K", () => { + delete process.env.OPENCODE_IMAGE_ASPECT_RATIO; + process.env.OPENCODE_IMAGE_SIZE = "2k"; + const config = buildImageGenerationConfig(); + expect(config.imageSize).toBe("2K"); + }); + + it("ignores invalid imageSize values", () => { + delete process.env.OPENCODE_IMAGE_ASPECT_RATIO; + process.env.OPENCODE_IMAGE_SIZE = "8K"; + const config = buildImageGenerationConfig(); + expect(config.imageSize).toBeUndefined(); + }); + + it("allows 0.5K for flash image models", () => { + delete process.env.OPENCODE_IMAGE_ASPECT_RATIO; + process.env.OPENCODE_IMAGE_SIZE = "0.5K"; + const config = buildImageGenerationConfig("gemini-3.1-flash-image"); + expect(config.imageSize).toBe("0.5K"); + }); + + it("rejects 0.5K for non-flash image models", () => { + delete process.env.OPENCODE_IMAGE_ASPECT_RATIO; + process.env.OPENCODE_IMAGE_SIZE = "0.5K"; + const config = buildImageGenerationConfig("some-image-model"); + expect(config.imageSize).toBeUndefined(); + }); + + it("does not include imageSize when env var is not set", () => { + delete process.env.OPENCODE_IMAGE_ASPECT_RATIO; + delete process.env.OPENCODE_IMAGE_SIZE; + const config = buildImageGenerationConfig(); + expect(config.imageSize).toBeUndefined(); + }); + + it("combines aspectRatio and imageSize", () => { + 
process.env.OPENCODE_IMAGE_ASPECT_RATIO = "16:9"; + process.env.OPENCODE_IMAGE_SIZE = "4K"; + const config = buildImageGenerationConfig(); + expect(config).toEqual({ aspectRatio: "16:9", imageSize: "4K" }); + }); + + describe("overrides (prompt flags)", () => { + it("override aspectRatio takes priority over env var", () => { + process.env.OPENCODE_IMAGE_ASPECT_RATIO = "1:1"; + const config = buildImageGenerationConfig(undefined, { aspectRatio: "16:9" }); + expect(config.aspectRatio).toBe("16:9"); + }); + + it("override imageSize takes priority over env var", () => { + process.env.OPENCODE_IMAGE_SIZE = "1K"; + const config = buildImageGenerationConfig(undefined, { imageSize: "4K" }); + expect(config.imageSize).toBe("4K"); + }); + + it("override with both aspectRatio and imageSize", () => { + delete process.env.OPENCODE_IMAGE_ASPECT_RATIO; + delete process.env.OPENCODE_IMAGE_SIZE; + const config = buildImageGenerationConfig("gemini-3.1-flash-image", { + aspectRatio: "9:16", + imageSize: "2K", + }); + expect(config).toEqual({ aspectRatio: "9:16", imageSize: "2K" }); + }); + + it("falls back to env var when override is undefined", () => { + process.env.OPENCODE_IMAGE_ASPECT_RATIO = "3:2"; + process.env.OPENCODE_IMAGE_SIZE = "2K"; + const config = buildImageGenerationConfig(undefined, {}); + expect(config.aspectRatio).toBe("3:2"); + expect(config.imageSize).toBe("2K"); + }); + + it("validates override aspectRatio against model-specific list", () => { + delete process.env.OPENCODE_IMAGE_ASPECT_RATIO; + // 4:1 is only valid for flash models, not for non-flash + const config = buildImageGenerationConfig("some-image-model", { aspectRatio: "4:1" }); + expect(config.aspectRatio).toBe("1:1"); // falls back to default + }); + + it("accepts flash-only aspect ratio override for flash model", () => { + delete process.env.OPENCODE_IMAGE_ASPECT_RATIO; + const config = buildImageGenerationConfig("gemini-3.1-flash-image", { aspectRatio: "8:1" }); + 
expect(config.aspectRatio).toBe("8:1"); + }); + + it("validates override imageSize against model-specific list", () => { + delete process.env.OPENCODE_IMAGE_SIZE; + // 0.5K is only valid for flash models + const config = buildImageGenerationConfig("some-image-model", { imageSize: "0.5K" }); + expect(config.imageSize).toBeUndefined(); + }); + + it("accepts 0.5K override for flash model", () => { + delete process.env.OPENCODE_IMAGE_SIZE; + const config = buildImageGenerationConfig("gemini-3.1-flash-image", { imageSize: "0.5K" }); + expect(config.imageSize).toBe("0.5K"); + }); + + it("auto-corrects lowercase k in override", () => { + delete process.env.OPENCODE_IMAGE_SIZE; + const config = buildImageGenerationConfig(undefined, { imageSize: "4k" }); + expect(config.imageSize).toBe("4K"); + }); + }); + }); + + describe("getValidAspectRatios", () => { + it("returns base ratios for non-flash image model", () => { + const ratios = getValidAspectRatios("some-image-model"); + expect(ratios).toContain("1:1"); + expect(ratios).toContain("16:9"); + expect(ratios).not.toContain("4:1"); + expect(ratios).not.toContain("8:1"); + }); + + it("returns extended ratios for flash image model", () => { + const ratios = getValidAspectRatios("gemini-3.1-flash-image"); + expect(ratios).toContain("1:1"); + expect(ratios).toContain("16:9"); + expect(ratios).toContain("4:1"); + expect(ratios).toContain("8:1"); + }); + + it("returns base ratios when no model specified", () => { + const ratios = getValidAspectRatios(); + expect(ratios).not.toContain("4:1"); + }); + }); + + describe("getValidImageSizes", () => { + it("returns base sizes for non-flash image model", () => { + const sizes = getValidImageSizes("some-image-model"); + expect(sizes).toEqual(["1K", "2K", "4K"]); + }); + + it("returns flash sizes (including 0.5K) for flash image model", () => { + const sizes = getValidImageSizes("gemini-3.1-flash-image"); + expect(sizes).toEqual(["0.5K", "1K", "2K", "4K"]); + }); + + it("returns pro sizes 
when no model specified", () => { + const sizes = getValidImageSizes(); + expect(sizes).not.toContain("0.5K"); + }); }); describe("toGeminiSchema", () => { diff --git a/src/plugin/transform/gemini.ts b/src/plugin/transform/gemini.ts index f98243ba..91eeaea9 100644 --- a/src/plugin/transform/gemini.ts +++ b/src/plugin/transform/gemini.ts @@ -187,38 +187,118 @@ export function buildGemini25ThinkingConfig( * Image generation config for Gemini image models. * * Supported aspect ratios: "1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9" + * Supported image sizes: "1K" (default), "2K", "4K" + * - "0.5K" (512px) is only supported by gemini-3.1-flash-image + * - Values MUST use uppercase "K" (e.g., "2K" not "2k") */ export interface ImageConfig { aspectRatio?: string; + imageSize?: string; } /** - * Valid aspect ratios for image generation. + * Valid aspect ratios for image models (shared baseline). */ -const VALID_ASPECT_RATIOS = ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"]; +const VALID_ASPECT_RATIOS_BASE = ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"] + +/** + * Valid aspect ratios for flash image models (gemini-3.1-flash-image). + * Includes base ratios plus extended ratios (4:1, 8:1). + */ +const VALID_ASPECT_RATIOS_FLASH = [...VALID_ASPECT_RATIOS_BASE, "4:1", "8:1"] + +/** + * Valid image sizes for image models (shared baseline). + * Must use uppercase K (e.g., "1K" not "1k"). + */ +const VALID_IMAGE_SIZES_BASE = ["1K", "2K", "4K"] + +/** + * Valid image sizes for flash image models. + * Includes 0.5K (512px) which is flash-only. + */ +const VALID_IMAGE_SIZES_FLASH = ["0.5K", "1K", "2K", "4K"] + +/** + * Get valid aspect ratios for a given model. + */ +export function getValidAspectRatios(model?: string): string[] { + if (model && isFlashImageModel(model)) { + return VALID_ASPECT_RATIOS_FLASH + } + return VALID_ASPECT_RATIOS_BASE +} + +/** + * Get valid image sizes for a given model. 
+ */ +export function getValidImageSizes(model?: string): string[] { + if (model && isFlashImageModel(model)) { + return VALID_IMAGE_SIZES_FLASH + } + return VALID_IMAGE_SIZES_BASE +} + +/** + * Check if a model is a flash-based image model (supports 0.5K and thinking). + */ +export function isFlashImageModel(model: string): boolean { + return /flash.*image|image.*flash/i.test(model); +} + +/** + * Options for building image generation config. + * Prompt flag overrides take priority over environment variables. + */ +export interface ImageConfigOverrides { + /** Override aspect ratio from prompt flags (e.g., --aspect-ratio=16:9) */ + aspectRatio?: string + /** Override image size from prompt flags (e.g., --resolution=4K) */ + imageSize?: string +} /** * Build image generation config for Gemini image models. * - * Configuration is read from environment variables: - * - OPENCODE_IMAGE_ASPECT_RATIO: Aspect ratio (e.g., "16:9", "4:3") - * - * Defaults to 1:1 aspect ratio if not specified. + * Priority order (highest to lowest): + * 1. Prompt flag overrides (--resolution, --aspect-ratio) + * 2. Environment variables (OPENCODE_IMAGE_ASPECT_RATIO, OPENCODE_IMAGE_SIZE) + * 3. Defaults (1:1 aspect ratio, API default 1K image size) * - * Note: Resolution setting is not currently supported by the Antigravity API. 
+ * @param model - The model name, used to select valid aspect ratios and image sizes + * @param overrides - Optional overrides from prompt flag parsing */ -export function buildImageGenerationConfig(): ImageConfig { - // Read aspect ratio from environment or default to 1:1 - const aspectRatio = process.env.OPENCODE_IMAGE_ASPECT_RATIO || "1:1"; - - if (VALID_ASPECT_RATIOS.includes(aspectRatio)) { - return { aspectRatio }; +export function buildImageGenerationConfig(model?: string, overrides?: ImageConfigOverrides): ImageConfig { + const config: ImageConfig = {} + const validAspectRatios = getValidAspectRatios(model) + const validImageSizes = getValidImageSizes(model) + + // Resolve aspect ratio: overrides > env var > default "1:1" + const aspectRatio = overrides?.aspectRatio || process.env.OPENCODE_IMAGE_ASPECT_RATIO || "1:1" + if (validAspectRatios.includes(aspectRatio)) { + config.aspectRatio = aspectRatio + } else { + console.warn(`[gemini] Invalid aspect ratio "${aspectRatio}" for model "${model || "unknown"}". Using default "1:1". Valid values: ${validAspectRatios.join(", ")}`) + config.aspectRatio = "1:1" } - console.warn(`[gemini] Invalid aspect ratio "${aspectRatio}". Using default "1:1". Valid values: ${VALID_ASPECT_RATIOS.join(", ")}`); + // Resolve image size: overrides > env var > not set (API default 1K) + const imageSize = overrides?.imageSize || process.env.OPENCODE_IMAGE_SIZE + if (imageSize) { + // Validate uppercase K requirement + if (/^\d+(\.\d+)?k$/i.test(imageSize) && imageSize !== imageSize.replace(/k$/i, "K")) { + console.warn(`[gemini] imageSize must use uppercase "K" (e.g., "2K" not "2k"). Got "${imageSize}". Correcting to "${imageSize.toUpperCase()}".`) + } + const normalized = imageSize.toUpperCase() + + if (!validImageSizes.includes(normalized)) { + console.warn(`[gemini] Invalid imageSize "${imageSize}" for model "${model || "unknown"}". Ignoring. 
Valid values: ${validImageSizes.join(", ")}`) + } else { + config.imageSize = normalized + } + } - // Default to 1:1 square aspect ratio - return { aspectRatio: "1:1" }; + return config } /** diff --git a/src/plugin/transform/index.ts b/src/plugin/transform/index.ts index 7a801767..60992f53 100644 --- a/src/plugin/transform/index.ts +++ b/src/plugin/transform/index.ts @@ -50,13 +50,23 @@ export { isGemini3Model, isGemini25Model, isImageGenerationModel, + isFlashImageModel, buildGemini3ThinkingConfig, buildGemini25ThinkingConfig, buildImageGenerationConfig, + getValidAspectRatios, + getValidImageSizes, normalizeGeminiTools, applyGeminiTransforms, } from "./gemini"; -export type { GeminiTransformOptions, GeminiTransformResult, ImageConfig } from "./gemini"; +export type { GeminiTransformOptions, GeminiTransformResult, ImageConfig, ImageConfigOverrides } from "./gemini"; + +// Prompt flag parsing +export { + parsePromptFlags, + extractLastUserPrompt, +} from "./prompt-flags"; +export type { ParsedPromptFlags } from "./prompt-flags"; // Cross-model sanitization export { diff --git a/src/plugin/transform/model-resolver.test.ts b/src/plugin/transform/model-resolver.test.ts index e0b9b487..0d8602a8 100644 --- a/src/plugin/transform/model-resolver.test.ts +++ b/src/plugin/transform/model-resolver.test.ts @@ -79,7 +79,7 @@ describe("resolveModelWithTier", () => { }); it("keeps antigravity for image models when cli_first is true", () => { - const result = resolveModelWithTier("gemini-3-pro-image", { cli_first: true }); + const result = resolveModelWithTier("gemini-3.1-flash-image", { cli_first: true }); expect(result.quotaPreference).toBe("antigravity"); expect(result.explicitQuota).toBe(true); }); @@ -155,20 +155,38 @@ describe("resolveModelWithTier", () => { }); describe("Image models", () => { - it("marks antigravity-gemini-3-pro-image as explicit quota", () => { - const result = resolveModelWithTier("antigravity-gemini-3-pro-image"); - 
expect(result.actualModel).toBe("gemini-3-pro-image"); + it("marks gemini-3.1-flash-image as thinking-capable image model", () => { + const result = resolveModelWithTier("gemini-3.1-flash-image"); + expect(result.actualModel).toBe("gemini-3.1-flash-image"); expect(result.isImageModel).toBe(true); + expect(result.isThinkingModel).toBe(true); + expect(result.thinkingLevel).toBe("minimal"); expect(result.explicitQuota).toBe(true); - expect(result.quotaPreference).toBe("antigravity"); }); - it("marks gemini-3-pro-image as explicit quota", () => { - const result = resolveModelWithTier("gemini-3-pro-image"); - expect(result.actualModel).toBe("gemini-3-pro-image"); + it("flash-image with -high tier gets high thinkingLevel", () => { + const result = resolveModelWithTier("gemini-3.1-flash-image-high"); + expect(result.actualModel).toBe("gemini-3.1-flash-image"); expect(result.isImageModel).toBe(true); - expect(result.explicitQuota).toBe(true); - expect(result.quotaPreference).toBe("antigravity"); + expect(result.isThinkingModel).toBe(true); + expect(result.thinkingLevel).toBe("high"); + expect(result.tier).toBe("high"); + }); + + it("flash-image with -minimal tier gets minimal thinkingLevel", () => { + const result = resolveModelWithTier("gemini-3.1-flash-image-minimal"); + expect(result.actualModel).toBe("gemini-3.1-flash-image"); + expect(result.isImageModel).toBe(true); + expect(result.isThinkingModel).toBe(true); + expect(result.thinkingLevel).toBe("minimal"); + }); + + it("antigravity-gemini-3.1-flash-image defaults to minimal thinking", () => { + const result = resolveModelWithTier("antigravity-gemini-3.1-flash-image"); + expect(result.actualModel).toBe("gemini-3.1-flash-image"); + expect(result.isImageModel).toBe(true); + expect(result.isThinkingModel).toBe(true); + expect(result.thinkingLevel).toBe("minimal"); }); }); }); diff --git a/src/plugin/transform/model-resolver.ts b/src/plugin/transform/model-resolver.ts index dd635c3a..40725240 100644 --- 
a/src/plugin/transform/model-resolver.ts +++ b/src/plugin/transform/model-resolver.ts @@ -54,7 +54,8 @@ export const MODEL_ALIASES: Record = { "gemini-claude-opus-4-6-thinking-high": "claude-opus-4-6-thinking", "gemini-claude-sonnet-4-6": "claude-sonnet-4-6", - // Image generation models - only gemini-3-pro-image is available via Antigravity API + // Image generation models - gemini-3.1-flash-image is available via Antigravity API + // Note: gemini-3-pro-image was removed by Google from Antigravity // Note: gemini-2.5-flash-image (Nano Banana) is NOT supported by Antigravity - only Google AI API // Reference: Antigravity-Manager/src-tauri/src/proxy/common/model_mapping.rs }; @@ -68,10 +69,18 @@ const GEMINI_3_FLASH_REGEX = /^gemini-3(?:\.\d+)?-flash/i; /** * Image generation models - always route to Antigravity. - * These models don't support thinking and require imageConfig. + * These models require imageConfig. + * Note: Flash image models (gemini-3.1-flash-image) DO support thinking (minimal/high). */ const IMAGE_GENERATION_MODELS = /image|imagen/i; +/** + * Flash-based image models that support thinking levels (minimal, high). + * These use thinkingLevel in addition to imageConfig. + * Pro image models have been removed by Google from Antigravity. + */ +const FLASH_IMAGE_MODEL = /flash.*image|image.*flash/i; + // Legacy LEGACY_ANTIGRAVITY_GEMINI3 regex removed - all Gemini models now default to antigravity /** @@ -200,8 +209,28 @@ export function resolveModelWithTier(requestedModel: string, options: ModelResol const isThinking = isThinkingCapableModel(resolvedModel); - // Image generation models don't support thinking - return early without thinking config + // Image generation models require imageConfig. + // Flash image models (gemini-3.1-flash-image) DO support thinking (minimal/high). + // Pro image models (gemini-3-pro-image) do NOT support thinking. 
if (isImageModel) { + const isFlashImage = FLASH_IMAGE_MODEL.test(modelWithoutQuota); + + // Flash image models support thinking with minimal (default) and high levels + // Non-flash image models are no longer supported (gemini-3-pro-image removed by Google) + if (isFlashImage) { + const flashImageThinkingLevel = tier === "high" ? "high" : "minimal"; + return { + actualModel: resolvedModel, + isThinkingModel: true, + isImageModel: true, + thinkingLevel: flashImageThinkingLevel, + ...(tier ? { tier } : {}), + quotaPreference, + explicitQuota, + }; + } + + // Fallback for any other image model patterns (e.g., imagen) return { actualModel: resolvedModel, isThinkingModel: false, diff --git a/src/plugin/transform/prompt-flags.test.ts b/src/plugin/transform/prompt-flags.test.ts new file mode 100644 index 00000000..8bf2bc58 --- /dev/null +++ b/src/plugin/transform/prompt-flags.test.ts @@ -0,0 +1,196 @@ +import { describe, it, expect } from "vitest" +import { parsePromptFlags, extractLastUserPrompt } from "./prompt-flags.ts" + +describe("transform/prompt-flags", () => { + describe("parsePromptFlags", () => { + it("returns prompt unchanged when no flags present", () => { + const result = parsePromptFlags("一個坐在窗邊的寫實動物,柔和街燈,柔焦") + expect(result.cleanedPrompt).toBe("一個坐在窗邊的寫實動物,柔和街燈,柔焦") + expect(result.resolution).toBeUndefined() + expect(result.aspectRatio).toBeUndefined() + }) + + it("extracts --resolution=4K and strips from prompt", () => { + const result = parsePromptFlags("a cat --resolution=4K sitting by a window") + expect(result.resolution).toBe("4K") + expect(result.cleanedPrompt).toBe("a cat sitting by a window") + }) + + it("extracts --aspect-ratio=16:9 and strips from prompt", () => { + const result = parsePromptFlags("mountain landscape --aspect-ratio=16:9") + expect(result.aspectRatio).toBe("16:9") + expect(result.cleanedPrompt).toBe("mountain landscape") + }) + + it("extracts both flags simultaneously", () => { + const result = parsePromptFlags("sunset beach 
--resolution=2K --aspect-ratio=9:16") + expect(result.resolution).toBe("2K") + expect(result.aspectRatio).toBe("9:16") + expect(result.cleanedPrompt).toBe("sunset beach") + }) + + it("handles flags at the start of prompt", () => { + const result = parsePromptFlags("--resolution=4K a beautiful forest") + expect(result.resolution).toBe("4K") + expect(result.cleanedPrompt).toBe("a beautiful forest") + }) + + it("handles flags at the end of prompt", () => { + const result = parsePromptFlags("a beautiful forest --resolution=4K") + expect(result.resolution).toBe("4K") + expect(result.cleanedPrompt).toBe("a beautiful forest") + }) + + it("handles lowercase resolution value", () => { + const result = parsePromptFlags("a cat --resolution=4k") + expect(result.resolution).toBe("4k") + // Note: normalization to uppercase is handled by buildImageGenerationConfig, not here + }) + + it("handles 0.5K resolution", () => { + const result = parsePromptFlags("quick sketch --resolution=0.5K") + expect(result.resolution).toBe("0.5K") + }) + + it("handles quoted values with double quotes", () => { + const result = parsePromptFlags('a cat --aspect-ratio="16:9"') + expect(result.aspectRatio).toBe("16:9") + }) + + it("handles quoted values with single quotes", () => { + const result = parsePromptFlags("a cat --aspect-ratio='16:9'") + expect(result.aspectRatio).toBe("16:9") + }) + + it("handles space-separated flag values", () => { + const result = parsePromptFlags("a cat --resolution 4K") + expect(result.resolution).toBe("4K") + expect(result.cleanedPrompt).toBe("a cat") + }) + + it("handles mixed Chinese and English with flags", () => { + const result = parsePromptFlags("一個寫實貓 --resolution=4K --aspect-ratio=1:1 sitting by window") + expect(result.resolution).toBe("4K") + expect(result.aspectRatio).toBe("1:1") + expect(result.cleanedPrompt).toBe("一個寫實貓 sitting by window") + }) + + it("collapses multiple spaces after flag removal", () => { + const result = parsePromptFlags("a cat 
--resolution=4K sitting") + expect(result.resolution).toBe("4K") + expect(result.cleanedPrompt).not.toContain(" ") + }) + + it("trims whitespace from cleaned prompt", () => { + const result = parsePromptFlags(" --resolution=4K a cat ") + expect(result.resolution).toBe("4K") + expect(result.cleanedPrompt).toBe("a cat") + }) + + it("handles extended aspect ratios for flash models", () => { + const result = parsePromptFlags("tall banner --aspect-ratio=4:1") + expect(result.aspectRatio).toBe("4:1") + }) + + it("handles empty prompt with only flags", () => { + const result = parsePromptFlags("--resolution=2K --aspect-ratio=1:1") + expect(result.resolution).toBe("2K") + expect(result.aspectRatio).toBe("1:1") + expect(result.cleanedPrompt).toBe("") + }) + + it("is case-insensitive for flag names", () => { + const result = parsePromptFlags("a cat --Resolution=4K --Aspect-Ratio=16:9") + expect(result.resolution).toBe("4K") + expect(result.aspectRatio).toBe("16:9") + }) + + it("only extracts the first occurrence of each flag", () => { + const result = parsePromptFlags("a cat --resolution=4K --resolution=2K") + expect(result.resolution).toBe("4K") + }) + }) + + describe("extractLastUserPrompt", () => { + it("returns undefined for empty array", () => { + expect(extractLastUserPrompt([])).toBeUndefined() + }) + + it("returns undefined for non-array input", () => { + expect(extractLastUserPrompt(null as any)).toBeUndefined() + }) + + it("returns undefined when no user messages exist", () => { + const contents = [ + { role: "model", parts: [{ text: "Hello" }] }, + ] + expect(extractLastUserPrompt(contents)).toBeUndefined() + }) + + it("extracts text from single user message", () => { + const contents = [ + { role: "user", parts: [{ text: "draw a cat" }] }, + ] + const result = extractLastUserPrompt(contents) + expect(result).toBeDefined() + expect(result!.text).toBe("draw a cat") + expect(result!.contentIndex).toBe(0) + expect(result!.partIndex).toBe(0) + }) + + it("returns the 
last user message when multiple exist", () => { + const contents = [ + { role: "user", parts: [{ text: "first message" }] }, + { role: "model", parts: [{ text: "response" }] }, + { role: "user", parts: [{ text: "draw a cat --resolution=4K" }] }, + ] + const result = extractLastUserPrompt(contents) + expect(result).toBeDefined() + expect(result!.text).toBe("draw a cat --resolution=4K") + expect(result!.contentIndex).toBe(2) + }) + + it("finds the last text part in a multi-part user message", () => { + const contents = [ + { + role: "user", + parts: [ + { inlineData: { mimeType: "image/png", data: "base64" } }, + { text: "describe this --resolution=4K" }, + ], + }, + ] + const result = extractLastUserPrompt(contents) + expect(result).toBeDefined() + expect(result!.text).toBe("describe this --resolution=4K") + expect(result!.partIndex).toBe(1) + }) + + it("skips user messages with no text parts", () => { + const contents = [ + { role: "user", parts: [{ text: "earlier prompt" }] }, + { role: "user", parts: [{ inlineData: { mimeType: "image/png", data: "base64" } }] }, + ] + const result = extractLastUserPrompt(contents) + expect(result).toBeDefined() + expect(result!.text).toBe("earlier prompt") + expect(result!.contentIndex).toBe(0) + }) + + it("skips empty text parts", () => { + const contents = [ + { role: "user", parts: [{ text: "real content" }, { text: "" }] }, + ] + const result = extractLastUserPrompt(contents) + expect(result).toBeDefined() + expect(result!.text).toBe("real content") + }) + + it("handles missing parts array", () => { + const contents = [ + { role: "user" }, + ] + expect(extractLastUserPrompt(contents)).toBeUndefined() + }) + }) +}) diff --git a/src/plugin/transform/prompt-flags.ts b/src/plugin/transform/prompt-flags.ts new file mode 100644 index 00000000..57672297 --- /dev/null +++ b/src/plugin/transform/prompt-flags.ts @@ -0,0 +1,109 @@ +/** + * Prompt Flag Parser + * + * Extracts --resolution and --aspect-ratio flags from image generation 
/**
 * Prompt flag parser for image generation prompts.
 *
 * Flags are stripped from the prompt text before sending to Gemini so the model
 * only sees the clean image description.
 *
 * Supported flags:
 *   --resolution=4K       -> maps to imageSize (valid: 0.5K, 1K, 2K, 4K)
 *   --aspect-ratio=16:9   -> maps to aspectRatio (valid depends on model)
 *
 * No validation or case normalization of the values happens in this module;
 * the extracted strings are returned exactly as the user typed them.
 */

/**
 * Result of parsing prompt flags.
 */
export interface ParsedPromptFlags {
  /** The prompt text with flags stripped out */
  cleanedPrompt: string
  /** Extracted --resolution value (e.g., "4K"), or undefined if not specified */
  resolution?: string
  /** Extracted --aspect-ratio value (e.g., "16:9"), or undefined if not specified */
  aspectRatio?: string
}

/**
 * Regex patterns for supported flags.
 * Matches both --flag=value and --flag value formats.
 * Values can be optionally quoted with single or double quotes.
 *
 * The `(?!--)` lookahead rejects a value that itself starts with `--`, so a
 * bare flag such as `--resolution --aspect-ratio=16:9` does not swallow the
 * following flag as its value.
 */
const RESOLUTION_PATTERN = /--resolution[=\s]+["']?(?!--)([^\s"']+)["']?/gi
const ASPECT_RATIO_PATTERN = /--aspect-ratio[=\s]+["']?(?!--)([^\s"']+)["']?/gi

/**
 * Return the captured value of the first match of `pattern` in `text`.
 *
 * The patterns are global (they are reused for stripping), and `exec` on a
 * global regex is stateful, so `lastIndex` is pinned to 0 before and after.
 */
function firstFlagValue(pattern: RegExp, text: string): string | undefined {
  pattern.lastIndex = 0
  const match = pattern.exec(text)
  pattern.lastIndex = 0
  return match?.[1]
}

/**
 * Parse prompt flags from text and return cleaned prompt + extracted values.
 *
 * Only the first occurrence of each flag supplies the value; every occurrence
 * is removed from the prompt. When no flag is present the prompt is returned
 * exactly as given (no trimming, no whitespace collapsing).
 *
 * @param prompt - The raw prompt text that may contain flags
 * @returns Parsed result with cleaned prompt and extracted flag values
 */
export function parsePromptFlags(prompt: string): ParsedPromptFlags {
  // Both values are read from the untouched prompt so that removing one flag
  // can never affect how the other one is detected.
  const resolution = firstFlagValue(RESOLUTION_PATTERN, prompt)
  const aspectRatio = firstFlagValue(ASPECT_RATIO_PATTERN, prompt)

  if (resolution === undefined && aspectRatio === undefined) {
    // Nothing to strip: leave the user's text (including blank lines) alone.
    return { cleanedPrompt: prompt }
  }

  const cleanedPrompt = prompt
    .replace(RESOLUTION_PATTERN, "")
    .replace(ASPECT_RATIO_PATTERN, "")
    // Collapse only the horizontal gaps left behind by removed flags;
    // line breaks in a multi-line prompt are preserved.
    .replace(/[^\S\r\n]{2,}/g, " ")
    .trim()

  const result: ParsedPromptFlags = { cleanedPrompt }
  if (resolution !== undefined) {
    result.resolution = resolution
  }
  if (aspectRatio !== undefined) {
    result.aspectRatio = aspectRatio
  }
  return result
}

/**
 * Narrow an unknown value to a plain indexable object.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null
}

/**
 * Extract the last user message text from a Gemini contents array.
 * Used to find flags in the most recent user prompt.
 *
 * Entries are scanned from the end. Within the first `role: "user"` entry that
 * has a `parts` array containing a non-blank `text` string, the last such part
 * wins. User entries without any usable text part are skipped.
 *
 * @param contents - The Gemini-format contents array
 * @returns The text plus its content/part indices, or undefined if not found
 */
export function extractLastUserPrompt(contents: unknown[]): { text: string, contentIndex: number, partIndex: number } | undefined {
  if (!Array.isArray(contents)) {
    return undefined
  }

  // Walk backwards to find the last user message
  for (let i = contents.length - 1; i >= 0; i--) {
    const content: unknown = contents[i]
    if (!isRecord(content) || content.role !== "user") {
      continue
    }

    const parts = content.parts
    if (!Array.isArray(parts)) {
      continue
    }

    // Find the last non-blank text part in this user message
    for (let j = parts.length - 1; j >= 0; j--) {
      const part: unknown = parts[j]
      if (!isRecord(part)) {
        continue
      }
      const text = part.text
      if (typeof text === "string" && text.trim().length > 0) {
        return { text, contentIndex: i, partIndex: j }
      }
    }
  }

  return undefined
}