diff --git a/src/agent/actions.ts b/src/agent/actions.ts index 99bfbc1..cb14b07 100644 --- a/src/agent/actions.ts +++ b/src/agent/actions.ts @@ -271,6 +271,14 @@ export function normalizeAction(input: unknown): AgentAction { }; } + if (type === "generate_image") { + return { + type, + prompt: String(input.prompt ?? ""), + reason: input.reason ? String(input.reason) : undefined, + }; + } + if (type === "wait") { return { type, diff --git a/src/agent/agent-runtime.ts b/src/agent/agent-runtime.ts index e874a25..cbf5f2e 100644 --- a/src/agent/agent-runtime.ts +++ b/src/agent/agent-runtime.ts @@ -46,6 +46,7 @@ import { buildPiAiModel } from "./model-client.js"; import { buildSystemPrompt, buildUserPrompt, type SystemPromptMode } from "./prompts.js"; import { CHAT_TOOLS, TOOL_METAS, toolNameToActionType, type ToolMeta } from "./tools.js"; import { normalizeAction } from "./actions.js"; +import { createImageService, type ImageGenerationService } from "../services/image-generation/index.js"; import { runRuntimeAttempt } from "./runtime/attempt.js"; import { runRuntimeTask } from "./runtime/run.js"; import type { RunTaskRequest } from "./runtime/types.js"; @@ -307,6 +308,7 @@ export class AgentRuntime { private readonly piCodingToolsExecutor: PiCodingToolsExecutor; private readonly memoryExecutor: MemoryExecutor; private readonly screenshotStore: ScreenshotStore; + private readonly imageGenerationService: ImageGenerationService | null; private busy = false; private stopRequested = false; private currentTask: string | null = null; @@ -331,9 +333,43 @@ export class AgentRuntime { config.screenshots.directory, config.screenshots.maxCount, ); + // Initialize image generation service if enabled and configured + this.imageGenerationService = this.initializeImageGenerationService(config); this.agentFactory = options?.agentFactory ?? ((agentOptions: AgentOptions) => new Agent(agentOptions)); } + private initializeImageGenerationService(config: OpenPocketConfig): ImageGenerationService | null { + if (!config.imageGeneration.enabled) { + return null; + } + + const apiKey = config.imageGeneration.apiKey || process.env[config.imageGeneration.apiKeyEnv]; + if (!apiKey) { + // eslint-disable-next-line no-console + console.warn("[OpenPocket] Image generation enabled but no API key configured"); + return null; + } + + try { + // Only support fal provider for now + if (config.imageGeneration.provider !== "fal") { + // eslint-disable-next-line no-console + console.warn(`[OpenPocket] Image generation provider '${config.imageGeneration.provider}' not yet supported`); + return null; + } + + return createImageService({ + type: config.imageGeneration.provider, + apiKey, + model: config.imageGeneration.model, + }); + } catch (error) { + // eslint-disable-next-line no-console + console.error("[OpenPocket] Failed to initialize image generation service:", error); + return null; + } + } + isBusy(): boolean { return this.busy; } @@ -2256,6 +2292,53 @@ export class AgentRuntime { return { content: [{ type: "text" as const, text: resultText }], details: {} }; } + // ---- generate_image ---- + if (action.type === "generate_image") { + if (!runtime.imageGenerationService) { + const msg = "Image generation requested, but service is not enabled or configured."; + ctx.failMessage = msg; + runtime.workspace.appendStep( + ctx.session, + step, + thought, + JSON.stringify(action, null, 2), + msg, + buildStepTrace(snapshot?.currentApp ?? "unknown", "error"), + ); + ctx.traces.push({ step, action, result: msg, thought, currentApp: snapshot?.currentApp ?? "unknown" }); + return { content: [{ type: "text" as const, text: msg }], details: {} }; + } + + try { + const result = await runtime.imageGenerationService.generate(action.prompt); + const resultText = `Image generated successfully.\nURL: ${result.url}\nProvider: ${result.provider}`; + runtime.workspace.appendStep( + ctx.session, + step, + thought, + JSON.stringify(action, null, 2), + resultText, + buildStepTrace(snapshot?.currentApp ?? "unknown", "ok"), + ); + ctx.traces.push({ step, action, result: resultText, thought, currentApp: snapshot?.currentApp ?? "unknown" }); + ctx.history.push(`step ${step}: action=generate_image url=${result.url}`); + return { content: [{ type: "text" as const, text: resultText }], details: { imageUrl: result.url } }; + } catch (error) { + const errorMsg = `Image generation failed: ${error instanceof Error ? error.message : String(error)}`; + ctx.failMessage = errorMsg; + runtime.workspace.appendStep( + ctx.session, + step, + thought, + JSON.stringify(action, null, 2), + errorMsg, + buildStepTrace(snapshot?.currentApp ?? "unknown", "error"), + ); + ctx.traces.push({ step, action, result: errorMsg, thought, currentApp: snapshot?.currentApp ?? "unknown" }); + return { content: [{ type: "text" as const, text: errorMsg }], details: {} }; + } + } + // ---- all other actions (tap, swipe, type, keyevent, launch_app, shell, run_script, read, write, edit, etc.) ---- const executionResult = await runtime.executePhoneAction(action, ctx); const stepResult = ctx.lastScreenshotPath diff --git a/src/agent/model-client.ts b/src/agent/model-client.ts index e262705..5340c1e 100644 --- a/src/agent/model-client.ts +++ b/src/agent/model-client.ts @@ -98,6 +98,22 @@ function extractThinking(msg: AssistantMessage): string { * OpenAI-compatible endpoint). */ export function buildPiAiModel(profile: ModelProfile): Model { + // If profile explicitly specifies api and provider, use them directly + if (profile.api && profile.provider) { + return { + id: profile.model, + name: profile.model, + api: profile.api as Api, + provider: profile.provider, + baseUrl: profile.baseUrl, + reasoning: profile.reasoningEffort !== null, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128_000, + maxTokens: profile.maxTokens, + }; + } + // Detect provider / api from baseUrl. const baseUrlLower = profile.baseUrl.toLowerCase(); diff --git a/src/agent/prompts.ts b/src/agent/prompts.ts index bb9eb54..a644496 100644 --- a/src/agent/prompts.ts +++ b/src/agent/prompts.ts @@ -20,6 +20,7 @@ const TOOL_CATALOG_ORDER = [ "process", "memory_search", "memory_get", + "generate_image", "request_human_auth", "request_user_decision", "request_user_input", @@ -44,6 +45,7 @@ const TOOL_CATALOG_LINES: Record<(typeof TOOL_CATALOG_ORDER)[number], string> = process: "- process: process(action[, sessionId, input, offset, limit, timeoutMs, reason])", memory_search: "- memory_search: memory_search(query[, maxResults, minScore, reason])", memory_get: "- memory_get: memory_get(path[, from, lines, reason])", + generate_image: "- generate_image: generate_image(prompt[, reason]) - Generate an image from text description using AI image generation service", request_human_auth: "- request_human_auth: request_human_auth(capability, instruction[, timeoutSec, reason])", request_user_decision: "- request_user_decision: request_user_decision(question, options[, timeoutSec, reason])", request_user_input: "- request_user_input: request_user_input(question[, placeholder, timeoutSec, reason])", diff --git a/src/agent/tools.ts b/src/agent/tools.ts index ad0be00..4a6ac93 100644 --- a/src/agent/tools.ts +++ b/src/agent/tools.ts @@ -185,6 +185,12 @@ export const finishSchema = Type.Object({ message: Type.String({ description: "Summary of what was accomplished." }), }); +export const generateImageSchema = Type.Object({ + thought: ThoughtParam, + prompt: Type.String({ description: "Text description of the image to generate." }), + reason: ReasonParam, +}); + // --------------------------------------------------------------------------- // Exported types (Static inference from TypeBox schemas) // --------------------------------------------------------------------------- @@ -210,6 +216,7 @@ export type RequestUserDecisionParams = Static export type RequestUserInputParams = Static; export type WaitParams = Static; export type FinishParams = Static; +export type GenerateImageParams = Static; // --------------------------------------------------------------------------- // Tool metadata list (name, description, schema) — used to build AgentTool[] @@ -243,6 +250,7 @@ export const TOOL_METAS: ToolMeta[] = [ { name: "request_user_input", description: "Ask user for a short non-sensitive text input needed to continue the task.", parameters: requestUserInputSchema }, { name: "wait", description: "Wait / do nothing for a short period, e.g. while content is loading.", parameters: waitSchema }, { name: "finish", description: "Signal that the user task is complete.", parameters: finishSchema }, + { name: "generate_image", description: "Generate an image from text description using an image generation service.", parameters: generateImageSchema }, ]; // --------------------------------------------------------------------------- diff --git a/src/config/index.ts b/src/config/index.ts index cf3d877..34b90ba 100644 --- a/src/config/index.ts +++ b/src/config/index.ts @@ -166,6 +166,13 @@ function defaultConfigObject() { }, }, }, + imageGeneration: { + enabled: false, + provider: "fal" as const, + apiKey: "", + apiKeyEnv: "FAL_API_KEY", + model: "fal-ai/nano-banana", + }, models: { "gpt-5.2-codex": { baseUrl: "https://api.openai.com/v1", @@ -613,9 +620,9 @@ function normalizeConfig(raw: Record, configPath: string): Open model.reasoningEffort ?? model.reasoning_effort ?? null; const reasoningEffort = reasoningRaw === "low" || - reasoningRaw === "medium" || - reasoningRaw === "high" || - reasoningRaw === "xhigh" + reasoningRaw === "medium" || + reasoningRaw === "high" || + reasoningRaw === "xhigh" ? reasoningRaw : null; const tempRaw = model.temperature; @@ -649,6 +656,7 @@ function normalizeConfig(raw: Record, configPath: string): Open const cron = (merged.cron ?? {}) as Record; const dashboard = (merged.dashboard ?? {}) as Record; const humanAuth = (merged.humanAuth ?? {}) as Record; + const imageGeneration = (merged.imageGeneration ?? {}) as Record; const sessionStorage = (merged.sessionStorage ?? {}) as Record; const humanAuthTunnel = isObject(humanAuth.tunnel) ? humanAuth.tunnel : {}; const humanAuthNgrok = isObject(humanAuthTunnel.ngrok) ? humanAuthTunnel.ngrok : {}; @@ -792,7 +800,7 @@ function normalizeConfig(raw: Record, configPath: string): Open localRelayStateFile: resolvePath( String( humanAuth.localRelayStateFile ?? - path.join(resolvedStateDir, "human-auth-relay", "requests.json"), + path.join(resolvedStateDir, "human-auth-relay", "requests.json"), ), ), relayBaseUrl: String(humanAuth.relayBaseUrl ?? "").trim().replace(/\/+$/, ""), @@ -813,23 +821,23 @@ function normalizeConfig(raw: Record, configPath: string): Open executable: String( humanAuthNgrok.executable ?? - defaultConfigObject().humanAuth.tunnel.ngrok.executable, + defaultConfigObject().humanAuth.tunnel.ngrok.executable, ).trim() || "ngrok", authtoken: String(humanAuthNgrok.authtoken ?? ""), authtokenEnv: String( humanAuthNgrok.authtokenEnv ?? - defaultConfigObject().humanAuth.tunnel.ngrok.authtokenEnv, + defaultConfigObject().humanAuth.tunnel.ngrok.authtokenEnv, ).trim() || "NGROK_AUTHTOKEN", apiBaseUrl: String( humanAuthNgrok.apiBaseUrl ?? - defaultConfigObject().humanAuth.tunnel.ngrok.apiBaseUrl, + defaultConfigObject().humanAuth.tunnel.ngrok.apiBaseUrl, ).trim().replace(/\/+$/, "") || "http://127.0.0.1:4040", startupTimeoutSec: (() => { const raw = Number( humanAuthNgrok.startupTimeoutSec ?? - defaultConfigObject().humanAuth.tunnel.ngrok.startupTimeoutSec, + defaultConfigObject().humanAuth.tunnel.ngrok.startupTimeoutSec, ); const value = Number.isFinite(raw) ? raw : 20; return Math.max(3, Math.round(value)); @@ -837,6 +845,18 @@ function normalizeConfig(raw: Record, configPath: string): Open }, }, }, + imageGeneration: { + enabled: Boolean(imageGeneration.enabled ?? false), + provider: (() => { + const provider = String(imageGeneration.provider ?? "fal"); + return provider === "fal" || provider === "replicate" || provider === "huggingface" + ? provider + : "fal"; + })(), + apiKey: String(imageGeneration.apiKey ?? ""), + apiKeyEnv: String(imageGeneration.apiKeyEnv ?? "FAL_API_KEY"), + model: imageGeneration.model ? String(imageGeneration.model) : undefined, + }, models, configPath, }; @@ -882,6 +902,7 @@ export function saveConfig(config: OpenPocketConfig): void { cron: config.cron, dashboard: config.dashboard, humanAuth: config.humanAuth, + imageGeneration: config.imageGeneration, models: config.models, }; fs.writeFileSync(config.configPath, `${JSON.stringify(payload, null, 2)}\n`, "utf-8"); diff --git a/src/device/adb-runtime.ts b/src/device/adb-runtime.ts index f44e81a..013286c 100644 --- a/src/device/adb-runtime.ts +++ b/src/device/adb-runtime.ts @@ -879,6 +879,9 @@ export class AdbRuntime { case "finish": { return `Finish: ${action.message}`; } + case "generate_image": { + return "generate_image is handled by AgentRuntime image generation service."; + } default: { const exhaust: never = action; return `Unknown action: ${JSON.stringify(exhaust)}`; diff --git a/src/gateway/telegram-gateway.ts b/src/gateway/telegram-gateway.ts index e7fd552..2ab8ac1 100644 --- a/src/gateway/telegram-gateway.ts +++ b/src/gateway/telegram-gateway.ts @@ -2467,14 +2467,41 @@ export class TelegramGateway { this.stripStepCounterTelemetry(finalMessage), 1800, ); - await this.bot.sendMessage( - chatId, - finalForChat, - { - disable_web_page_preview: true, - }, - ); - this.chat.appendExternalTurn(chatId, "assistant", finalMessage); + // Check if message contains an image URL + const imageUrlMatch = finalMessage.match(/https:\/\/[^\s]+\.(png|jpg|jpeg|gif|webp)/i); + if (imageUrlMatch) { + const imageUrl = imageUrlMatch[0]; + const caption = this.sanitizeForChat( + this.stripStepCounterTelemetry(finalMessage.replace(imageUrl, "").trim()), + 1000, + ); + try { + await this.bot.sendPhoto(chatId, imageUrl, { + caption: caption || undefined, + }); + this.chat.appendExternalTurn(chatId, "assistant", finalMessage); + } catch (error) { + // Fallback to text message if photo send fails + this.log(`Failed to send photo, falling back to text: ${(error as Error).message}`); + await this.bot.sendMessage( + chatId, + this.sanitizeForChat( + this.stripStepCounterTelemetry(finalMessage), + 1800, + ), + ); + this.chat.appendExternalTurn(chatId, "assistant", finalMessage); + } + } else { + await this.bot.sendMessage( + chatId, + finalForChat, + { + disable_web_page_preview: true, + }, + ); + this.chat.appendExternalTurn(chatId, "assistant", finalMessage); + } } return { diff --git a/src/services/image-generation/base.ts b/src/services/image-generation/base.ts new file mode 100644 index 0000000..daa1985 --- /dev/null +++ b/src/services/image-generation/base.ts @@ -0,0 +1,38 @@ +/** + * Base interface for image generation providers + * + * Implementations wrap specific third-party APIs (fal, replicate, etc.) + * while exposing a consistent business-level interface. + */ + +import type { + ImageGenerationRequest, + ImageGenerationResult, +} from "./types.js"; + +/** + * Abstract base class for image generation providers + * + * Each provider (fal, replicate, etc.) implements this interface. + * The factory returns appropriate instances based on configuration. + */ +export abstract class ImageGenerationProvider { + /** + * Unique identifier for this provider + */ + abstract readonly providerId: string; + + /** + * Generate an image from a text prompt + * + * @param request - Image generation request + * @returns Promise resolving to generation result + * @throws ImageGenerationError if generation fails + */ + abstract generate(request: ImageGenerationRequest): Promise; + + /** + * Check if this provider is properly configured + */ + abstract isConfigured(): boolean; +} diff --git a/src/services/image-generation/fal-provider.ts b/src/services/image-generation/fal-provider.ts new file mode 100644 index 0000000..a3096ca --- /dev/null +++ b/src/services/image-generation/fal-provider.ts @@ -0,0 +1,183 @@ +/** + * fal.ai provider for image generation + * + * Implements image generation using fal.ai's REST API. + * Supports various models including nanobanana. + */ + +import type { + ImageGenerationRequest, + ImageGenerationResult, +} from "./types.js"; +import { ImageGenerationProvider } from "./base.js"; +import { ImageGenerationError } from "./types.js"; + +/** + * Configuration for fal provider + */ +export interface FalProviderConfig { + /** fal.ai API key */ + apiKey: string; + /** Model to use (default: nanobanana) */ + model?: string; + /** API base URL (default: fal.ai queue) */ + baseUrl?: string; +} + +/** + * fal.ai API response structure + */ +interface FalQueueResponse { + request_id: string; + status: "IN_QUEUE" | "IN_PROGRESS" | "COMPLETED"; +} + +interface FalResultResponse { + images: Array<{ + url: string; + width: number; + height: number; + }>; +} + +/** + * Image generation provider using fal.ai + */ +export class FalProvider extends ImageGenerationProvider { + readonly providerId = "fal"; + + private readonly apiKey: string; + private readonly model: string; + private readonly baseUrl: string; + + constructor(config: FalProviderConfig) { + super(); + this.apiKey = config.apiKey; + this.model = config.model ?? "fal-ai/nano-banana"; + this.baseUrl = config.baseUrl ?? "https://queue.fal.run"; + } + + isConfigured(): boolean { + return this.apiKey.length > 0; + } + + async generate(request: ImageGenerationRequest): Promise { + if (!this.isConfigured()) { + throw new ImageGenerationError( + "fal provider is not configured (missing API key)", + this.providerId, + ); + } + + try { + const modelEndpoint = this.model.replace(/^fal-ai\//, ""); + const queueUrl = `${this.baseUrl}/${this.model}`; + + // Step 1: Submit request to queue + // eslint-disable-next-line no-console + console.log(`[FalProvider] Submitting request to ${queueUrl}`); + + const queueResponse = await fetch(queueUrl, { + method: "POST", + headers: { + "Authorization": `Key ${this.apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + prompt: request.prompt, + }), + }); + + if (!queueResponse.ok) { + const errorText = await queueResponse.text(); + throw new Error(`HTTP ${queueResponse.status}: ${errorText}`); + } + + const queueData: FalQueueResponse = await queueResponse.json(); + + if (queueData.status === "COMPLETED") { + // Some models complete synchronously + const resultData = await queueResponse.json() as FalResultResponse; + if (!resultData.images || resultData.images.length === 0) { + throw new Error("No images in response"); + } + return { + url: resultData.images[0].url, + provider: this.providerId, + metadata: { model: this.model }, + }; + } + + // Step 2: Poll for result (async queue) + const requestId = queueData.request_id; + const statusUrl = `${this.baseUrl}/${this.model}/requests/${requestId}/status`; + + let attempts = 0; + const maxAttempts = 60; // 60 seconds timeout + const pollInterval = 1000; // 1 second + + while (attempts < maxAttempts) { + await new Promise(resolve => setTimeout(resolve, pollInterval)); + + const statusResponse = await fetch(statusUrl, { + headers: { + "Authorization": `Key ${this.apiKey}`, + }, + }); + + if (!statusResponse.ok) { + throw new Error(`Status check failed: HTTP ${statusResponse.status}`); + } + + const statusData: FalQueueResponse = await statusResponse.json(); + + if (statusData.status === "COMPLETED") { + // Fetch final result + const resultUrl = `${this.baseUrl}/${this.model}/requests/${requestId}`; + const resultResponse = await fetch(resultUrl, { + headers: { + "Authorization": `Key ${this.apiKey}`, + }, + }); + + if (!resultResponse.ok) { + throw new Error(`Failed to fetch result: HTTP ${resultResponse.status}`); + } + + const resultData: FalResultResponse = await resultResponse.json(); + + if (!resultData.images || resultData.images.length === 0) { + throw new Error("No images in response"); + } + + return { + url: resultData.images[0].url, + provider: this.providerId, + metadata: { + model: this.model, + requestId, + }, + }; + } + + if (statusData.status === "IN_QUEUE" || statusData.status === "IN_PROGRESS") { + attempts++; + continue; + } + + throw new Error(`Unexpected status: ${statusData.status}`); + } + + throw new Error("Request timed out"); + } catch (error) { + if (error instanceof ImageGenerationError) { + throw error; + } + throw new ImageGenerationError( + `Failed to generate image: ${error instanceof Error ? error.message : String(error)}`, + this.providerId, + error, + ); + } + } +} diff --git a/src/services/image-generation/index.ts b/src/services/image-generation/index.ts new file mode 100644 index 0000000..549dd55 --- /dev/null +++ b/src/services/image-generation/index.ts @@ -0,0 +1,100 @@ +/** + * Image generation service factory and exports + * + * Provides business-level image generation functionality, + * abstracting away the underlying provider implementation. + */ + +import type { ImageGenerationRequest, ImageGenerationResult } from "./types.js"; +import { ImageGenerationProvider } from "./base.js"; +import { FalProvider, type FalProviderConfig } from "./fal-provider.js"; + +// Re-export types for convenience +export type { + ImageGenerationRequest, + ImageGenerationResult, +} from "./types.js"; +export { ImageGenerationError } from "./types.js"; +export { ImageGenerationProvider } from "./base.js"; +export { FalProvider, type FalProviderConfig } from "./fal-provider.js"; + +/** + * Supported provider types + */ +export type ImageProviderType = "fal"; + +/** + * Configuration for creating an image generation provider + */ +export interface ImageProviderConfig { + type: ImageProviderType; + apiKey: string; + model?: string; +} + +/** + * Create an image generation provider based on type + * + * @param config - Provider configuration + * @returns Configured provider instance + * @throws Error if provider type is unsupported + */ +export function createImageProvider(config: ImageProviderConfig): ImageGenerationProvider { + switch (config.type) { + case "fal": + return new FalProvider({ + apiKey: config.apiKey, + model: config.model, + }); + } + // If we add more providers in the future, TypeScript will catch missing cases + throw new Error(`Unsupported image provider type: ${config.type}`); +} + +/** + * Image generation service (convenience facade) + * + * Wraps a provider and provides a simple generate interface. + */ +export class ImageGenerationService { + private readonly provider: ImageGenerationProvider; + + constructor(provider: ImageGenerationProvider) { + this.provider = provider; + } + + /** + * Generate an image from a text prompt + * + * @param prompt - Text description of the image + * @returns Promise resolving to generation result + */ + async generate(prompt: string): Promise { + return this.provider.generate({ prompt }); + } + + /** + * Check if the service is properly configured + */ + isConfigured(): boolean { + return this.provider.isConfigured(); + } + + /** + * Get the provider identifier + */ + getProviderId(): string { + return this.provider.providerId; + } +} + +/** + * Create an image generation service from configuration + * + * @param config - Provider configuration + * @returns Configured service instance + */ +export function createImageService(config: ImageProviderConfig): ImageGenerationService { + const provider = createImageProvider(config); + return new ImageGenerationService(provider); +} diff --git a/src/services/image-generation/types.ts b/src/services/image-generation/types.ts new file mode 100644 index 0000000..4b4cf1b --- /dev/null +++ b/src/services/image-generation/types.ts @@ -0,0 +1,40 @@ +/** + * Image generation service types + * + * Business-level abstraction - consumers should not need to know + * which underlying provider (fal, replicate, etc.) is being used. + */ + +/** + * Request for image generation + */ +export interface ImageGenerationRequest { + /** Text description of the image to generate */ + prompt: string; +} + +/** + * Result from image generation + */ +export interface ImageGenerationResult { + /** Public URL of the generated image */ + url: string; + /** Provider identifier (for logging/debugging) */ + provider: string; + /** Additional provider-specific metadata */ + metadata?: Record; +} + +/** + * Error thrown when image generation fails + */ +export class ImageGenerationError extends Error { + constructor( + message: string, + public readonly provider: string, + public readonly cause?: unknown, + ) { + super(message); + this.name = "ImageGenerationError"; + } +} diff --git a/src/types.ts b/src/types.ts index 0509980..bcc803f 100644 --- a/src/types.ts +++ b/src/types.ts @@ -225,6 +225,10 @@ export interface ModelProfile { maxTokens: number; reasoningEffort: "low" | "medium" | "high" | "xhigh" | null; temperature: number | null; + /** Optional: Override the API type (e.g., "openai-responses", "openai-completions") */ + api?: string; + /** Optional: Override the provider (e.g., "openai", "openai-codex") */ + provider?: string; } export interface OpenPocketConfig { @@ -245,6 +249,7 @@ export interface OpenPocketConfig { cron: CronConfig; dashboard: DashboardConfig; humanAuth: HumanAuthConfig; + imageGeneration: ImageGenerationConfig; models: Record; configPath: string; } @@ -382,7 +387,8 @@ export type AgentAction = reason?: string; } | { type: "wait"; durationMs?: number; reason?: string } - | { type: "finish"; message: string }; + | { type: "finish"; message: string } + | { type: "generate_image"; prompt: string; reason?: string }; export interface ModelStepOutput { thought: string; @@ -416,6 +422,14 @@ export interface SkillInfo { path: string; } +export interface ImageGenerationConfig { + enabled: boolean; + provider: "fal" | "replicate" | "huggingface"; + apiKey: string; + apiKeyEnv: string; + model?: string; +} + export interface CronJob { id: string; name: string;