diff --git a/.env.example b/.env.example index 00d8892..e80429b 100644 --- a/.env.example +++ b/.env.example @@ -117,6 +117,10 @@ ANTHROPIC_API_KEY=sk-ant-your_anthropic_api_key_here # Get from: https://aistudio.google.com/app/apikey GEMINI_API_KEY=your_gemini_api_key_here +# Venice AI - Uncensored models with private inference (VVV token) +# Get from: https://venice.ai/settings/api +VENICE_API_KEY=your_venice_api_key_here + # ============================================================================ # CONTENT & MEDIA # ============================================================================ diff --git a/tools/multi-model/README.md b/tools/multi-model/README.md index 4acbf9c..108a1d0 100644 --- a/tools/multi-model/README.md +++ b/tools/multi-model/README.md @@ -1,7 +1,7 @@ # Multi-Model AI Toolkit -> Leverage OpenAI, Anthropic, and Google Gemini together for superior code -> quality. +> Leverage OpenAI, Anthropic, Google Gemini, and Venice AI together for superior +> code quality. **Part of [Claude Code Sidekick](../../README.md)** @@ -16,7 +16,9 @@ keywords - **💰 Cost Optimization** - Use cheap models for simple tasks, reserve power for complex ones -- **🔧 Flexible Configuration** - Works with just one API key or all three +- **🔧 Flexible Configuration** - Works with just one API key or all four +- **🔓 Venice AI Integration** - Access uncensored models with private inference + via [VVV token](https://venice.ai/token) ## File Structure @@ -50,6 +52,7 @@ pnpm install OPENAI_API_KEY=sk-... ANTHROPIC_API_KEY=sk-ant-... GEMINI_API_KEY=... +VENICE_API_KEY=... # Optional: Venice AI for uncensored models ``` ## Configuration @@ -58,11 +61,20 @@ GEMINI_API_KEY=... The toolkit automatically adapts based on your available API keys: -| API Keys Available | Mode | Behavior | -| ------------------ | ------------ | ---------------------------------- | -| All three | Multi-model | Consensus review across all models | -| Two keys | Multi-model | Consensus with 2 models | -| One key | Single-model | Direct review with one model | +| API Keys Available | Mode | Behavior | +| ------------------ | ------------ | ------------------------------ | +| 3+ keys | Multi-model | Consensus review across models | +| 2 keys | Multi-model | Consensus with 2 models | +| 1 key | Single-model | Direct review with one model | + +### Supported Providers + +| Provider | Env Variable | Models | +| --------- | ------------------- | ------------------------------------ | +| OpenAI | `OPENAI_API_KEY` | GPT-4o, GPT-4o-mini | +| Anthropic | `ANTHROPIC_API_KEY` | Claude Sonnet, Claude Haiku | +| Google | `GEMINI_API_KEY` | Gemini Pro, Gemini Flash | +| Venice AI | `VENICE_API_KEY` | Llama 3.3 70B, DeepSeek R1, Qwen 2.5 | **To explicitly disable multi-model mode** (even if you have multiple keys): @@ -90,11 +102,41 @@ ANTHROPIC_API_KEY=sk-ant-... # Option 3: Gemini only GEMINI_API_KEY=... + +# Option 4: Venice AI only (uncensored models) +VENICE_API_KEY=... ``` The toolkit will automatically use the best available model for your configuration. +### Venice AI (VVV Token) + +[Venice AI](https://venice.ai) provides uncensored AI models with private +inference. Powered by the [VVV token](https://venice.ai/token). + +Available models: + +- `llama-3.3-70b` - Llama 3.3 70B (fast, capable) +- `llama-3.2-3b` - Llama 3.2 3B (lightweight, cheap) +- `deepseek-r1-671b` - DeepSeek R1 671B (reasoning, most capable) +- `deepseek-r1-llama-70b` - DeepSeek R1 Llama 70B +- `dolphin-2.9.2-qwen2-72b` - Dolphin (uncensored) +- `qwen-2.5-72b` - Qwen 2.5 72B + +```javascript +import { MODELS, complete } from '@claude-sidekick/multi-model'; + +// Use Venice models directly +const result = await complete( + MODELS.VENICE_LLAMA_70B, + 'Explain quantum computing' +); + +// Or use venice/ prefix +const result2 = await complete('venice/llama-3.3-70b', prompt); +``` + ## CLI Tools ### Multi-Model Code Review diff --git a/tools/multi-model/index.js b/tools/multi-model/index.js index 9aa07ea..dcf83e4 100644 --- a/tools/multi-model/index.js +++ b/tools/multi-model/index.js @@ -30,8 +30,10 @@ export { getOpenAI, getAnthropic, getGemini, + getVenice, MODELS, MODEL_COSTS, + VENICE_MODELS, // Configuration helpers isMultiModelEnabled, getAvailableProviders, @@ -40,6 +42,7 @@ export { getBestModel, hasProvider, filterAvailableModels, + getProviderFromModel, } from './lib/clients.js'; // Code Review diff --git a/tools/multi-model/lib/clients.js b/tools/multi-model/lib/clients.js index a8ce2d0..d281303 100644 --- a/tools/multi-model/lib/clients.js +++ b/tools/multi-model/lib/clients.js @@ -16,6 +16,7 @@ config(); let openaiClient = null; let anthropicClient = null; let geminiClient = null; +let veniceClient = null; /** * Check if multi-model mode is enabled @@ -34,6 +35,7 @@ export function getAvailableProviders() { if (process.env.OPENAI_API_KEY) providers.push('openai'); if (process.env.ANTHROPIC_API_KEY) providers.push('anthropic'); if (process.env.GEMINI_API_KEY || process.env.GEMENI_API_KEY) providers.push('gemini'); + if (process.env.VENICE_API_KEY) providers.push('venice'); return providers; } @@ -88,6 +90,24 @@ export function getGemini() { return geminiClient; } +/** + * Get Venice AI client (OpenAI-compatible) + * Venice provides uncensored models with private inference + * https://venice.ai + */ +export function getVenice() { + if (!veniceClient) { + if (!process.env.VENICE_API_KEY) { + throw new Error('VENICE_API_KEY not set in environment'); + } + veniceClient = new OpenAI({ + apiKey: process.env.VENICE_API_KEY, + baseURL: 'https://api.venice.ai/api/v1', + }); + } + return veniceClient; +} + /** * Unified completion interface */ @@ -147,6 +167,29 @@ export async function complete(model, prompt, options = {}) { }; } + // Venice AI models (OpenAI-compatible API) + if (model.startsWith('venice/') || VENICE_MODELS.includes(model)) { + const client = getVenice(); + const messages = []; + if (systemPrompt) messages.push({ role: 'system', content: systemPrompt }); + messages.push({ role: 'user', content: prompt }); + + // Strip venice/ prefix if present + const veniceModel = model.replace('venice/', ''); + + const response = await client.chat.completions.create({ + model: veniceModel, + messages, + max_tokens: maxTokens, + temperature, + }); + return { + content: response.choices[0].message.content, + model, + usage: response.usage, + }; + } + throw new Error(`Unknown model: ${model}`); } @@ -174,6 +217,16 @@ export async function embedBatch(texts, model = 'text-embedding-3-small') { return response.data.map((d) => d.embedding); } +// Venice AI model identifiers +export const VENICE_MODELS = [ + 'llama-3.3-70b', + 'llama-3.2-3b', + 'deepseek-r1-671b', + 'deepseek-r1-llama-70b', + 'dolphin-2.9.2-qwen2-72b', + 'qwen-2.5-72b', +]; + export const MODELS = { // OpenAI GPT4: 'gpt-4-turbo-preview', @@ -188,6 +241,14 @@ export const MODELS = { // Google GEMINI_PRO: 'gemini-1.5-pro', GEMINI_FLASH: 'gemini-2.0-flash', + + // Venice AI (uncensored, private inference) + VENICE_LLAMA_70B: 'llama-3.3-70b', + VENICE_LLAMA_3B: 'llama-3.2-3b', + VENICE_DEEPSEEK_R1: 'deepseek-r1-671b', + VENICE_DEEPSEEK_LLAMA: 'deepseek-r1-llama-70b', + VENICE_DOLPHIN: 'dolphin-2.9.2-qwen2-72b', + VENICE_QWEN: 'qwen-2.5-72b', }; export const MODEL_COSTS = { @@ -199,6 +260,13 @@ export const MODEL_COSTS = { 'claude-3-5-haiku-20241022': [0.8, 4], 'gemini-1.5-pro': [3.5, 10.5], 'gemini-2.0-flash': [0.075, 0.3], + // Venice AI (VVV token) - prices approximate + 'llama-3.3-70b': [0.5, 0.75], + 'llama-3.2-3b': [0.1, 0.15], + 'deepseek-r1-671b': [2.0, 4.0], + 'deepseek-r1-llama-70b': [0.5, 0.75], + 'dolphin-2.9.2-qwen2-72b': [0.5, 0.75], + 'qwen-2.5-72b': [0.5, 0.75], }; /** @@ -208,6 +276,7 @@ export function getProviderFromModel(model) { if (model.startsWith('gpt-')) return 'openai'; if (model.startsWith('claude-')) return 'anthropic'; if (model.startsWith('gemini-')) return 'gemini'; + if (model.startsWith('venice/') || VENICE_MODELS.includes(model)) return 'venice'; return null; } @@ -225,6 +294,9 @@ export function getAvailableModels() { if (hasProvider('gemini')) { models.push(MODELS.GEMINI_PRO, MODELS.GEMINI_FLASH); } + if (hasProvider('venice')) { + models.push(MODELS.VENICE_LLAMA_70B, MODELS.VENICE_LLAMA_3B, MODELS.VENICE_DEEPSEEK_R1); + } return models; } @@ -234,6 +306,7 @@ export function getAvailableModels() { export function getDefaultModel() { // Prefer cheaper/faster models as default if (hasProvider('gemini')) return MODELS.GEMINI_FLASH; + if (hasProvider('venice')) return MODELS.VENICE_LLAMA_3B; if (hasProvider('openai')) return MODELS.GPT4O_MINI; if (hasProvider('anthropic')) return MODELS.CLAUDE_HAIKU; throw new Error('No API keys configured'); @@ -245,6 +318,7 @@ export function getDefaultModel() { export function getBestModel() { if (hasProvider('anthropic')) return MODELS.CLAUDE_SONNET; if (hasProvider('openai')) return MODELS.GPT4O; + if (hasProvider('venice')) return MODELS.VENICE_DEEPSEEK_R1; if (hasProvider('gemini')) return MODELS.GEMINI_PRO; throw new Error('No API keys configured'); }