diff --git a/README.md b/README.md index 61ac695..ed3d76c 100644 --- a/README.md +++ b/README.md @@ -138,7 +138,7 @@ As of this release, json-autotranslate offers five services: to translate strings) - **amazon-translate** (uses [Amazon Translate](https://aws.amazon.com/translate/) to translate strings) -- **OpenAI** (uses gpt-4o and can take a context file path from the context option) +- **OpenAI** (uses gpt-5 by default and can take a context file path from the context option as well as a custom system prompt) - **manual** (allows you to translate strings manually by entering them into the CLI) - **dry-run** (outputs a list of strings that will be translated without @@ -272,6 +272,33 @@ At a minimum, this must include the AWS region. Amazon Translate offers a free tier, but is paid after that. See their [pricing](https://aws.amazon.com/translate/pricing/) page for details. +### OpenAI + +You need to have an account with [OpenAI](https://platform.openai.com/docs/overview). + +The `--config` value is a comma-separated list of values; only the first one, the API key, is required: + +``` +--config apiKey[,systemPrompt][,model] +``` + +Where: + +- `apiKey` is the API key [obtained from the OpenAI dashboard](https://platform.openai.com/api-keys). This key is secret, so don't commit it! +- `systemPrompt` is either the literal text of a system prompt to use instead of the default one, or a path to a file that contains such a prompt (recommended). Because the config value is split on commas, an inline prompt cannot contain commas; use a file for anything longer. Defaults to the built-in prompt. +- `model` is the model to use. You can check available models and their rate and token limits [on your dashboard](https://platform.openai.com/settings/organization/limits). Default: `gpt-5`. + +Recommendations: + +- Put your OpenAI API key in `.env.local` as the `OPENAI_API_KEY` env var and load it with `source .env.local` before running the translation +- Put your custom prompt in a `json-autotranslate.prompt.md` file in your cwd. Describe the tone to use, the global context of the translation (e.g. 
target group, type of product) and also include a list of terms that shouldn't be translated (e.g. your app name) + +Example usage: + +``` +source .env.local && npx json-autotranslate --service openai --config ${OPENAI_API_KEY},./json-autotranslate.prompt.md,gpt-5-mini +``` + ### Manual This service doesn't require any configuration. You will be prompted to diff --git a/src/services/openai.ts b/src/services/openai.ts index 2560e81..6b18167 100644 --- a/src/services/openai.ts +++ b/src/services/openai.ts @@ -8,12 +8,14 @@ import fetch from 'node-fetch'; import * as fs from 'fs'; import * as path from 'path'; import { decode } from 'html-entities'; -import _ from 'lodash'; +import _, { chunk } from 'lodash'; +import chalk from 'chalk'; export class OpenAITranslator implements TranslationService { public name = 'OpenAI'; private apiKey?: string; private systemPrompt?: string; + private model?: string; private context?: { [key: string]: string }; private interpolationMatcher?: Matcher; private decodeEscapes?: boolean; @@ -30,10 +32,12 @@ export class OpenAITranslator implements TranslationService { throw new Error(`Please provide an API key for ${this.name}.`); } - const [apiKey, systemPrompt] = config.split(','); + const [apiKey, systemPrompt, model] = config.split(','); this.apiKey = apiKey; + this.model = model || 'gpt-5'; + console.log(chalk`├── using {green.bold ${String(this.model)}}`); this.systemPrompt = - systemPrompt || + this.loadSystemPrompt(systemPrompt) || ` You are an expert linguistic translator specializing in {sourceLang} to {targetLang} (ISO 639-1) translations. Your task is to provide accurate, contextually appropriate, and natural-sounding translations while adhering to the following guidelines: - Preserve the original meaning: Ensure that the core message and nuances of the source text are accurately conveyed in the target language. 
@@ -260,8 +264,21 @@ ISO to Language: from: string, to: string, ): Promise { + if (!this.systemPrompt) { + throw new Error('Missing system prompt'); + } + + type TranslationObject = { key: string, text: string, context: string, replacements: { from: string, to: string }[] }; + type TranslatedObject = TranslationObject & { translated: string }; + + const systemPromptFilled = this.systemPrompt + .replace('{sourceLang}', from) + .replace('{targetLang}', to); + const results: TranslationResult[] = []; + const translationsList: TranslationObject[] = []; + for (const stringItem of strings) { const { key, value } = stringItem; @@ -271,40 +288,62 @@ ISO to Language: // Get context for the key const contextForKey = _.get(this.context, key) || ''; - if (!this.systemPrompt) { - throw new Error('Missing system prompt'); - } + translationsList.push({ key, text: replaced.clean, context: contextForKey, replacements: replaced.replacements }); + } - // Prepare the messages for OpenAI API - const systemPromptFilled = this.systemPrompt - .replace('{sourceLang}', from) - .replace('{targetLang}', to); + // batch the translations list into chunks to avoid any rate limits and make it faster (looks like many small requests are faster than one large one) + const batches = chunk(translationsList, 25); + if (batches.length > 1) { + console.log(''); // empty line + } + + let batchIndex = 0; + for (const batch of batches) { + batchIndex++; + + const userPrompt = ` + I'm sending you a list of strings to translate. This is an array of objects in JSON format, where each object has the following keys: - const userPrompt = contextForKey - ? 
`Translation context: ${contextForKey}\n\nTranslate the following text: ${replaced.clean}` - : `Translate the following text: ${replaced.clean}`; + - key (string) - unique identifier for the string + - text (string) - the string to translate + - context (string) - the context for the string, if available + - replacements (array of objects) - the replacements for the string, if available, do not touch! - const messages = [ + Please translate each value of each object's "text" key into ${to}. + + Return the translated text in the same format as the input, with the "translated" key added to each object. + + Do not touch any other keys! + + ${JSON.stringify(batch)} + `; + + // Make the API call to OpenAI + const batchTranslations = await this.callOpenAIChatCompletion([ { role: 'system', content: systemPromptFilled }, { role: 'user', content: userPrompt }, - ]; + ]); - // Make the API call to OpenAI - const translatedText = await this.callOpenAIChatCompletion(messages); + const batchTranslationsParsed = JSON.parse(batchTranslations) as TranslatedObject[]; - // Re-insert interpolations - const finalTranslation = await reInsertInterpolations( - translatedText, - replaced.replacements, - ); + for (const translation of batchTranslationsParsed) { + const finalTranslation = await reInsertInterpolations( + translation.translated, + translation.replacements, + ); + + results.push({ + key: translation.key, + value: translation.text, + translated: this.decodeEscapes + ? decode(finalTranslation) + : finalTranslation, + }); + } - results.push({ - key, - value, - translated: this.decodeEscapes - ? 
decode(finalTranslation) - : finalTranslation, - }); + if (batches.length > 1) { + console.log(chalk`├── ${Math.round(batchIndex / batches.length * 100)}%`); + } } return results; @@ -317,9 +356,9 @@ ISO to Language: const apiUrl = 'https://api.openai.com/v1/chat/completions'; const requestBody = { - model: 'gpt-4o', + model: this.model, messages, - temperature: 0.3, + temperature: this.model === 'gpt-4o' ? 0.3 : 1, // gpt-5 has no temperature }; const response = await fetch(apiUrl, { @@ -343,4 +382,20 @@ ISO to Language: return assistantMessage; } + /** Resolves the optional system prompt setting: returns undefined when it is empty, the UTF-8 contents of the file when the value (resolved against process.cwd()) names an existing path, and otherwise the value itself as the prompt text. Logs which of the three sources was chosen. */ + private loadSystemPrompt(systemPrompt: string | undefined) { + if (!systemPrompt) { + console.log(chalk`├── using default system prompt`); + return undefined; + } + + const systemPromptFilePath = path.resolve(process.cwd(), systemPrompt); + if (fs.existsSync(systemPromptFilePath)) { + console.log(chalk`├── using system prompt from file: {green.bold ${systemPromptFilePath}}`); + return fs.readFileSync(systemPromptFilePath, 'utf-8'); + } + + console.log(chalk`├── using system prompt from string`); + return systemPrompt; + } }