-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtts_cache_proxy.py
More file actions
241 lines (206 loc) · 8.9 KB
/
tts_cache_proxy.py
File metadata and controls
241 lines (206 loc) · 8.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
#!/usr/bin/env python3
"""
Minimal TTS cache proxy for OpenAI's /v1/audio/speech
- Pure standard library (no external deps)
- Caches responses (mp3) on disk by a stable hash of the request JSON
- Supports CORS so it can be called from a browser app
- Uses incoming Authorization: Bearer ... header if present,
otherwise falls back to OPENAI_API_KEY env var.
Usage:
python3 tts_cache_proxy.py [--host 127.0.0.1] [--port 22999]
Then point your client to:
fetch('http://127.0.0.1:22999/v1/audio/speech', { ... same body & headers ... })
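Notes:
- Only implements the specific endpoint used by your app: POST /v1/audio/speech
  (plus OPTIONS on the same path for CORS preflight)
- Only response_format == 'mp3' is supported (served as audio/mpeg); any other
  response_format is rejected with 400
- Any other path returns 404; HTTP methods other than POST/OPTIONS are not implemented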
"""
from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.request import Request, urlopen
from urllib.error import HTTPError, URLError
import os
import sys
import json
import hashlib
import time
from pathlib import Path
from argparse import ArgumentParser
# Upstream OpenAI endpoint that cache misses are forwarded to.
UPSTREAM_URL = 'https://api.openai.com/v1/audio/speech'

# Absolute path of the on-disk cache; override with TTS_CACHE_DIR.
CACHE_DIR = Path(os.getenv('TTS_CACHE_DIR', './tts_cache')).resolve()

# Default bind address; override with TTS_PROXY_HOST / TTS_PROXY_PORT.
DEFAULT_HOST = os.getenv('TTS_PROXY_HOST', '127.0.0.1')
DEFAULT_PORT = int(os.getenv('TTS_PROXY_PORT', '22999'))

# Value sent as Access-Control-Allow-Origin; set TTS_PROXY_CORS to a specific origin if needed.
ALLOW_ORIGIN = os.getenv('TTS_PROXY_CORS', '*')

# Make sure the cache directory exists as soon as the module is loaded.
CACHE_DIR.mkdir(parents=True, exist_ok=True)
def stable_cache_key(payload: dict) -> str:
    """Return a stable SHA-256 hex digest identifying *payload*.

    The payload is serialized to canonical JSON (keys sorted, no insignificant
    whitespace, non-ASCII characters kept as-is) so that two requests that
    differ only in key order or formatting map to the same cache entry.

    Args:
        payload: The decoded JSON request body.

    Returns:
        A 64-character lowercase hexadecimal digest.
    """
    norm = json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(',', ':'))
    # json.loads accepts lone surrogate escapes (e.g. "\ud83d"), which strict
    # UTF-8 encoding rejects with UnicodeEncodeError. 'surrogatepass' encodes
    # them instead of crashing and yields the same bytes as before for every
    # well-formed string, so existing cache keys stay valid.
    return hashlib.sha256(norm.encode('utf-8', errors='surrogatepass')).hexdigest()
def read_json_body(handler: BaseHTTPRequestHandler) -> dict:
    """Read the request body from *handler* and parse it as a JSON object.

    Every failure path replies to the client with a 400 via
    ``handler.send_error`` *before* raising, so a caller can simply stop
    processing when this function raises.

    Args:
        handler: The request handler whose ``headers`` and ``rfile`` are read.

    Returns:
        The decoded JSON object, or an empty dict when Content-Length is
        missing, zero or negative.

    Raises:
        ValueError: If Content-Length is not an integer, the body is not valid
            UTF-8 / JSON (``UnicodeDecodeError`` and ``json.JSONDecodeError``
            are both ``ValueError`` subclasses), or the JSON value is not an
            object.
    """
    try:
        length = int(handler.headers.get('Content-Length', '0'))
    except ValueError:
        handler.send_error(400, 'Invalid Content-Length')
        raise
    if length <= 0:
        return {}

    raw = handler.rfile.read(length)
    try:
        payload = json.loads(raw.decode('utf-8'))
    except ValueError:
        # Catches both undecodable bytes and malformed JSON.
        handler.send_error(400, 'Invalid JSON')
        raise

    # Callers rely on dict methods (.get), so reject arrays/scalars up front.
    if not isinstance(payload, dict):
        handler.send_error(400, 'JSON body must be an object')
        raise ValueError('JSON body must be an object')
    return payload
def put_cors_headers(handler: BaseHTTPRequestHandler):
    """Add the proxy's CORS response headers to the response being built.

    Callers in this file invoke it between ``send_response()`` and
    ``end_headers()``. The allowed origin comes from ``ALLOW_ORIGIN``.
    """
    cors_headers = (
        ('Access-Control-Allow-Origin', ALLOW_ORIGIN),
        ('Access-Control-Allow-Methods', 'POST, OPTIONS'),
        ('Access-Control-Allow-Headers', 'Authorization, Content-Type, X-Preferred-Cache-Key'),
        ('Access-Control-Max-Age', '600'),
    )
    for header_name, header_value in cors_headers:
        handler.send_header(header_name, header_value)
class TTSProxyHandler(BaseHTTPRequestHandler):
    """Request handler for the caching proxy in front of the upstream speech API.

    Handles ``OPTIONS`` (CORS preflight) and ``POST`` on ``/v1/audio/speech``.
    A POST is answered from the on-disk cache when a matching ``.mp3`` file
    exists; otherwise the request is forwarded upstream, and a successful
    audio response is stored in the cache before being relayed.
    """

    server_version = 'TTSCacheProxy/0.1'

    # The only path this proxy serves.
    _SPEECH_PATH = '/v1/audio/speech'
    # Upper bound on the length of a client-supplied cache key.
    _MAX_PREFERRED_KEY_LEN = 64

    def log(self, msg: str):
        """Write a timestamped line, prefixed with the client IP, to stdout."""
        ts = time.strftime('%Y-%m-%d %H:%M:%S')
        sys.stdout.write(f'[{ts}] {self.client_address[0]} {msg}\n')
        sys.stdout.flush()

    # Suppress overly verbose default logging
    def log_message(self, format, *args):
        """Discard the base class's per-request log output."""
        pass

    # ------------------------------------------------------------------
    # Response helpers
    # ------------------------------------------------------------------
    def _send_empty(self, status: int):
        """Send a body-less response carrying only the CORS headers."""
        self.send_response(status)
        put_cors_headers(self)
        self.end_headers()

    def _send_body(self, status: int, content_type: str, body: bytes, cache_status=None):
        """Send *body* with CORS headers, Content-Type and Content-Length.

        When *cache_status* is given it is reported in an ``X-Cache`` header.
        """
        self.send_response(status)
        put_cors_headers(self)
        self.send_header('Content-Type', content_type)
        self.send_header('Content-Length', str(len(body)))
        if cache_status is not None:
            self.send_header('X-Cache', cache_status)
        self.end_headers()
        self.wfile.write(body)

    def _send_json(self, status: int, obj: dict):
        """Send *obj* serialized as a UTF-8 JSON response."""
        self._send_body(status, 'application/json; charset=utf-8', json.dumps(obj).encode('utf-8'))

    # ------------------------------------------------------------------
    # Request helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _sanitize_preferred_key(raw) -> str:
        """Reduce a client-supplied cache key to a filename-safe string.

        Keeps only alphanumerics, ``-`` and ``_`` (so path separators and dots
        cannot escape the cache directory), trims leading/trailing ``-``/``_``
        and caps the length. Returns ``''`` when nothing usable remains.
        """
        if not raw:
            return ''
        cleaned = ''.join(c for c in raw if c.isalnum() or c in '-_')
        return cleaned.strip('-_')[:TTSProxyHandler._MAX_PREFERRED_KEY_LEN]

    def _resolve_authorization(self):
        """Return the Authorization value to send upstream, or ``None``.

        The incoming ``Authorization`` header wins; otherwise a Bearer token is
        built from the ``OPENAI_API_KEY`` environment variable if it is set.
        """
        auth = self.headers.get('Authorization')
        if auth:
            return auth
        env_key = os.environ.get('OPENAI_API_KEY')
        return f'Bearer {env_key}' if env_key else None

    # ------------------------------------------------------------------
    # HTTP verbs
    # ------------------------------------------------------------------
    def do_OPTIONS(self):
        """Answer a CORS preflight: 204 for the speech path, 404 otherwise."""
        self._send_empty(204 if self.path == self._SPEECH_PATH else 404)

    def do_POST(self):
        """Serve a speech request from cache, or fetch, cache and relay it."""
        if self.path != self._SPEECH_PATH:
            self._send_empty(404)
            return

        try:
            payload = read_json_body(self)
        except Exception:
            # read_json_body reports parse failures to the client itself.
            return

        # A JSON array/scalar would break the .get() lookups below.
        if not isinstance(payload, dict):
            self._send_json(400, {'error': 'Request body must be a JSON object'})
            return

        # Basic validation
        if payload.get('response_format', 'mp3') != 'mp3':
            self._send_json(400, {'error': 'Only response_format=mp3 is supported by this proxy'})
            return

        auth = self._resolve_authorization()
        if not auth:
            self._send_json(401, {'error': 'Missing Authorization header and OPENAI_API_KEY env'})
            return

        # A usable X-Preferred-Cache-Key (for readable filenames) overrides the
        # content hash of the payload.
        key = (
            self._sanitize_preferred_key(self.headers.get('X-Preferred-Cache-Key'))
            or stable_cache_key(payload)
        )
        mp3_path = CACHE_DIR / f'{key}.mp3'

        # Serve from cache if exists
        if mp3_path.exists():
            self.log(f'CACHE HIT {key} ({mp3_path.name})')
            # Only the disk read is guarded: once headers have gone out, an
            # error must not trigger a second (500) response on the same socket.
            try:
                data = mp3_path.read_bytes()
            except OSError as e:
                self.log(f'Error reading cache: {e}')
                self.send_error(500, 'Failed to read cache')
                return
            self._send_body(200, 'audio/mpeg', data, cache_status='HIT')
            return

        # Otherwise, forward to OpenAI
        self.log(f'CACHE MISS {key} → forwarding to OpenAI')
        req = Request(
            UPSTREAM_URL,
            data=json.dumps(payload).encode('utf-8'),
            headers={
                'Authorization': auth,
                'Content-Type': 'application/json',
                'Accept': 'audio/mpeg',
            },
            method='POST',
        )
        try:
            with urlopen(req, timeout=120) as resp:
                status = resp.getcode()
                data = resp.read()
                ctype = resp.headers.get('Content-Type', 'audio/mpeg')
        except HTTPError as e:
            # Relay the upstream error status and body unchanged.
            err_body = e.read() or b''
            err_ctype = (
                e.headers.get('Content-Type', 'text/plain; charset=utf-8')
                if e.headers else 'text/plain; charset=utf-8'
            )
            self.log(f'Upstream HTTPError {e.code}: {err_body[:200]!r}')
            self._send_body(e.code, err_ctype, err_body)
            return
        except URLError as e:
            self.log(f'Upstream URLError: {e}')
            self._send_json(502, {'error': f'Upstream connection failed: {e}'})
            return
        except Exception as e:
            self.log(f'Unexpected error: {e}')
            self._send_json(500, {'error': f'Unexpected error: {e}'})
            return

        # Only cache successful, non-empty audio responses. Writing to a
        # temporary name and renaming means a cache file is never seen
        # half-written.
        if status == 200 and data and ctype.lower().startswith('audio/'):
            tmp_path = mp3_path.with_suffix('.mp3.part')
            try:
                tmp_path.write_bytes(data)
                os.replace(tmp_path, mp3_path)
            except OSError as e:
                # Caching is best-effort; the response is still relayed.
                self.log(f'Failed to write cache: {e}')

        # Relay downstream with the content type upstream actually reported,
        # so a non-audio body is not mislabelled as audio/mpeg.
        self._send_body(status, ctype, data, cache_status='MISS')
def main():
    """Parse the command-line options and run the proxy until interrupted.

    Binds an ``HTTPServer`` with ``TTSProxyHandler`` to the requested
    host/port, prints a startup banner, and always closes the server socket
    on the way out (including after Ctrl-C).
    """
    parser = ArgumentParser(description='Minimal OpenAI TTS cache proxy')
    parser.add_argument('--host', default=DEFAULT_HOST, help=f'Bind host (default {DEFAULT_HOST})')
    parser.add_argument('--port', type=int, default=DEFAULT_PORT, help=f'Bind port (default {DEFAULT_PORT})')
    opts = parser.parse_args()

    bind_address = (opts.host, opts.port)
    httpd = HTTPServer(bind_address, TTSProxyHandler)

    banner = (
        f'TTS cache proxy listening on http://{opts.host}:{opts.port}\n'
        f'Cache dir: {CACHE_DIR}\n'
        f'Upstream: {UPSTREAM_URL}\n'
    )
    print(banner)

    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print('\nShutting down...')
    finally:
        httpd.server_close()


# Run the server only when executed as a script, not when imported.
if __name__ == '__main__':
    main()