forked from jckpn/RecursiveGPT
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRecursiveGPT.py
More file actions
78 lines (63 loc) · 2.92 KB
/
RecursiveGPT.py
File metadata and controls
78 lines (63 loc) · 2.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import openai
import os
from tqdm import tqdm
import sys
def process_chunk(prompt, chunk, output_path):
    """Send one chunk of the document to the ChatGPT API and append the reply.

    Args:
        prompt: Instruction text prepended to the chunk.
        chunk: List of words (strings) forming this extract of the document.
        output_path: Path of the file the model's response is appended to.

    Raises:
        Whatever ``openai.ChatCompletion.create`` raises on API failure;
        nothing is written to ``output_path`` in that case.
    """
    messages = [{'role': 'system', 'content': 'I am a helpful assistant.'},
                {'role': 'user', 'content': (prompt + ' '.join(chunk))}]
    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=messages)
    reply = response['choices'][0]['message']['content']
    # Open the file only after the (slow) API call succeeds, and pin the
    # encoding so non-ASCII replies don't crash on platforms whose default
    # codec can't represent them.
    with open(output_path, 'a', encoding='utf-8') as output_file:
        output_file.write(reply + '\n\n')
def split_file_to_chunks(prompt, input_path, output_path, chunk_size):
    """Split the input file into word chunks and process each with the API.

    Reads the whole file, prints rough token/cost/time estimates, waits for
    the user to confirm, then feeds each ``chunk_size``-word slice (with a
    position note appended to the prompt) through ``process_chunk``, which
    appends each response to ``output_path``.

    Args:
        prompt: Instruction text prepended to every chunk.
        input_path: Path of the text file to process.
        output_path: Path of the file responses are appended to.
        chunk_size: Number of words per chunk (must be >= 1).
    """
    with open(input_path, 'r', encoding='utf-8') as file:
        content = file.read()
    words = content.split()

    # confirm with user
    est_tokens = len(words) / 0.75  # rule of thumb: ~0.75 words per token
    cost_per_token = 0.0002 / 1000
    est_cost = est_tokens * cost_per_token
    # Ceiling division: round() under-counts chunks when the final partial
    # chunk is less than half of chunk_size.
    num_chunks = -(-len(words) // chunk_size)
    est_time = est_tokens / 4000 * 1.5  # around 1.5 mins per 4000 tokens
    print(f'\nEstimated tokens required: {est_tokens:.1f} ({num_chunks} prompts with {chunk_size} words each)')
    print(f'Estimated cost: between ${est_cost:.2f}-${est_cost*2:.2f}')
    print(f'Estimated time: {est_time:.1f} minutes')
    print(f'Press RETURN to continue or exit (Ctrl+Z) to cancel.')
    input()

    print(f'Writing full output to file {output_path}...')
    for i in tqdm(range(0, len(words), chunk_size)):
        chunk = words[i:i+chunk_size]
        # Use len(chunk), not chunk_size, so the note is accurate for the
        # (possibly shorter) final chunk.
        full_prompt = prompt + f'\n(Note: the following is an extract, words {i}-{i + len(chunk)} of the {len(words)} word document.)\n\n'
        print(full_prompt)
        process_chunk(full_prompt, chunk, output_path)
    print(f'Finished writing to {output_path}.')
if __name__ == '__main__':
    # Interactive entry point: collect settings from stdin, validate them,
    # then hand off to split_file_to_chunks.
    api_key = input('Enter your OpenAI API key: ')
    openai.api_key = api_key
    # TODO: add checks for key validity

    input_path = input('Enter the input path to the text file to process: ')
    if not os.path.exists(input_path):
        print(f'`{input_path}` can\'t be found.')
        sys.exit()

    prompt = input('Enter the prompt to be prepended to each chunk of text: ')
    if prompt == '':
        print('A prompt is required to use this script.')
        sys.exit()

    output_path = input('Enter the output path to the text file to write to (default: output.txt): ')
    if output_path == '':
        output_path = 'output.txt'

    chunk_size = input('Enter the number of words per prompt (default: 2500): ')
    if chunk_size == '':
        chunk_size = 2500
    else:
        # Reject non-numeric input with a message instead of an unhandled
        # ValueError traceback.
        try:
            chunk_size = int(chunk_size)
        except ValueError:
            print('Chunk size must be a whole number.')
            sys.exit()
        if chunk_size < 1:
            print('Chunk size must be greater than 0.')
            sys.exit()
        elif chunk_size > 3000:
            print('Chunk sizes greater than ~3000 are likely to fail due to model limitations. Continue? (y/n)')
            if input() != 'y':
                sys.exit()

    split_file_to_chunks(prompt, input_path, output_path, chunk_size)