Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,21 @@ Some context around building this is [located here](https://github.com/mpoon/gpt
To get started with `gpt-repository-loader`, follow these steps:

1. Ensure you have Python 3 installed on your system.
2. Clone or download the `gpt-repository-loader` repository.
3. Navigate to the repository's root directory in your terminal.
4. Run `gpt-repository-loader` with the following command:
2. Ensure you have GitPython installed on your system. You can install it with the following command:

```bash
pip install GitPython
```
3. Clone or download the `gpt-repository-loader` repository.
4. Navigate to the repository's root directory in your terminal.
5. Run `gpt-repository-loader` with the following command:

```bash
python gpt_repository_loader.py /path/to/git/repository
```
Replace `/path/to/git/repository` with the path to the Git repository you want to process.

5. The tool will generate an output.txt file containing the text representation of the repository. You can now use this file as input for AI language models or other text-based processing tasks.
6. The tool will generate an output.txt file containing the text representation of the repository. You can now use this file as input for AI language models or other text-based processing tasks.

## Running Tests

Expand Down
32 changes: 21 additions & 11 deletions gpt_repository_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import sys
import fnmatch
from git import Repo

def get_ignore_list(ignore_file_path):
ignore_list = []
Expand All @@ -18,17 +19,26 @@ def should_ignore(file_path, ignore_list):
return False

def process_repository(repo_path, ignore_list, output_file):
    """Write every tracked, non-ignored file of a Git repository to output_file.

    For each file, a separator line (``----``), the file's path relative to
    ``repo_path``, and the file's contents are written, in that order.

    Args:
        repo_path: Path to the root of the Git working tree to process.
        ignore_list: Patterns forwarded unchanged to ``should_ignore``.
        output_file: Writable text stream that receives the output.

    Any exception raised while opening the repository or running
    ``git ls-files`` propagates to the caller.
    """
    # Open the Git repository
    repo = Repo(repo_path)

    # Get the list of all tracked files in the repository. `-z` makes git
    # emit NUL-terminated, unquoted paths; without it, names containing
    # non-ASCII or special characters come back C-quoted and cannot be opened.
    tracked_files = [f for f in repo.git.ls_files('-z').split('\0') if f]

    for tracked_file in tracked_files:
        file_path = os.path.join(repo_path, tracked_file)
        relative_file_path = os.path.relpath(file_path, repo_path)

        # Skip anything that is not a readable regular file on disk:
        # directories (e.g. submodule entries), paths deleted from the
        # working tree but still in the index, and broken symlinks.
        if not os.path.isfile(file_path):
            continue

        if should_ignore(relative_file_path, ignore_list):
            continue

        # Undecodable bytes are dropped rather than aborting the whole run.
        with open(file_path, 'r', errors='ignore') as source_file:
            contents = source_file.read()
        output_file.write("-" * 4 + "\n")
        output_file.write(f"{relative_file_path}\n")
        output_file.write(f"{contents}\n")

if __name__ == "__main__":
if len(sys.argv) < 2:
Expand Down