From 337d21066e41eae71038a28c3641be24221112e0 Mon Sep 17 00:00:00 2001 From: hadishakir Date: Fri, 17 Mar 2023 15:21:09 +0400 Subject: [PATCH] Use GitPython library - to select only tracked files - works with .gitignore --- README.md | 13 +++++++++---- gpt_repository_loader.py | 32 +++++++++++++++++++++----------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index f551d06..83a0544 100644 --- a/README.md +++ b/README.md @@ -10,16 +10,21 @@ Some context around building this is [located here](https://github.com/mpoon/gpt To get started with `gpt-repository-loader`, follow these steps: 1. Ensure you have Python 3 installed on your system. -2. Clone or download the `gpt-repository-loader` repository. -3. Navigate to the repository's root directory in your terminal. -4. Run `gpt-repository-loader` with the following command: +2. Ensure you have GitPython installed on your system. You can install it with the following command: + + ```bash + pip install GitPython + ``` +3. Clone or download the `gpt-repository-loader` repository. +4. Navigate to the repository's root directory in your terminal. +5. Run `gpt-repository-loader` with the following command: ```bash python gpt_repository_loader.py /path/to/git/repository ``` Replace `/path/to/git/repository` with the path to the Git repository you want to process. -5. The tool will generate an output.txt file containing the text representation of the repository. You can now use this file as input for AI language models or other text-based processing tasks. +6. The tool will generate an output.txt file containing the text representation of the repository. You can now use this file as input for AI language models or other text-based processing tasks. ## Running Tests diff --git a/gpt_repository_loader.py b/gpt_repository_loader.py index 68c663d..ae814cc 100755 --- a/gpt_repository_loader.py +++ b/gpt_repository_loader.py @@ -3,6 +3,7 @@ import os import sys import fnmatch +from git import Repo def get_ignore_list(ignore_file_path): ignore_list = [] @@ -18,17 +19,26 @@ def should_ignore(file_path, ignore_list): return False def process_repository(repo_path, ignore_list, output_file): - for root, _, files in os.walk(repo_path): - for file in files: - file_path = os.path.join(root, file) - relative_file_path = os.path.relpath(file_path, repo_path) - - if not should_ignore(relative_file_path, ignore_list): - with open(file_path, 'r', errors='ignore') as file: - contents = file.read() - output_file.write("-" * 4 + "\n") - output_file.write(f"{relative_file_path}\n") - output_file.write(f"{contents}\n") + # Open the Git repository + repo = Repo(repo_path) + + # Get the list of all tracked files in the repository + tracked_files = [f for f in repo.git.ls_files().split('\n') if f] + + for file in tracked_files: + file_path = os.path.join(repo_path, file) + relative_file_path = os.path.relpath(file_path, repo_path) + + # If the path is a directory, skip it + if os.path.isdir(file_path): + continue + + if not should_ignore(relative_file_path, ignore_list): + with open(file_path, 'r', errors='ignore') as file: + contents = file.read() + output_file.write("-" * 4 + "\n") + output_file.write(f"{relative_file_path}\n") + output_file.write(f"{contents}\n") if __name__ == "__main__": if len(sys.argv) < 2: