diff --git a/repo_to_single_page.py b/repo_to_single_page.py index b5d966b..f7af268 100644 --- a/repo_to_single_page.py +++ b/repo_to_single_page.py @@ -33,6 +33,7 @@ import sys import tempfile import webbrowser +from collections import defaultdict, Counter from dataclasses import dataclass from typing import List, Tuple @@ -225,6 +226,382 @@ def generate_cxml_text(infos: List[FileInfo], repo_dir: pathlib.Path) -> str: return "\n".join(lines) +def generate_advanced_stats(infos: List[FileInfo]) -> str: + """Generate detailed repository statistics.""" + rendered = [i for i in infos if i.decision.include] + + # File type analysis + ext_stats = Counter() + lang_stats = defaultdict(lambda: {'count': 0, 'size': 0}) + + for file_info in rendered: + ext = pathlib.Path(file_info.rel).suffix.lower() or 'no-extension' + ext_stats[ext] += 1 + + # Language categorization + lang = 'Other' + if ext in {'.py', '.pyw'}: lang = 'Python' + elif ext in {'.js', '.jsx', '.ts', '.tsx'}: lang = 'JavaScript/TypeScript' + elif ext in {'.html', '.htm'}: lang = 'HTML' + elif ext in {'.css', '.scss', '.sass', '.less'}: lang = 'CSS' + elif ext in {'.java'}: lang = 'Java' + elif ext in {'.cpp', '.cc', '.cxx', '.c', '.h', '.hpp'}: lang = 'C/C++' + elif ext in {'.rs'}: lang = 'Rust' + elif ext in {'.go'}: lang = 'Go' + elif ext in {'.php'}: lang = 'PHP' + elif ext in {'.rb'}: lang = 'Ruby' + elif ext in {'.swift'}: lang = 'Swift' + elif ext in {'.kt', '.kts'}: lang = 'Kotlin' + elif ext in MARKDOWN_EXTENSIONS: lang = 'Markdown' + elif ext in {'.json', '.yaml', '.yml', '.toml', '.xml'}: lang = 'Config/Data' + elif ext in {'.sh', '.bash', '.zsh', '.fish', '.ps1', '.bat', '.cmd'}: lang = 'Shell Scripts' + + lang_stats[lang]['count'] += 1 + lang_stats[lang]['size'] += file_info.size + + # Directory depth analysis + depth_stats = Counter() + for file_info in rendered: + depth = len(file_info.rel.split('/')) - 1 + depth_stats[depth] += 1 + + # Size analysis + total_size = sum(f.size for f in rendered) + 
avg_size = total_size / len(rendered) if rendered else 0 + + # Generate HTML + stats_html = f""" +
+
+

📊 Size Analysis

+
Total Size: {bytes_human(total_size)}
+
Average File Size: {bytes_human(int(avg_size))}
+
Largest File: {bytes_human(max((f.size for f in rendered), default=0))}
+
+ +
+

๐Ÿ—‚๏ธ Directory Structure

+
Max Depth: {max(depth_stats.keys(), default=0)} levels
+
Root Files: {depth_stats.get(0, 0)}
+
Nested Files: {sum(count for depth, count in depth_stats.items() if depth > 0)}
+
+
+ +
+
+

🔤 Languages & File Types

+
+ {''.join(f''' +
+
{lang}
+
+ {stats["count"]} files • {bytes_human(stats["size"])} +
+
+ ''' for lang, stats in sorted(lang_stats.items(), key=lambda x: x[1]["count"], reverse=True)[:8])} +
+
+ +
+

📂 Top File Extensions

+
+ {''.join(f''' +
+ {ext} + {count} +
+ ''' for ext, count in ext_stats.most_common(12))} +
+
+
+ """ + + return stats_html + + +def add_pwa_features() -> str: + """Add PWA manifest and service worker inline.""" + return ''' + + + + + + + + + ''' + + +def add_export_features() -> str: + """Add export functionality.""" + return ''' + +
+

📤 Export & Share

+
+ + + + +
+
+ ''' + + +def add_interactive_features() -> str: + """Add interactive JavaScript features.""" + return ''' + + ''' + + def build_html(repo_url: str, repo_dir: pathlib.Path, head_commit: str, infos: List[FileInfo]) -> str: formatter = HtmlFormatter(nowrap=False) pygments_css = formatter.get_style_defs('.highlight') @@ -241,16 +618,103 @@ def build_html(repo_url: str, repo_dir: pathlib.Path, head_commit: str, infos: L # Generate CXML text for LLM view cxml_text = generate_cxml_text(infos, repo_dir) + + # Generate advanced stats + advanced_stats_html = generate_advanced_stats(infos) + + # Get additional features + pwa_features = add_pwa_features() + export_features = add_export_features() + interactive_features = add_interactive_features() - # Table of contents + # Table of contents with directory tree structure toc_items: List[str] = [] + + # Group files by directory for tree structure + file_tree = {} for i in rendered: - anchor = slugify(i.rel) - toc_items.append( - f'
  • {html.escape(i.rel)} ' - f'({bytes_human(i.size)})
  • ' - ) - toc_html = "".join(toc_items) + path_parts = i.rel.split('/') + current = file_tree + for part in path_parts[:-1]: # directories + if part not in current: + current[part] = {} + current = current[part] + # Add file to the current directory + if '_files' not in current: + current['_files'] = [] + current['_files'].append(i) + + def generate_tree_items(tree, path_prefix="", depth=0): + items = [] + + # First add directories + for dir_name in sorted(key for key in tree.keys() if key != '_files'): + dir_path = f"{path_prefix}/{dir_name}" if path_prefix else dir_name + indent = " " * depth + folder_icon = "๐Ÿ“" if depth == 0 else "๐Ÿ“‚" + items.append(f'
  • {indent}{folder_icon} {html.escape(dir_name)}/
  • ') + items.extend(generate_tree_items(tree[dir_name], dir_path, depth + 1)) + + # Then add files in current directory + if '_files' in tree: + for file_info in sorted(tree['_files'], key=lambda f: f.rel.split('/')[-1].lower()): + anchor = slugify(file_info.rel) + filename = file_info.rel.split('/')[-1] + indent = " " * (depth + 1) + + # Get file icon + ext = pathlib.Path(filename).suffix.lower() + file_icon = "๐Ÿ“„" # default + if ext in MARKDOWN_EXTENSIONS: + file_icon = "๐Ÿ“" + elif ext in {".py", ".pyw"}: + file_icon = "๐Ÿ" + elif ext in {".js", ".jsx", ".ts", ".tsx"}: + file_icon = "โšก" + elif ext in {".html", ".htm"}: + file_icon = "๐ŸŒ" + elif ext in {".css", ".scss", ".sass", ".less"}: + file_icon = "๐ŸŽจ" + elif ext in {".json", ".jsonl", ".yaml", ".yml", ".toml"}: + file_icon = "โš™๏ธ" + elif ext in {".sh", ".bash", ".zsh", ".fish", ".ps1", ".bat", ".cmd"}: + file_icon = "๐Ÿ”ง" + elif ext in {".sql"}: + file_icon = "๐Ÿ—ƒ๏ธ" + elif ext in {".java", ".class"}: + file_icon = "โ˜•" + elif ext in {".cpp", ".cc", ".cxx", ".c", ".h", ".hpp"}: + file_icon = "โš™๏ธ" + elif ext in {".rs"}: + file_icon = "๐Ÿฆ€" + elif ext in {".go"}: + file_icon = "๐Ÿ”ต" + elif ext in {".php"}: + file_icon = "๐Ÿ˜" + elif ext in {".rb"}: + file_icon = "๐Ÿ’Ž" + elif ext in {".swift"}: + file_icon = "๐Ÿ•Š๏ธ" + elif ext in {".kt", ".kts"}: + file_icon = "๐Ÿ“ฑ" + elif filename.lower() in {"readme", "readme.md", "readme.txt"}: + file_icon = "๐Ÿ“š" + elif filename.lower() in {"license", "licence", "copying"}: + file_icon = "๐Ÿ“œ" + elif ext in {".txt", ".log"}: + file_icon = "๐Ÿ“‹" + elif ext in {".xml"}: + file_icon = "๐Ÿท๏ธ" + elif ext in {".gitignore", ".gitattributes"}: + file_icon = "๐Ÿ™ˆ" + + items.append(f'
  • {indent}{file_icon} {html.escape(filename)} ({bytes_human(file_info.size)})
  • ') + + return items + + # Generate root level items + root_items = generate_tree_items(file_tree) + toc_html = "".join(root_items) # Render file sections sections: List[str] = [] @@ -258,18 +722,71 @@ def build_html(repo_url: str, repo_dir: pathlib.Path, head_commit: str, infos: L anchor = slugify(i.rel) p = i.path ext = p.suffix.lower() + + # Determine file icon based on extension + file_icon = "๐Ÿ“„" # default + if ext in MARKDOWN_EXTENSIONS: + file_icon = "๐Ÿ“" + elif ext in {".py", ".pyw"}: + file_icon = "๐Ÿ" + elif ext in {".js", ".jsx", ".ts", ".tsx"}: + file_icon = "โšก" + elif ext in {".html", ".htm"}: + file_icon = "๐ŸŒ" + elif ext in {".css", ".scss", ".sass", ".less"}: + file_icon = "๐ŸŽจ" + elif ext in {".json", ".jsonl", ".yaml", ".yml", ".toml"}: + file_icon = "โš™๏ธ" + elif ext in {".sh", ".bash", ".zsh", ".fish", ".ps1", ".bat", ".cmd"}: + file_icon = "๐Ÿ”ง" + elif ext in {".sql"}: + file_icon = "๐Ÿ—ƒ๏ธ" + elif ext in {".java", ".class"}: + file_icon = "โ˜•" + elif ext in {".cpp", ".cc", ".cxx", ".c", ".h", ".hpp"}: + file_icon = "โš™๏ธ" + elif ext in {".rs"}: + file_icon = "๐Ÿฆ€" + elif ext in {".go"}: + file_icon = "๐Ÿ”ต" + elif ext in {".php"}: + file_icon = "๐Ÿ˜" + elif ext in {".rb"}: + file_icon = "๐Ÿ’Ž" + elif ext in {".swift"}: + file_icon = "๐Ÿ•Š๏ธ" + elif ext in {".kt", ".kts"}: + file_icon = "๐Ÿ“ฑ" + elif ext in {".dockerfile", ".dockerignore"} or p.name.lower() in {"dockerfile", "docker-compose.yml", "docker-compose.yaml"}: + file_icon = "๐Ÿณ" + elif p.name.lower() in {"readme", "readme.md", "readme.txt"}: + file_icon = "๐Ÿ“š" + elif p.name.lower() in {"license", "licence", "copying"}: + file_icon = "๐Ÿ“œ" + elif ext in {".txt", ".log"}: + file_icon = "๐Ÿ“‹" + elif ext in {".xml"}: + file_icon = "๐Ÿท๏ธ" + elif ext in {".gitignore", ".gitattributes"}: + file_icon = "๐Ÿ™ˆ" + try: text = read_text(p) if ext in MARKDOWN_EXTENSIONS: - body_html = render_markdown_text(text) + body_html = f'
    {render_markdown_text(text)}
    ' else: code_html = highlight_code(text, i.rel, formatter) body_html = f'
    {code_html}
    ' except Exception as e: body_html = f'
    Failed to render: {html.escape(str(e))}
    ' + sections.append(f"""
    -

    {html.escape(i.rel)} ({bytes_human(i.size)})

    +

    +
    + {html.escape(i.rel)} ({bytes_human(i.size)}) +
    +

    {body_html}
    ↑ Back to top
    @@ -301,99 +818,1103 @@ def render_skip_list(title: str, items: List[FileInfo]) -> str: -Flattened repo – {html.escape(repo_url)} +📚 {html.escape(repo_url)} - Code Repository +{pwa_features} + + + +
    + +
    + +
    Repository Explorer
    +
    + + + +
    - -
    +
    +
    +

    Repository Explorer

    - - HEAD commit: {html.escape(head_commit)} -
    - Total files: {total_files} · Rendered: {len(rendered)} · Skipped: {len(skipped_binary) + len(skipped_large) + len(skipped_ignored)} -
    +
    ๐Ÿ“ Repository: {html.escape(repo_url)}
    +
    🔗 HEAD commit: {html.escape(head_commit[:12])}
    +
    + 📊 Statistics: {total_files} total files • {len(rendered)} rendered • {len(skipped_binary) + len(skipped_large) + len(skipped_ignored)} skipped +
    -
    +
    + + + {advanced_stats_html}
    - View: - - + View Mode: + +
    -
    -

    Directory tree

    +
    +

    🌳 Directory Structure

    {html.escape(tree_text)}
    -
    +
    -
    -

    Table of contents ({len(rendered)})

    +
    +

    📋 File Index ({len(rendered)} files)

    -
    + + + {export_features} -
    -

    Skipped items

    +
    +

    โš ๏ธ Excluded Files

    {skipped_html} -
    + - {''.join(sections)} +
    + {''.join(sections)} +
    -
    -

    🤖 LLM View - CXML Format

    -

    Copy the text below and paste it to an LLM for analysis:

    +
    +

    🤖 LLM-Optimized View

    +

    + This view presents the repository content in CXML format, optimized for Large Language Model analysis. + Simply copy the content below and paste it into your preferred LLM interface. +

    - 💡 Tip: Click in the text area and press Ctrl+A (Cmd+A on Mac) to select all, then Ctrl+C (Cmd+C) to copy. + 💡 Pro tip: Click in the text area above and use Ctrl+A (or Cmd+A on Mac) to select all content, then Ctrl+C (or Cmd+C) to copy to clipboard.
    -
    +
    + +{interactive_features} """