From 4640e1dac2928b612a660c592eb1762678e58223 Mon Sep 17 00:00:00 2001
From: Leo Torres <leo@leotrs.com>
Date: Thu, 26 Mar 2026 10:50:22 +0100
Subject: [PATCH] Add researcher-friendly error messages for zip upload
 validation

Translate technical ZipValidator errors into plain language that
non-developer researchers can understand and act on. Groups related
errors (e.g. multiple forbidden files), strips jargon (no "symlink",
"MIME type", "path traversal"), and adds non-blocking warnings for
skipped files, large images, and high file counts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 app/templates/partials/upload_form.html |   6 +
 app/upload/__init__.py                  |  10 +-
 app/upload/zip_errors.py                | 376 ++++++++++++++++++++++++
 static/css/main.css                     |  42 ++-
 tests/test_zip_errors.py                | 370 +++++++++++++++++++++++
 5 files changed, 802 insertions(+), 2 deletions(-)
 create mode 100644 app/upload/zip_errors.py
 create mode 100644 tests/test_zip_errors.py
diff --git a/app/templates/partials/upload_form.html b/app/templates/partials/upload_form.html
index cbd1fa2..69498bf 100644
--- a/app/templates/partials/upload_form.html
+++ b/app/templates/partials/upload_form.html
@@ -36,6 +36,12 @@ <h1>Upload New Scroll</h1>
 </div>
 {% endif %}
 
+{% if warnings %}
+<div class="form-warnings" id="form-warnings">
+    {{ warnings|safe }}
+</div>
+{% endif %}
+
 <form id="upload-form" method="POST" action="/upload-form" enctype="multipart/form-data">
     <input type="hidden" name="csrf_token" value="{{ csrf_token }}">
     {{ form_input("Title", "title", "text", form_data.title if form_data else "", required=true, help_text="A clear, descriptive title for your research scroll") }}
diff --git a/app/upload/__init__.py b/app/upload/__init__.py
index 6b5b892..8829fa6 100644
--- a/app/upload/__init__.py
+++ b/app/upload/__init__.py
@@ -2,5 +2,13 @@
 
 from .processors import HTMLProcessor
 from .validators import FileValidator
+from .zip_errors import ZipUploadResult, translate_zip_errors
+from .zip_validator import ZipValidator
 
-__all__ = ["HTMLProcessor", "FileValidator"]
+__all__ = [
+    "HTMLProcessor",
+    "FileValidator",
+    "ZipUploadResult",
+    "ZipValidator",
+    "translate_zip_errors",
+]
diff --git a/app/upload/zip_errors.py b/app/upload/zip_errors.py
new file mode 100644
index 0000000..fc570d7
--- /dev/null
+++ b/app/upload/zip_errors.py
@@ -0,0 +1,376 @@
+"""Researcher-friendly error messages for zip upload validation failures.
+
+Translates technical ZipValidator errors into plain-language messages that
+researchers (who may not be web developers) can understand and act on.
+"""
+
+from dataclasses import dataclass, field
+import html
+import re
+
+# Threshold for "many files" warning
+HIGH_FILE_COUNT_THRESHOLD = 200
+
+# Threshold for "large image" warning (10MB)
+LARGE_IMAGE_THRESHOLD = 10 * 1024 * 1024
+
+
+@dataclass
+class ZipUploadResult:
+    """Structured result of zip upload validation with friendly messages."""
+
+    errors: list[str] = field(default_factory=list)
+    warnings: list[str] = field(default_factory=list)
+    skipped_files: list[str] = field(default_factory=list)
+
+    @property
+    def has_errors(self) -> bool:
+        return len(self.errors) > 0
+
+    @property
+    def has_warnings(self) -> bool:
+        return len(self.warnings) > 0
+
+    def format_errors_html(self) -> str:
+        if not self.errors:
+            return ""
+        escaped = [html.escape(e) for e in self.errors]
+        items = "".join(f"<li>{e}</li>" for e in escaped)
+        return (
+            f"<strong>We found some problems with your archive:</strong>"
+            f'<ul style="margin-top: 0.5rem; margin-bottom: 0;">{items}</ul>'
+        )
+
+    def format_warnings_html(self) -> str:
+        if not self.warnings:
+            return ""
+        escaped = [html.escape(w) for w in self.warnings]
+        items = "".join(f"<li>{w}</li>" for w in escaped)
+        return f'<ul style="margin-top: 0.5rem; margin-bottom: 0;">{items}</ul>'
+
+
+# --- Pattern matchers for raw error classification ---
+
+_ARCHIVE_SIZE_RE = re.compile(r"Archive size \((.+?)\) exceeds the (\d+)MB limit")
+_UNCOMPRESSED_SIZE_RE = re.compile(r"Total uncompressed size \((.+?)\) exceeds the (\d+)MB limit")
+_FILE_COUNT_RE = re.compile(r"Archive contains (\d+) files, maximum is (\d+)")
+_DISALLOWED_EXT_RE = re.compile(r"File '(.+?)' has disallowed extension '(.+?)'")
+_NESTED_ARCHIVE_RE = re.compile(r"File '(.+?)' is a nested archive")
+_COMPRESSION_RATIO_RE = re.compile(r"File '(.+?)' has suspicious compression ratio")
+_SVG_FORBIDDEN_TAG_RE = re.compile(r"SVG '(.+?)' contains forbidden <(.+?)> element")
+_SVG_EVENT_HANDLER_RE = re.compile(
+    r"SVG '(.+?)' contains forbidden event handler attribute '(.+?)'"
+)
+_SVG_DANGEROUS_HREF_RE = re.compile(r"SVG '(.+?)' contains dangerous href")
+_HTML_VALIDATION_RE = re.compile(r"HTML validation error in '(.+?)': (.+)")
+_CONTENT_VALIDATION_RE = re.compile(r"Content validation error in '(.+?)': (.+)")
+_MIME_SPOOFING_RE = re.compile(r"File '(.+?)' has extension '.+?' but MIME type is '.+?'")
+_FILE_SIZE_LIMIT_RE = re.compile(r"File '(.+?)' \((.+?)\) exceeds the (\d+)MB size limit")
+_PATH_TRAVERSAL_RE = re.compile(r"Path traversal detected")
+_ABSOLUTE_PATH_RE = re.compile(r"Absolute path detected")
+_NULL_BYTE_RE = re.compile(r"Null byte detected")
+_BACKSLASH_RE = re.compile(r"Backslash detected in path")
+_PATH_RESOLVES_RE = re.compile(r"resolves outside extraction directory")
+_SYMLINK_RE = re.compile(r"Symlink detected")
+_FILENAME_CHARS_RE = re.compile(r"Filename '.+?' contains invalid characters")
+_FILENAME_LENGTH_RE = re.compile(r"Filename component '.+?' exceeds")
+_RESERVED_NAME_RE = re.compile(r"uses reserved name")
+_UTF8_RE = re.compile(r"File '(.+?)' is not valid UTF-8")
+_HTML_UTF8_RE = re.compile(r"HTML file '(.+?)' is not valid UTF-8")
+_NO_HTML_RE = re.compile(r"Archive must contain at least one .html file")
+_CORRUPT_RE = re.compile(r"not a valid zip archive|Could not open archive")
+_NOT_FOUND_RE = re.compile(r"Archive file not found")
+_SVG_PARSE_RE = re.compile(r"SVG file '(.+?)' could not be parsed")
+
+
+def translate_zip_errors(
+    raw_errors: list[str],
+    *,
+    skipped_files: list[str] | None = None,
+    large_files: list[tuple[str, int]] | None = None,
+    file_count: int | None = None,
+) -> ZipUploadResult:
+    """Translate raw ZipValidator errors into researcher-friendly messages.
+
+    Groups related errors (e.g. multiple forbidden file types) into single messages
+    so the researcher gets a clear, actionable summary.
+
+    Args:
+        raw_errors: Error strings from ZipValidator.validate()
+        skipped_files: Hidden/metadata files that were skipped (for warning)
+        large_files: List of (filename, size_bytes) for large image files (for warning)
+        file_count: Total file count in archive (for warning if high)
+    """
+    result = ZipUploadResult()
+
+    # Buckets for grouping related errors
+    forbidden_files: list[str] = []
+    nested_archives: list[str] = []
+    path_issues: list[str] = []
+    svg_issues: list[tuple[str, str]] = []  # (filename, description)
+    html_issues: list[tuple[str, str]] = []  # (filename, detail)
+    content_issues: list[tuple[str, str]] = []
+    file_size_issues: list[tuple[str, str]] = []  # (filename, category)
+    mime_issues: list[str] = []  # filenames
+    utf8_issues: list[str] = []  # filenames
+
+    for err in raw_errors:
+        # Archive-level: corrupt
+        if _CORRUPT_RE.search(err):
+            result.errors.append(
+                "We couldn't read your archive. Please try creating a new zip file "
+                "from your project folder."
+            )
+            continue
+
+        # Archive-level: not found
+        if _NOT_FOUND_RE.search(err):
+            result.errors.append("We couldn't find the uploaded file. Please try uploading again.")
+            continue
+
+        # Archive size
+        m = _ARCHIVE_SIZE_RE.search(err)
+        if m:
+            size, limit = m.group(1), m.group(2)
+            result.errors.append(
+                f"Your archive is {size}, which exceeds our {limit} MB limit. "
+                "Large files are usually images or data -- try compressing images "
+                "or hosting large datasets externally."
+            )
+            continue
+
+        # Uncompressed size
+        m = _UNCOMPRESSED_SIZE_RE.search(err)
+        if m:
+            size, limit = m.group(1), m.group(2)
+            result.errors.append(
+                f"Your archive is too large when extracted ({size}, limit {limit} MB). "
+                "Try compressing images or removing unnecessary files."
+            )
+            continue
+
+        # File count
+        m = _FILE_COUNT_RE.search(err)
+        if m:
+            count, maximum = m.group(1), m.group(2)
+            result.errors.append(
+                f"Your archive contains {count} files, which is more than our limit "
+                f"of {maximum}. Please reduce the number of files or remove "
+                "unnecessary ones."
+            )
+            continue
+
+        # No HTML
+        if _NO_HTML_RE.search(err):
+            result.errors.append(
+                "Your archive doesn't contain an HTML file. Press publishes "
+                "HTML research papers -- please include your paper as an .html file."
+            )
+            continue
+
+        # Disallowed extension -> group
+        m = _DISALLOWED_EXT_RE.search(err)
+        if m:
+            forbidden_files.append(m.group(1))
+            continue
+
+        # Nested archive -> group
+        m = _NESTED_ARCHIVE_RE.search(err)
+        if m:
+            nested_archives.append(m.group(1))
+            continue
+
+        # Compression ratio (zip bomb)
+        if _COMPRESSION_RATIO_RE.search(err):
+            result.errors.append(
+                "Your archive has an unusual compression pattern. "
+                "Please create a standard zip file from your project folder."
+            )
+            continue
+
+        # SVG forbidden tag
+        m = _SVG_FORBIDDEN_TAG_RE.search(err)
+        if m:
+            svg_issues.append((m.group(1), f"contains a <{m.group(2)}> element"))
+            continue
+
+        # SVG event handler
+        m = _SVG_EVENT_HANDLER_RE.search(err)
+        if m:
+            svg_issues.append((m.group(1), "contains interactive code"))
+            continue
+
+        # SVG dangerous href
+        m = _SVG_DANGEROUS_HREF_RE.search(err)
+        if m:
+            svg_issues.append((m.group(1), "contains interactive code"))
+            continue
+
+        # SVG parse error
+        m = _SVG_PARSE_RE.search(err)
+        if m:
+            svg_issues.append((m.group(1), "could not be read"))
+            continue
+
+        # HTML validation error
+        m = _HTML_VALIDATION_RE.search(err)
+        if m:
+            html_issues.append((m.group(1), m.group(2)))
+            continue
+
+        # Content validation error
+        m = _CONTENT_VALIDATION_RE.search(err)
+        if m:
+            content_issues.append((m.group(1), m.group(2)))
+            continue
+
+        # MIME type spoofing
+        m = _MIME_SPOOFING_RE.search(err)
+        if m:
+            mime_issues.append(m.group(1))
+            continue
+
+        # Per-file size limit
+        m = _FILE_SIZE_LIMIT_RE.search(err)
+        if m:
+            file_size_issues.append((m.group(1), m.group(2)))
+            continue
+
+        # Path safety issues -> group
+        if any(
+            p.search(err)
+            for p in [
+                _PATH_TRAVERSAL_RE,
+                _ABSOLUTE_PATH_RE,
+                _NULL_BYTE_RE,
+                _BACKSLASH_RE,
+                _PATH_RESOLVES_RE,
+            ]
+        ):
+            path_issues.append(err)
+            continue
+
+        # Symlinks -> path issue
+        if _SYMLINK_RE.search(err):
+            path_issues.append(err)
+            continue
+
+        # Filename issues -> path issue
+        if any(
+            p.search(err) for p in [_FILENAME_CHARS_RE, _FILENAME_LENGTH_RE, _RESERVED_NAME_RE]
+        ):
+            path_issues.append(err)
+            continue
+
+        # UTF-8 issues
+        m = _UTF8_RE.search(err) or _HTML_UTF8_RE.search(err)
+        if m:
+            utf8_issues.append(m.group(1))
+            continue
+
+        # Fallback: pass through unknown errors
+        result.errors.append(err)
+
+    # Emit grouped messages
+
+    if forbidden_files:
+        file_list = ", ".join(forbidden_files[:5])
+        extra = f" (and {len(forbidden_files) - 5} more)" if len(forbidden_files) > 5 else ""
+        result.errors.append(
+            f"Your archive contains files we can't host: {file_list}{extra}. "
+            "Please remove executable or archive files and re-upload. "
+            "HTML, CSS, JavaScript, images, fonts, and data files are all fine."
+        )
+
+    if nested_archives:
+        file_list = ", ".join(nested_archives[:5])
+        extra = f" (and {len(nested_archives) - 5} more)" if len(nested_archives) > 5 else ""
+        result.errors.append(
+            f"Your archive contains other archives: {file_list}{extra}. "
+            "Please zip your project folder directly -- don't zip a zip."
+        )
+
+    if path_issues:
+        result.errors.append(
+            "Some files in your archive have unusual paths. "
+            "Please re-create the zip from your project folder directly."
+        )
+
+    if svg_issues:
+        # Group by filename
+        seen = {}
+        for filename, desc in svg_issues:
+            if filename not in seen:
+                seen[filename] = desc
+        for filename, desc in seen.items():
+            result.errors.append(
+                f"One of your SVG images ({filename}) {desc}, which we can't "
+                "allow for security reasons. Please use a static SVG or convert to PNG."
+            )
+
+    if html_issues:
+        # Group by file
+        by_file: dict[str, list[str]] = {}
+        for filename, detail in html_issues:
+            by_file.setdefault(filename, []).append(detail)
+        for filename, details in by_file.items():
+            if len(details) == 1:
+                result.errors.append(
+                    f"Your HTML file ({filename}) has content we can't accept: {details[0]}"
+                )
+            else:
+                summary = "; ".join(details[:3])
+                extra = f" (and {len(details) - 3} more issues)" if len(details) > 3 else ""
+                result.errors.append(
+                    f"Your HTML file ({filename}) has content we can't accept: {summary}{extra}"
+                )
+
+    if content_issues:
+        for filename, detail in content_issues:
+            result.errors.append(f"Your HTML file ({filename}) has a content issue: {detail}")
+
+    if mime_issues:
+        file_list = ", ".join(mime_issues[:3])
+        result.errors.append(
+            f"Some files don't match their extension ({file_list}). "
+            "Please make sure files have the correct extension for their type."
+        )
+
+    if file_size_issues:
+        for filename, category in file_size_issues:
+            result.errors.append(
+                f"The file '{filename}' is too large for its type ({category}). "
+                "Please reduce the file size or remove it."
+            )
+
+    if utf8_issues:
+        file_list = ", ".join(utf8_issues[:3])
+        result.errors.append(
+            f"Some text files are not UTF-8 encoded ({file_list}). "
+            "Please save them with UTF-8 encoding."
+        )
+
+    # --- Warnings ---
+
+    if skipped_files:
+        count = len(skipped_files)
+        result.skipped_files = skipped_files
+        result.warnings.append(
+            f"{count} file{'s were' if count != 1 else ' was'} skipped "
+            "(macOS metadata, hidden files) -- this is normal."
+        )
+
+    if large_files:
+        names = ", ".join(name for name, _ in large_files[:3])
+        extra = f" (and {len(large_files) - 3} more)" if len(large_files) > 3 else ""
+        result.warnings.append(
+            f"Some image files are large (over 10 MB): {names}{extra}. "
+            "This may affect loading times for readers."
+        )
+
+    if file_count is not None and file_count >= HIGH_FILE_COUNT_THRESHOLD:
+        result.warnings.append(
+            f"Your archive has {file_count} files. Large archives may take longer to load."
+        )
+
+    return result
diff --git a/static/css/main.css b/static/css/main.css
index eeac00b..9ea48ee 100644
--- a/static/css/main.css
+++ b/static/css/main.css
@@ -28,6 +28,9 @@
   --error-border: #fecaca;
   --highlight-bg: #fef3c7;
   --highlight-text: #92400e;
+  --warning-bg: #fffbeb;
+  --warning-border: #fde68a;
+  --warning-text: #92400e;
 }
 
 /* Dark theme colors */
@@ -57,6 +60,9 @@
   --error-border: #991b1b;
   --highlight-bg: #92400e;
   --highlight-text: #fcd34d;
+  --warning-bg: #451a03;
+  --warning-border: #78350f;
+  --warning-text: #fcd34d;
 }
 
 /* Auto dark mode based on browser preference */
@@ -87,6 +93,9 @@
     --error-border: #991b1b;
     --highlight-bg: #92400e;
     --highlight-text: #fcd34d;
+    --warning-bg: #451a03;
+    --warning-border: #78350f;
+    --warning-text: #fcd34d;
   }
 }
 
@@ -582,6 +591,26 @@ button.btn, a.btn {
 .form-errors li:last-child {
   margin-bottom: 0;
 }
+.form-warnings {
+  background: var(--warning-bg);
+  border: 1px solid var(--warning-border);
+  border-radius: var(--border-radius);
+  padding: var(--space-md);
+  margin-bottom: var(--space-lg);
+}
+.form-warnings ul {
+  list-style: none;
+  margin: 0;
+  padding: 0;
+}
+.form-warnings li {
+  color: var(--warning-text);
+  font-size: var(--text-sm);
+  margin-bottom: var(--space-xs);
+}
+.form-warnings li:last-child {
+  margin-bottom: 0;
+}
 .success-message {
   background: var(--success-bg);
   border: 1px solid var(--success-border);
@@ -695,7 +724,18 @@ button.btn, a.btn {
     font-size: var(--mobile-text-sm);
     margin-bottom: var(--mobile-space-xs);
   }
-  
+
+  .form-warnings {
+    padding: var(--mobile-space-lg);
+    margin-bottom: var(--mobile-space-lg);
+    border-radius: var(--border-radius);
+  }
+
+  .form-warnings li {
+    font-size: var(--mobile-text-sm);
+    margin-bottom: var(--mobile-space-xs);
+  }
+
   .success-message {
     padding: var(--mobile-space-xl);
     border-radius: var(--border-radius);
diff --git a/tests/test_zip_errors.py b/tests/test_zip_errors.py
new file mode 100644
index 0000000..71baee1
--- /dev/null
+++ b/tests/test_zip_errors.py
@@ -0,0 +1,370 @@
+"""Tests for researcher-friendly zip upload error messages."""
+
+import io
+import stat
+import struct
+import zipfile
+import zlib
+
+from app.upload.zip_errors import (
+    ZipUploadResult,
+    translate_zip_errors,
+)
+from app.upload.zip_validator import ZipValidator
+
+
+def _make_zip(
+    files: dict[str, bytes | str], *, symlinks: list[tuple[str, str]] | None = None
+) -> bytes:
+    buf = io.BytesIO()
+    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
+        for name, content in files.items():
+            if isinstance(content, str):
+                content = content.encode("utf-8")
+            zf.writestr(name, content)
+        if symlinks:
+            for link_name, target in symlinks:
+                info = zipfile.ZipInfo(link_name)
+                info.external_attr = (stat.S_IFLNK | 0o777) << 16
+                zf.writestr(info, target.encode("utf-8"))
+    return buf.getvalue()
+
+
+def _minimal_png() -> bytes:
+    signature = b"\x89PNG\r\n\x1a\n"
+
+    def _chunk(chunk_type: bytes, data: bytes) -> bytes:
+        c = chunk_type + data
+        return struct.pack(">I", len(data)) + c + struct.pack(">I", zlib.crc32(c) & 0xFFFFFFFF)
+
+    ihdr_data = struct.pack(">IIBBBBB", 1, 1, 8, 2, 0, 0, 0)
+    ihdr = _chunk(b"IHDR", ihdr_data)
+    raw_row = b"\x00\xff\xff\xff"
+    idat = _chunk(b"IDAT", zlib.compress(raw_row))
+    iend = _chunk(b"IEND", b"")
+    return signature + ihdr + idat + iend
+
+
+MINIMAL_PNG = _minimal_png()
+
+MINIMAL_HTML = """<!DOCTYPE html>
+<html><head><title>Test</title></head>
+<body><p>This is a test academic paper with enough words to pass content validation.
+We need at least one hundred words so let us keep writing some more filler text here.
+The quick brown fox jumps over the lazy dog multiple times in this paragraph.
+Research shows that academic papers need structure and content to be meaningful.
+This document serves as a minimal valid HTML file for testing the zip validator.
+Additional sentences help us reach the word count threshold for validation.
+More text follows to ensure we have sufficient content for the validator.
+The final sentence in this paragraph completes our minimal test document.</p></body></html>"""
+
+
+class TestTranslateZipErrors:
+    """Test that technical errors become researcher-friendly messages."""
+
+    def test_no_html_found(self):
+        raw_errors = ["Archive must contain at least one .html file."]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert "HTML" in result.errors[0]
+        assert "include" in result.errors[0].lower() or ".html" in result.errors[0]
+
+    def test_archive_too_large(self):
+        raw_errors = ["Archive size (55.3MB) exceeds the 50MB limit."]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert "55.3 MB" in result.errors[0] or "55.3MB" in result.errors[0]
+        assert "50 MB" in result.errors[0] or "50MB" in result.errors[0]
+
+    def test_uncompressed_too_large(self):
+        raw_errors = ["Total uncompressed size (250MB) exceeds the 200MB limit."]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert (
+            "extracted" in result.errors[0].lower() or "uncompressed" in result.errors[0].lower()
+        )
+
+    def test_corrupt_archive(self):
+        raw_errors = ["File is not a valid zip archive (corrupt or wrong format)."]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert "read" in result.errors[0].lower() or "new zip" in result.errors[0].lower()
+
+    def test_forbidden_files_grouped(self):
+        raw_errors = [
+            "File 'payload.exe' has disallowed extension '.exe'. Allowed: ...",
+            "File 'script.bat' has disallowed extension '.bat'. Allowed: ...",
+            "File 'run.sh' has disallowed extension '.sh'. Allowed: ...",
+        ]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert "payload.exe" in result.errors[0]
+        assert "script.bat" in result.errors[0]
+        assert "run.sh" in result.errors[0]
+
+    def test_nested_archives_grouped(self):
+        raw_errors = [
+            "File 'inner.zip' is a nested archive. Nested archives are not allowed.",
+            "File 'data.tar.gz' is a nested archive. Nested archives are not allowed.",
+        ]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert "inner.zip" in result.errors[0]
+        assert "data.tar.gz" in result.errors[0]
+
+    def test_path_issues(self):
+        raw_errors = [
+            "Path traversal detected in '../../../etc/passwd'. Paths containing '..' are not allowed."
+        ]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        # Should not expose the path traversal attempt detail
+        assert (
+            "unusual paths" in result.errors[0].lower() or "re-create" in result.errors[0].lower()
+        )
+
+    def test_too_many_files(self):
+        raw_errors = ["Archive contains 501 files, maximum is 500."]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert "500" in result.errors[0]
+
+    def test_zip_bomb_detected(self):
+        raw_errors = [
+            "File 'bomb.html' has suspicious compression ratio (150:1). Maximum allowed is 100:1."
+        ]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert (
+            "compression" in result.errors[0].lower() or "standard zip" in result.errors[0].lower()
+        )
+
+    def test_svg_security(self):
+        raw_errors = ["SVG 'image.svg' contains forbidden <script> element."]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert "image.svg" in result.errors[0]
+        assert "SVG" in result.errors[0]
+
+    def test_svg_event_handler(self):
+        raw_errors = ["SVG 'chart.svg' contains forbidden event handler attribute 'onclick'."]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert "chart.svg" in result.errors[0]
+
+    def test_svg_dangerous_href(self):
+        raw_errors = ["SVG 'link.svg' contains dangerous href: 'javascript:alert(1)'."]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert "link.svg" in result.errors[0]
+
+    def test_html_validation_error(self):
+        raw_errors = [
+            "HTML validation error in 'index.html': Forbidden tag <iframe> is not allowed"
+        ]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert "index.html" in result.errors[0]
+
+    def test_multiple_error_categories(self):
+        """Multiple different categories should produce multiple user messages."""
+        raw_errors = [
+            "Archive contains 501 files, maximum is 500.",
+            "File 'payload.exe' has disallowed extension '.exe'. Allowed: ...",
+        ]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 2
+
+    def test_unknown_error_passes_through(self):
+        raw_errors = ["Some completely unexpected error message."]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+
+    def test_mime_spoofing(self):
+        raw_errors = [
+            "File 'image.png' has extension '.png' but MIME type is 'application/x-executable' (possible type spoofing)."
+        ]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert "image.png" in result.errors[0]
+
+    def test_file_size_limit(self):
+        raw_errors = ["File 'huge.svg' (svg) exceeds the 5MB size limit for its type."]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert "huge.svg" in result.errors[0]
+
+    def test_symlink_detected(self):
+        raw_errors = ["Symlink detected: 'evil_link'. Symlinks are not allowed."]
+        result = translate_zip_errors(raw_errors)
+        assert len(result.errors) == 1
+        assert "re-create" in result.errors[0].lower() or "unusual" in result.errors[0].lower()
+
+
+class TestZipUploadResult:
+    def test_has_errors(self):
+        result = ZipUploadResult(errors=["something wrong"], warnings=[])
+        assert result.has_errors is True
+
+    def test_no_errors(self):
+        result = ZipUploadResult(errors=[], warnings=[])
+        assert result.has_errors is False
+
+    def test_has_warnings(self):
+        result = ZipUploadResult(errors=[], warnings=["heads up"])
+        assert result.has_warnings is True
+
+    def test_format_errors_html(self):
+        result = ZipUploadResult(
+            errors=["Error one", "Error two"],
+            warnings=["Warning one"],
+        )
+        html = result.format_errors_html()
+        assert "<li>" in html
+        assert "Error one" in html
+        assert "Error two" in html
+
+    def test_format_warnings_html(self):
+        result = ZipUploadResult(
+            errors=[],
+            warnings=["Some files were skipped"],
+            skipped_files=["__MACOSX/._foo", ".DS_Store"],
+        )
+        html = result.format_warnings_html()
+        assert "skipped" in html.lower() or "Some files were skipped" in html
+
+    def test_format_errors_html_escapes_html(self):
+        result = ZipUploadResult(
+            errors=["Contains <script>alert('xss')</script>"],
+            warnings=[],
+        )
+        html = result.format_errors_html()
+        assert "<script>" not in html
+        assert "&lt;script&gt;" in html
+
+
+class TestSkippedFilesWarning:
+    """Hidden/skipped files should produce an informational warning."""
+
+    def test_skipped_files_produce_warning(self, tmp_path):
+        validator = ZipValidator()
+        files = {
+            "index.html": MINIMAL_HTML,
+            "__MACOSX/._index.html": "apple resource fork",
+            ".DS_Store": "\x00\x00",
+        }
+        data = _make_zip(files)
+        archive = tmp_path / "skipped.zip"
+        archive.write_bytes(data)
+
+        raw_errors = validator.validate(archive_path=str(archive))
+        assert raw_errors == []
+
+        result = translate_zip_errors(
+            raw_errors, skipped_files=["__MACOSX/._index.html", ".DS_Store"]
+        )
+        assert result.has_warnings
+        assert any("skipped" in w.lower() for w in result.warnings)
+
+    def test_no_skipped_files_no_warning(self):
+        result = translate_zip_errors([], skipped_files=[])
+        assert not result.has_warnings
+
+
+class TestLargeImageWarning:
+    def test_large_images_produce_warning(self):
+        result = translate_zip_errors(
+            [],
+            large_files=[("figures/plot.png", 12_000_000)],
+        )
+        assert result.has_warnings
+        assert any("image" in w.lower() or "loading" in w.lower() for w in result.warnings)
+
+
+class TestHighFileCountWarning:
+    def test_many_files_produce_warning(self):
+        result = translate_zip_errors(
+            [],
+            file_count=350,
+        )
+        assert result.has_warnings
+        assert any("350" in w for w in result.warnings)
+
+    def test_few_files_no_warning(self):
+        result = translate_zip_errors([], file_count=10)
+        assert not result.has_warnings
+
+
+class TestEndToEndErrorMessages:
+    """Integration tests: run validator then translate, check final messages."""
+
+    def test_exe_file_gives_friendly_message(self, tmp_path):
+        validator = ZipValidator()
+        data = _make_zip({"index.html": MINIMAL_HTML, "payload.exe": b"\x00"})
+        archive = tmp_path / "exe.zip"
+        archive.write_bytes(data)
+        raw_errors = validator.validate(str(archive))
+        result = translate_zip_errors(raw_errors)
+        assert result.has_errors
+        # The message should mention the filename and be understandable
+        assert "payload.exe" in result.errors[0]
+        assert "can't host" in result.errors[0].lower() or "remove" in result.errors[0].lower()
+
+    def test_no_html_gives_friendly_message(self, tmp_path):
+        validator = ZipValidator()
+        data = _make_zip({"style.css": "body {}", "app.js": "console.log('hi')"})
+        archive = tmp_path / "no_html.zip"
+        archive.write_bytes(data)
+        raw_errors = validator.validate(str(archive))
+        result = translate_zip_errors(raw_errors)
+        assert result.has_errors
+        assert "html" in result.errors[0].lower()
+        assert "include" in result.errors[0].lower() or ".html" in result.errors[0]
+
+    def test_corrupt_zip_gives_friendly_message(self, tmp_path):
+        validator = ZipValidator()
+        bad_file = tmp_path / "bad.zip"
+        bad_file.write_bytes(b"this is not a zip file")
+        raw_errors = validator.validate(str(bad_file))
+        result = translate_zip_errors(raw_errors)
+        assert result.has_errors
+        assert "new zip" in result.errors[0].lower() or "read" in result.errors[0].lower()
+
+    def test_researcher_can_understand_all_messages(self, tmp_path):
+        """No error message should contain technical jargon."""
+        jargon_terms = [
+            "null byte",
+            "symlink",
+            "path traversal",
+            "MIME type",
+            "compression ratio",
+            "external_attr",
+            "NFC",
+            "unicode",
+        ]
+
+        test_cases = [
+            (["Archive file not found."], "not found"),
+            (["File is not a valid zip archive (corrupt or wrong format)."], "corrupt"),
+            (["Symlink detected: 'evil_link'. Symlinks are not allowed."], "symlink"),
+            (
+                ["Path traversal detected in '../etc'. Paths containing '..' are not allowed."],
+                "traversal",
+            ),
+            (["Null byte detected in filename 'index\\x00.html'."], "null byte"),
+            (
+                [
+                    "File 'image.png' has extension '.png' but MIME type is 'text/plain' (possible type spoofing)."
+                ],
+                "mime",
+            ),
+        ]
+
+        for raw_errors, _label in test_cases:
+            result = translate_zip_errors(raw_errors)
+            for msg in result.errors:
+                msg_lower = msg.lower()
+                for term in jargon_terms:
+                    assert term.lower() not in msg_lower, (
+                        f"Jargon '{term}' found in message for {_label}: {msg}"
+                    )