diff --git a/.gitignore b/.gitignore
index 04c7330..b552e16 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ claude.md
 **.pyc
 __pycache__/*
 *.egg-info/*
+/models
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..b60b198
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,33 @@
+# Repository Guidelines
+
+## Project Structure & Module Organization
+- `mlx_video/` holds the core package. Entry points live in `mlx_video/generate.py` and `mlx_video/generate_av.py`, with utilities in `mlx_video/utils.py` and `mlx_video/postprocess.py`.
+- Model code is under `mlx_video/models/ltx/`, including VAE components in `video_vae/` and `audio_vae/`, plus prompt templates in `prompts/`.
+- Tests are in `tests/` (e.g., `tests/test_rope.py`, `tests/test_vae_streaming.py`).
+- Sample output media lives in `examples/`.
+
+## Build, Test, and Development Commands
+- Install (editable): `pip install -e '.[dev]'` or `uv pip install -e '.[dev]'` (installs pytest).
+- Run the CLI: `uv run mlx_video.generate --prompt "..." -n 100 --width 768`.
+- Module execution: `python -m mlx_video.generate --prompt "..." --height 768 --width 768`.
+- Tests: `pytest` from the repo root.
+
+## Coding Style & Naming Conventions
+- Python code uses 4-space indentation and standard PEP 8 conventions.
+- Prefer descriptive, lowercase module and function names (`generate_av.py`, `decode_with_tiling`).
+- Tests follow `test_*.py` naming and `Test*` classes with `test_*` methods.
+- No formatter/linter is configured in-repo; keep formatting consistent with existing files.
+
+## Testing Guidelines
+- Test framework: pytest (declared in `pyproject.toml` under `dev` extras).
+- Keep tests deterministic by seeding MLX where applicable (see `mx.random.seed(42)` patterns).
+- Cover shape invariants and numerical stability; use `np.testing.assert_allclose` for tensor checks.
+
+## Commit & Pull Request Guidelines
+- Recent history uses short, imperative summaries (e.g., “add tests”, “Enhance video generation…”). Keep the subject concise and action-oriented.
+- PRs should include a brief description of changes, testing performed (commands + results), and any relevant sample outputs (e.g., generated `.mp4` or `.gif`).
+- If your change alters model outputs or performance, note the expected impact and any new flags.
+
+## Runtime & Environment Notes
+- Target environment is macOS on Apple Silicon with Python >= 3.11 and MLX >= 0.22.0.
+- Model weights are expected from Hugging Face (default `Lightricks/LTX-2`); ensure access and disk space before runs.
diff --git a/README.md b/README.md
index 190bdf7..9031789 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,20 @@ pip install git+https://github.com/Blaizzy/mlx-video.git
 uv pip install git+https://github.com/Blaizzy/mlx-video.git
 ```
 
+## Model Downloads
+
+Use the repo script to prefetch weights into the Hugging Face cache (so `snapshot_download` can find them locally):
+
+```bash
+./download.sh
+```
+
+You can also set a custom cache location:
+
+```bash
+./download.sh --hf-home /path/to/hf-cache
+```
+
 Supported models:
 
 ### LTX-2
diff --git a/download.sh b/download.sh
new file mode 100755
index 0000000..fc95335
--- /dev/null
+++ b/download.sh
@@ -0,0 +1,137 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+MODEL_REPO="Lightricks/LTX-2"
+TEXT_ENCODER_REPO="google/gemma-3-12b-it"
+SKIP_TEXT_ENCODER=0
+HF_HOME_DIR=""
+HF_MIRROR="https://hf-mirror.com"
+
+usage() {
+  cat <<'USAGE'
+Usage: ./download.sh [options]
+
+Options:
+  --model-repo REPO         Model repo to download (default: Lightricks/LTX-2)
+  --text-encoder-repo REPO  Text encoder repo to download (default: google/gemma-3-12b-it)
+  --skip-text-encoder       Skip downloading the text encoder repo
+  --hf-home DIR             Set HF_HOME for Hugging Face cache location
+  -h, --help                Show this help
+
+Notes:
+  - Set HF_TOKEN in your environment if the repo is gated or requires auth.
+  - The script uses HF mirror by default and automatically falls back to
+    https://huggingface.co when the mirror returns HTTP 403.
+  - Downloads go to the Hugging Face cache so the repo can find them.
+USAGE
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --model-repo)
+      MODEL_REPO="$2"
+      shift 2
+      ;;
+    --text-encoder-repo)
+      TEXT_ENCODER_REPO="$2"
+      shift 2
+      ;;
+    --skip-text-encoder)
+      SKIP_TEXT_ENCODER=1
+      shift
+      ;;
+    --hf-home)
+      HF_HOME_DIR="$2"
+      shift 2
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "Unknown option: $1" >&2
+      usage
+      exit 1
+      ;;
+  esac
+done
+
+if command -v python3 >/dev/null 2>&1; then
+  PYTHON=python3
+else
+  PYTHON=python
+fi
+
+export HF_ENDPOINT="$HF_MIRROR"
+if [[ -n "$HF_HOME_DIR" ]]; then
+  export HF_HOME="$HF_HOME_DIR"
+fi
+
+download_repo() {
+  local repo="$1"
+  shift 1
+  local patterns_str
+  patterns_str=$(IFS='|'; echo "$*")
+
+  REPO="$repo" ALLOW_PATTERNS="$patterns_str" "$PYTHON" - <<'PY'
+import os
+from huggingface_hub import snapshot_download
+
+repo = os.environ["REPO"]
+patterns = os.environ.get("ALLOW_PATTERNS")
+allow_patterns = patterns.split("|") if patterns else None
+mirror = os.environ.get("HF_ENDPOINT")
+
+def run_download(endpoint):
+    if endpoint:
+        os.environ["HF_ENDPOINT"] = endpoint
+        label = endpoint
+    else:
+        os.environ.pop("HF_ENDPOINT", None)
+        label = "https://huggingface.co"
+    print(f"==> Downloading {repo} via {label}")
+    return snapshot_download(
+        repo_id=repo,
+        resume_download=True,
+        allow_patterns=allow_patterns,
+    )
+
+try:
+    run_download(mirror)
+except Exception as err:  # pragma: no cover - shell script helper
+    status = getattr(getattr(err, "response", None), "status_code", None)
+    is_403 = status == 403 or "403" in str(err)
+    if is_403:
+        print("Mirror returned 403; retrying with https://huggingface.co")
+        run_download("https://huggingface.co")
+    else:
+        raise
+PY
+}
+
+download_repo "$MODEL_REPO" \
+  "*.safetensors" \
+  "*.json" \
+  "tokenizer/**" \
+  "tokenizer.*" \
+  "special_tokens_map.json" \
+  "tokenizer_config.json" \
+  "tokenizer.json" \
+  "vocab.json" \
+  "merges.txt"
+
+if [[ "$SKIP_TEXT_ENCODER" -eq 0 ]]; then
+  download_repo "$TEXT_ENCODER_REPO" \
+    "*.safetensors" \
+    "*.json" \
+    "tokenizer/**" \
+    "tokenizer.*" \
+    "special_tokens_map.json" \
+    "tokenizer_config.json" \
+    "tokenizer.json" \
+    "tokenizer.model" \
+    "vocab.json" \
+    "merges.txt"
+fi
+
+echo "Done. Downloaded models are in the Hugging Face cache."