From 2a83eda0cc88f4152725e1b925eb4f789f66e805 Mon Sep 17 00:00:00 2001
From: Alfred Nguyen
Date: Thu, 22 May 2025 14:06:42 +0200
Subject: [PATCH 1/3] feat: dataset generation with procgen and gym

---
 README.md               | 14 +++++++++-
 generate_dataset.py     |  4 +--
 generate_dataset_gym.py | 61 +++++++++++++++++++++++++++++++++++++++++
 requirements.txt        |  3 +-
 4 files changed, 78 insertions(+), 4 deletions(-)
 create mode 100644 generate_dataset_gym.py

diff --git a/README.md b/README.md
index e9503a2..16b6a3c 100644
--- a/README.md
+++ b/README.md
@@ -22,11 +22,23 @@ pip install -r requirements.txt
 Before training the models, generate the CoinRun dataset by running:
 
 ```bash
-python generate_dataset.py --num_episodes 10000
+python generate_dataset.py --num_episodes 10000 --env_name coinrun
 ```
 
+See [here](https://github.com/openai/procgen?tab=readme-ov-file#environments) for more environments.
+
 Note: this is a large dataset (around 100GB) and may take a while to generate.
+To generate datasets from other Gym environments, run:
+
+```bash
+python generate_dataset_gym.py --num_episodes 10000 --env_name Acrobot-v1
+```
+
+See [here](https://gym.openai.com/envs/#classic_control) for more environments.
+
+Note: This project uses gym==0.25.2 for backwards compatibility. Newer versions of gym are not supported.
+

 ## Quick Start 🚀

 Genie has three components: a [video tokenizer](models/tokenizer.py), a [latent action model](models/lam.py), and a [dynamics model](models/dynamics.py). Each of these components are trained separately, however, the dynamics model requires a pre-trained video tokenizer and latent action model.
diff --git a/generate_dataset.py b/generate_dataset.py
index a67c424..6b1a58f 100644
--- a/generate_dataset.py
+++ b/generate_dataset.py
@@ -15,12 +15,12 @@
 @dataclass
 class Args:
     num_episodes: int = 10000
-    output_dir: str = "data/coinrun_episodes"
+    env_name: str = "coinrun"
     min_episode_length: int = 50
 
 args = tyro.cli(Args)
 
-output_dir = Path(args.output_dir)
+output_dir = f"data/{args.env_name}_episodes"
 output_dir.mkdir(parents=True, exist_ok=True)
 
 # --- Generate episodes ---
diff --git a/generate_dataset_gym.py b/generate_dataset_gym.py
new file mode 100644
index 0000000..74e70e5
--- /dev/null
+++ b/generate_dataset_gym.py
@@ -0,0 +1,61 @@
+"""
+Generates a dataset from the gym environment.
+Episodes are saved individually as memory-mapped files for efficient loading.
+"""
+
+from dataclasses import dataclass
+from pathlib import Path
+
+import gym3
+import numpy as np
+import tyro
+import time
+
+@dataclass
+class Args:
+    num_episodes: int = 10000
+    env_name: str = "Acrobot-v1"
+    min_episode_length: int = 50
+
+
+def main():
+    args = tyro.cli(Args)
+    output_dir = Path(f"data/{args.env_name}_episodes_{args.num_episodes}")
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # --- Generate episodes ---
+    i = 0
+    metadata = []
+    while i < args.num_episodes:
+        env = gym3.vectorize_gym(num=1, env_kwargs={"id": args.env_name})
+        dataseq = []
+
+        # --- Run episode ---
+        for j in range(1000):
+            env.act(gym3.types_np.sample(env.ac_space, bshape=(env.num,)))
+            rew, obs, first = env.observe()
+            dataseq.append(obs)
+            if first:
+                break
+
+        # --- Save episode ---
+        if len(dataseq) >= args.min_episode_length:
+            episode_data = np.concatenate(dataseq, axis=0)
+            episode_path = output_dir / f"episode_{i}.npy"
+            np.save(episode_path, episode_data.astype(np.uint8))
+            metadata.append({"path": str(episode_path), "length": len(dataseq)})
+            print(f"Episode {i} completed, length: {len(dataseq)}")
+            i += 1
+        else:
+            print(f"Episode too short ({len(dataseq)}), resampling...")
+
+    # --- Save metadata ---
+    np.save(output_dir / "metadata.npy", metadata)
+    print(f"Dataset generated with {len(metadata)} valid episodes, saving to {output_dir}")
+
+
+if __name__ == '__main__':
+    start_time = time.time()
+    main()
+    end_time = time.time()
+    print(f"Time taken: {end_time - start_time} seconds")
diff --git a/requirements.txt b/requirements.txt
index 8699240..9e70627 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,5 @@ optax>=0.2.3
 procgen>=0.10.7
 torch>=2.0.1
 tyro>=0.8.5
-wandb>=0.17.4
\ No newline at end of file
+wandb>=0.17.4
+gym==0.25.2 # Last version that supports backward compatibility https://github.com/openai/gym/releases/tag/0.26.0
\ No newline at end of file
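
For reference, the episodes and metadata written by this patch can be read back with plain NumPy. The snippet below is a minimal sketch and not part of the patch itself; the paths assume the default arguments above (`data/coinrun_episodes`, `episode_0.npy`), and `metadata.npy` needs `allow_pickle=True` because it stores a list of dicts.

```python
# Sketch only (not part of the patch): read back one episode and the metadata
# produced by generate_dataset.py with its default arguments.
from pathlib import Path

import numpy as np

data_dir = Path("data/coinrun_episodes")  # default: data/{env_name}_episodes

# Each episode is a plain .npy array; mmap_mode="r" avoids loading it fully into RAM.
episode = np.load(data_dir / "episode_0.npy", mmap_mode="r")
print(episode.shape, episode.dtype)

# metadata.npy holds a list of dicts, so it is stored as a pickled object array.
metadata = np.load(data_dir / "metadata.npy", allow_pickle=True).tolist()
print(len(metadata), metadata[0])
```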

From 603ca9b4b1a7f8bc52e5fbb1470cd05296807bbf Mon Sep 17 00:00:00 2001
From: Alfred
Date: Sat, 24 May 2025 19:06:56 +0200
Subject: [PATCH 2/3] feature: multi cpu gym env generation

---
 generate_dataset.py                           | 12 +++-
 generate_dataset_gym_multi.py                 | 70 +++++++++++++++++++
 ...t_gym.py => generate_dataset_gym_single.py | 30 +++++---
 requirements.txt                              |  5 +-
 4 files changed, 105 insertions(+), 12 deletions(-)
 create mode 100644 generate_dataset_gym_multi.py
 rename generate_dataset_gym.py => generate_dataset_gym_single.py (61%)

diff --git a/generate_dataset.py b/generate_dataset.py
index 6b1a58f..39251ab 100644
--- a/generate_dataset.py
+++ b/generate_dataset.py
@@ -5,6 +5,7 @@
 
 from dataclasses import dataclass
 from pathlib import Path
+import time
 
 from gym3 import types_np
 import numpy as np
@@ -20,18 +21,21 @@ class Args:
 
 args = tyro.cli(Args)
 
-output_dir = f"data/{args.env_name}_episodes"
+output_dir = Path(f"data/{args.env_name}_episodes")
 output_dir.mkdir(parents=True, exist_ok=True)
 
 # --- Generate episodes ---
 i = 0
 metadata = []
+times= []
 while i < args.num_episodes:
     seed = np.random.randint(0, 10000)
     env = ProcgenGym3Env(num=1, env_name="coinrun", start_level=seed)
     dataseq = []
 
     # --- Run episode ---
+    print(f"Generating episode {i}...")
+    start_time = time.time()
     for j in range(1000):
         env.act(types_np.sample(env.ac_space, bshape=(env.num,)))
         rew, obs, first = env.observe()
@@ -45,11 +49,15 @@ class Args:
         episode_path = output_dir / f"episode_{i}.npy"
         np.save(episode_path, episode_data.astype(np.uint8))
         metadata.append({"path": str(episode_path), "length": len(dataseq)})
-        print(f"Episode {i} completed, length: {len(dataseq)}")
+        # print time per episode
+        times.append(time.time() - start_time)
+        print(f"Episode {i} completed, length: {len(dataseq)}, time: {time.time() - start_time}")
         i += 1
     else:
         print(f"Episode too short ({len(dataseq)}), resampling...")
+
 
 # --- Save metadata ---
 np.save(output_dir / "metadata.npy", metadata)
 print(f"Dataset generated with {len(metadata)} valid episodes")
+print(f"Average time per episode: {np.mean(times)}")
diff --git a/generate_dataset_gym_multi.py b/generate_dataset_gym_multi.py
new file mode 100644
index 0000000..5134245
--- /dev/null
+++ b/generate_dataset_gym_multi.py
@@ -0,0 +1,70 @@
+"""
+Generates a dataset from the gym environment.
+Episodes are saved individually as memory-mapped files for efficient loading.
+"""
+
+from dataclasses import dataclass
+from pathlib import Path
+
+import gymnasium as gym
+import numpy as np
+import tyro
+import time
+import multiprocessing as mp
+
+@dataclass
+class Args:
+    num_episodes: int = 10000
+    env_name: str = "Acrobot-v1"
+    min_episode_length: int = 50
+    seed: int = 42
+
+
+def generate_episode(args_tuple):
+    env_name, min_episode_length, seed, episode_idx, output_dir = args_tuple
+    env = gym.make(env_name, render_mode="rgb_array")
+    observation, info = env.reset(seed=seed + episode_idx)
+    dataseq = []
+    print(f"Episode {episode_idx} started")
+    for j in range(1000):
+        action = env.action_space.sample()
+        observation, reward, terminated, truncated, info = env.step(action)
+        dataseq.append(env.render())
+        if terminated or truncated:
+            break
+
+    if len(dataseq) >= min_episode_length:
+        episode_data = np.stack(dataseq, axis=0)
+        episode_path = output_dir / f"episode_{episode_idx}.npy"
+        np.save(episode_path, episode_data.astype(np.uint8))
+        print(f"Episode {episode_idx} saved")
+
+        return {"path": str(episode_path), "length": len(dataseq)}
+    else:
+        return None
+
+def main():
+    args = tyro.cli(Args)
+    output_dir = Path(f"data/{args.env_name}_episodes_{args.num_episodes}")
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    pool_args = [
+        (args.env_name, args.min_episode_length, args.seed, i, output_dir)
+        for i in range(args.num_episodes)
+    ]
+
+    print(f"Number of processes: {mp.cpu_count()}")
+
+    with mp.Pool(processes=mp.cpu_count()) as pool:
+        results = pool.map(generate_episode, pool_args)
+
+    # Filter out None (episodes that were too short)
+    metadata = [r for r in results if r is not None]
+    np.save(output_dir / "metadata.npy", metadata)
+    print(f"Dataset generated with {len(metadata)} valid episodes, saving to {output_dir}")
+
+
+if __name__ == '__main__':
+    start_time = time.time()
+    main()
+    end_time = time.time()
+    print(f"Time taken: {end_time - start_time} seconds")
diff --git a/generate_dataset_gym.py b/generate_dataset_gym_single.py
similarity index 61%
rename from generate_dataset_gym.py
rename to generate_dataset_gym_single.py
index 74e70e5..b473e5b 100644
--- a/generate_dataset_gym.py
+++ b/generate_dataset_gym_single.py
@@ -6,16 +6,20 @@
 from dataclasses import dataclass
 from pathlib import Path
 
-import gym3
+import gymnasium as gym
 import numpy as np
 import tyro
 import time
+import crafter
+
+
 
 @dataclass
 class Args:
     num_episodes: int = 10000
     env_name: str = "Acrobot-v1"
     min_episode_length: int = 50
+    seed: int = 42
 
 
 def main():
@@ -26,25 +30,32 @@ def main():
     # --- Generate episodes ---
     i = 0
     metadata = []
+    time_per_episode = []
     while i < args.num_episodes:
-        env = gym3.vectorize_gym(num=1, env_kwargs={"id": args.env_name})
+        time_start_episode = time.time()
+        env = gym.make(args.env_name, render_mode="rgb_array")
+        observation, info = env.reset(seed=args.seed)
         dataseq = []
 
         # --- Run episode ---
         for j in range(1000):
-            env.act(gym3.types_np.sample(env.ac_space, bshape=(env.num,)))
-            rew, obs, first = env.observe()
-            dataseq.append(obs)
-            if first:
+            action = env.action_space.sample()
+            observation, reward, terminated, truncated, info = env.step(action)
+            dataseq.append(env.render())
+            if terminated or truncated:
                 break
-
+
         # --- Save episode ---
         if len(dataseq) >= args.min_episode_length:
-            episode_data = np.concatenate(dataseq, axis=0)
+            episode_data = np.stack(dataseq, axis=0)
             episode_path = output_dir / f"episode_{i}.npy"
             np.save(episode_path, episode_data.astype(np.uint8))
+            time_per_episode.append(time.time() - time_start_episode)
             metadata.append({"path": str(episode_path), "length": len(dataseq)})
-            print(f"Episode {i} completed, length: {len(dataseq)}")
+            if i % 5 == 0:
+                print(f"Episode {i} completed, length: {len(dataseq)}, time: {time_per_episode[-1]} seconds")
+            else:
+                print(f"Episode {i} completed, length: {len(dataseq)}")
             i += 1
         else:
             print(f"Episode too short ({len(dataseq)}), resampling...")
@@ -52,6 +63,7 @@ def main():
     # --- Save metadata ---
     np.save(output_dir / "metadata.npy", metadata)
     print(f"Dataset generated with {len(metadata)} valid episodes, saving to {output_dir}")
+    print(f"Average time per episode: {np.mean(time_per_episode)} seconds")
 
 
 if __name__ == '__main__':
diff --git a/requirements.txt b/requirements.txt
index 9e70627..db6f11b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,7 @@ procgen>=0.10.7
 torch>=2.0.1
 tyro>=0.8.5
 wandb>=0.17.4
-gym==0.25.2 # Last version that supports backward compatibility https://github.com/openai/gym/releases/tag/0.26.0
\ No newline at end of file
+gym==0.25.2 # Last version that supports backward compatibility https://github.com/openai/gym/releases/tag/0.26.0
+# To support Box2D environments
+swig==4.3.1
+gymnasium[box2d]
\ No newline at end of file

From a9a8cad347de44283ac7cc8eafd1d1f508174d87 Mon Sep 17 00:00:00 2001
From: Alfred
Date: Mon, 26 May 2025 19:32:36 +0200
Subject: [PATCH 3/3] add: logging and metadata checkpointing

---
 generate_dataset.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/generate_dataset.py b/generate_dataset.py
index 39251ab..5500bf7 100644
--- a/generate_dataset.py
+++ b/generate_dataset.py
@@ -6,22 +6,27 @@
 from dataclasses import dataclass
 from pathlib import Path
 import time
+import logging
 
 from gym3 import types_np
 import numpy as np
 from procgen import ProcgenGym3Env
 import tyro
 
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
 @dataclass
 class Args:
     num_episodes: int = 10000
     env_name: str = "coinrun"
     min_episode_length: int = 50
-
+    output_dir: str = "data"
 args = tyro.cli(Args)
 
-output_dir = Path(f"data/{args.env_name}_episodes")
+
+output_dir = Path(f"{args.output_dir}/{args.env_name}_episodes")
 output_dir.mkdir(parents=True, exist_ok=True)
 
 # --- Generate episodes ---
@@ -34,7 +39,7 @@ class Args:
     dataseq = []
 
     # --- Run episode ---
-    print(f"Generating episode {i}...")
+    logger.info(f"Generating episode {i}...")
     start_time = time.time()
     for j in range(1000):
         env.act(types_np.sample(env.ac_space, bshape=(env.num,)))
@@ -49,15 +54,19 @@ class Args:
         episode_path = output_dir / f"episode_{i}.npy"
         np.save(episode_path, episode_data.astype(np.uint8))
         metadata.append({"path": str(episode_path), "length": len(dataseq)})
-        # print time per episode
+        # log time per episode
         times.append(time.time() - start_time)
-        print(f"Episode {i} completed, length: {len(dataseq)}, time: {time.time() - start_time}")
+        logger.info(f"Episode {i} completed, length: {len(dataseq)}, time: {time.time() - start_time}")
         i += 1
+
+        # Save metadata every 1000 episodes
+        if i % 1000 == 0:
+            np.save(output_dir / f"metadata_episodes_{i}.npy", metadata)
     else:
-        print(f"Episode too short ({len(dataseq)}), resampling...")
+        logger.warning(f"Episode too short ({len(dataseq)}), resampling...")
 
 
 # --- Save metadata ---
 np.save(output_dir / "metadata.npy", metadata)
-print(f"Dataset generated with {len(metadata)} valid episodes")
-print(f"Average time per episode: {np.mean(times)}")
+logger.info(f"Dataset generated with {len(metadata)} valid episodes")
+logger.info(f"Average time per episode: {np.mean(times)}")
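
Patch 3 exposes the output directory as a CLI flag (tyro derives `--output_dir` from the new `Args.output_dir` field) and writes a metadata checkpoint every 1000 episodes. The snippet below is a minimal sketch and not part of the patches: it shows how the newest checkpoint could be picked up after an interrupted run, assuming the default `data/coinrun_episodes` layout and the `metadata_episodes_{i}.npy` naming from the diff above.

```python
# Sketch only (not part of the patches): recover the newest metadata checkpoint
# written by the "Save metadata every 1000 episodes" step in patch 3.
from pathlib import Path

import numpy as np

output_dir = Path("data/coinrun_episodes")  # default: {output_dir}/{env_name}_episodes

# Checkpoints are named metadata_episodes_1000.npy, metadata_episodes_2000.npy, ...
checkpoints = sorted(
    output_dir.glob("metadata_episodes_*.npy"),
    key=lambda p: int(p.stem.rsplit("_", 1)[-1]),
)
if checkpoints:
    metadata = np.load(checkpoints[-1], allow_pickle=True).tolist()
    print(f"{checkpoints[-1].name}: {len(metadata)} episodes recorded")
```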