diff --git a/dql_plots/DeepQ.py b/dql_plots/DeepQ.py
new file mode 100644
index 0000000..14af649
--- /dev/null
+++ b/dql_plots/DeepQ.py
@@ -0,0 +1,184 @@
+"""
+This is the DQN implementation with a convolutional Q-network.
+
+It contains:
+QNetwork: a convolutional network that estimates Q-values from image input
+DQN: the Deep Q-Learning agent class that handles action selection, training, target network updates and epsilon decay
+
+The important bits are:
+Convolutional layers for visual input processing
+Epsilon-greedy exploration, just like in Q-Learning
+Experience replay
+RMSprop optimizer
+"""
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+
+import random
+from collections import namedtuple, deque
+
+# Neural network used to estimate Q-values from visual input (stacked frames)
+class QNetwork(nn.Module):
+    def __init__(self, stacked_input, num_actions, activation=F.relu):
+        super(QNetwork, self).__init__()
+        # 1st convolutional layer
+        self.layer1 = nn.Conv2d(stacked_input, 16, kernel_size=8, stride=4)  # [batch_size, 4, 84, 84] -> [batch_size, 16, 20, 20], 16 filters
+
+        # 2nd convolutional layer
+        self.layer2 = nn.Conv2d(16, 32, kernel_size=4, stride=2)  # [batch_size, 16, 20, 20] -> [batch_size, 32, 9, 9], 32 filters
+
+        # Output size after the 2nd layer, flattened
+        self.flatten_img = 32 * 9 * 9  # 32 feature maps of size 9x9 per sample -> [batch_size, 2592]
+
+        # Fully connected layers
+        self.fully_connected_layer1 = nn.Linear(self.flatten_img, 256)  # flattened image passed to 256 neurons
+        self.fully_connected_layer2 = nn.Linear(256, num_actions)  # Q-values, [batch_size, num_actions]
+
+        self.activation = activation
+
+    def forward(self, input_img):
+        # apply conv layers with ReLU
+        input_img = F.relu(self.layer1(input_img))
+        input_img = F.relu(self.layer2(input_img))
+        input_img = input_img.view((-1, self.flatten_img))
+        input_img = self.activation(self.fully_connected_layer1(input_img))
+        input_img = self.fully_connected_layer2(input_img)
+        return input_img
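+
+# ------------------------------------------------------------
+# Sanity-check sketch (commented out, not part of the agent): verifies
+# the conv arithmetic behind self.flatten_img for an assumed
+# [1, 4, 84, 84] input, i.e. 84 -> (84-8)/4+1 = 20 -> (20-4)/2+1 = 9,
+# so 32*9*9 = 2592. The 5-action discrete CarRacing setup is assumed.
+#
+# if __name__ == "__main__":
+#     dummy = torch.zeros(1, 4, 84, 84)
+#     net = QNetwork(stacked_input=4, num_actions=5)
+#     x = F.relu(net.layer1(dummy))
+#     print(x.shape)           # torch.Size([1, 16, 20, 20])
+#     x = F.relu(net.layer2(x))
+#     print(x.shape)           # torch.Size([1, 32, 9, 9])
+#     print(net(dummy).shape)  # torch.Size([1, 5]) -> one Q-value per action
+# ------------------------------------------------------------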
+
+# Named tuple for structured replay samples (analogous to 'Transition' in the PyTorch DQN tutorial)
+TrainingSample = namedtuple('TrainingSample', ('state', 'action', 'reward', 'next_state', 'terminated'))
+
+# Replay buffer for experience replay
+class ExperienceReplay:
+    def __init__(self, stacked_input, action_shape, capacity=int(1e5)):
+        self.capacity = capacity
+        self.sample_idx = 0
+        self.samples_stored_till_now = 0
+
+        # pre-allocate the storage arrays
+        self.state = np.zeros((capacity, *stacked_input), dtype=np.uint8)
+        self.action = np.zeros((capacity, *action_shape), dtype=np.int64)
+        self.reward = np.zeros((capacity, 1), dtype=np.float32)
+        self.next_state = np.zeros((capacity, *stacked_input), dtype=np.uint8)
+        self.terminated = np.zeros((capacity, 1), dtype=np.float32)
+
+    # store one step taken in the environment
+    def push(self, state, action, reward, next_state, terminated):
+        self.state[self.sample_idx] = state
+        self.action[self.sample_idx] = action
+        self.reward[self.sample_idx] = reward
+        self.next_state[self.sample_idx] = next_state
+        self.terminated[self.sample_idx] = terminated
+
+        self.sample_idx = (self.sample_idx + 1) % self.capacity  # overwrite the oldest entry once capacity is reached
+        self.samples_stored_till_now = min(self.samples_stored_till_now + 1, self.capacity)
+
+    # draw a random batch
+    def sample(self, batch_size):
+        idx = np.random.randint(0, self.samples_stored_till_now, batch_size)
+        batch = TrainingSample(
+            state=torch.FloatTensor(self.state[idx]),
+            action=torch.LongTensor(self.action[idx]),
+            reward=torch.FloatTensor(self.reward[idx]),
+            next_state=torch.FloatTensor(self.next_state[idx]),
+            terminated=torch.FloatTensor(self.terminated[idx]),
+        )
+        return batch
+
+    # how many samples are stored
+    def __len__(self):
+        return self.samples_stored_till_now
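+
+# ------------------------------------------------------------
+# Illustrative sketch (commented out, shapes assumed to match the
+# wrapper below): a push/sample round trip through ExperienceReplay
+# showing the tensor shapes the agent trains on.
+#
+# if __name__ == "__main__":
+#     buf = ExperienceReplay((4, 84, 84), (1,), capacity=1000)
+#     s = np.zeros((4, 84, 84), dtype=np.uint8)
+#     for t in range(64):
+#         buf.push(s, [2], [1.0], s, [False])
+#     batch = buf.sample(32)
+#     print(batch.state.shape)    # torch.Size([32, 4, 84, 84])
+#     print(batch.action.shape)   # torch.Size([32, 1])
+#     print(batch.reward.shape)   # torch.Size([32, 1])
+# ------------------------------------------------------------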
+
+# The actual Deep Q-Learning agent
+class DQN:
+    def __init__(
+        self,
+        stacked_input,
+        num_actions,
+        alpha=0.00025,
+        epsilon=1.0,
+        minimum_epsilon=0.1,
+        discount_factor=0.99,
+        batch_size=32,
+        warmup_steps=5000,
+        ExperienceReplay_memory=int(5e4),
+        target_update_interval=10000,
+    ):
+        self.num_actions = num_actions
+        self.epsilon = epsilon
+        self.discount_factor = discount_factor
+        self.batch_size = batch_size
+        self.warmup_steps = warmup_steps
+        self.target_update_interval = target_update_interval
+
+        # Q-network; input => stacked frames, output => Q-values (one per action)
+        self.network = QNetwork(stacked_input[0], num_actions)
+
+        # Target network
+        self.target_network = QNetwork(stacked_input[0], num_actions)
+
+        # copy the online network's weights into the target network
+        self.target_network.load_state_dict(self.network.state_dict())
+
+        # optimizer; reference = DeepMind DQN
+        self.optimizer = torch.optim.RMSprop(self.network.parameters(), lr=alpha)
+
+        self.buffer = ExperienceReplay(stacked_input, (1, ), ExperienceReplay_memory)  # initialize experience replay
+
+        self.total_steps = 0
+        self.epsilon_decay = (epsilon - minimum_epsilon) / 1e6  # linear epsilon decay over 1e6 steps
+
+    # Epsilon-greedy action selection
+    @torch.no_grad()
+    def act(self, input_img, training=True):
+        self.network.train(training)  # eval mode when not training
+        if training and ((np.random.rand() < self.epsilon) or (self.total_steps < self.warmup_steps)):
+            action = np.random.randint(0, self.num_actions)
+        else:
+            input_img = torch.from_numpy(input_img).float().unsqueeze(0)
+            q = self.network(input_img)
+            action = torch.argmax(q).item()
+        return action
+
+    # Perform a training step
+    def learn(self):
+        current_state, action, reward, next_state, terminated = self.buffer.sample(self.batch_size)  # random batch of past transitions from the replay buffer
+
+        # max_a Q_target(s', a)
+        next_q = self.target_network(next_state).detach()
+        # TD target = immediate reward + gamma * max_a Q_target(s', a), zeroed out on terminal transitions
+        target_q = reward + (1. - terminated) * self.discount_factor * next_q.max(dim=1, keepdim=True).values
+        # Loss between predicted Q(s, a) and the TD target
+        loss = F.mse_loss(self.network(current_state).gather(1, action.long()), target_q)
+        self.optimizer.zero_grad()
+        loss.backward()
+        self.optimizer.step()
+
+        result = {
+            'total_steps': self.total_steps,
+            'value_loss': loss.item()
+        }
+        return result
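+
+    # ------------------------------------------------------------
+    # Worked example of the TD target above (illustrative numbers only):
+    # with reward r = 1.0, gamma = 0.99, max_a Q_target(s', a) = 2.0 and a
+    # non-terminal transition (terminated = 0), the target is
+    #     target_q = 1.0 + (1 - 0) * 0.99 * 2.0 = 2.98
+    # whereas on a terminal transition (terminated = 1) it collapses to the
+    # immediate reward, target_q = 1.0.
+    # ------------------------------------------------------------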
+
+    # Process a single transition and update the networks
+    def process(self, transition):
+        result = {}
+        self.total_steps += 1
+
+        # store the transition
+        self.buffer.push(*transition)
+
+        if self.total_steps > self.warmup_steps:
+            result = self.learn()
+
+        # periodically sync the target network weights
+        if self.total_steps % self.target_update_interval == 0:
+            self.target_network.load_state_dict(self.network.state_dict())
+
+        # decay epsilon
+        self.epsilon -= self.epsilon_decay
+
+        return result
+
+    # TODO: this still needs to be fleshed out
\ No newline at end of file
diff --git a/dql_plots/ImageProcessing.py b/dql_plots/ImageProcessing.py
new file mode 100644
index 0000000..9941b93
--- /dev/null
+++ b/dql_plots/ImageProcessing.py
@@ -0,0 +1,95 @@
+"""
+Observation wrapper for our reinforcement learning env
+
+This file wraps around the Gymnasium env and:
+Converts RGB frames to 84x84 grayscale for processing
+Stacks multiple consecutive frames (usually 4) to capture temporal dynamics
+Repeats the same action for a few frames to reduce computation and smooth control
+
+Key features:
+Frame stacking
+Grayscale conversion
+Frame skipping for faster performance
+"""
+
+import cv2
+import numpy as np
+import gymnasium as gym
+import matplotlib.pyplot as plt
+from collections import deque
+
+
+class Observation_processing(gym.Wrapper):
+    def __init__(self, env, repeat_action=3, stack_frames=4, do_nothing_frames=50):
+        super(Observation_processing, self).__init__(env)
+        self.do_nothing_frames = do_nothing_frames
+        self.repeat_action = repeat_action  # repeat the same action for this many frames => cheaper computation
+        self.stack_frames = stack_frames  # needed to infer the motion of the car
+
+        self.frames = deque(maxlen=self.stack_frames)  # rolling window of the latest frames
+
+    def rgb_to_grayscale(self, img):
+        img = cv2.resize(img, dsize=(84, 84))
+        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+        return img
+
+    # reset episode => do nothing -> grayscale -> stack
+    def reset(self, **kwargs):
+        state, info = self.env.reset(**kwargs)
+
+        # do nothing during the initial zoom-in phase
+        for _ in range(self.do_nothing_frames):
+            state, _, terminated, truncated, info = self.env.step(0)
+            # additional termination check to avoid starting from a bad terminal state
+            if terminated or truncated:
+                state, info = self.env.reset(**kwargs)
+
+        state = self.rgb_to_grayscale(state)
+
+        # fill the deque with the first frame
+        for _ in range(self.stack_frames):
+            self.frames.append(state)
+
+        # stack frames
+        stacked_state = np.stack(self.frames, axis=0)
+
+        return stacked_state, info  # [stack_frames=4, 84, 84]
+
+    # take an action
+    def step(self, action):
+        total_reward = 0
+        terminated = False
+        truncated = False
+
+        # repeat the chosen action for repeat_action frames
+        for _ in range(self.repeat_action):
+            state, reward, terminated, truncated, info = self.env.step(action)
+            total_reward += reward
+
+            if terminated or truncated:
+                break
+
+        state = self.rgb_to_grayscale(state)
+        self.frames.append(state)  # push the newest frame; the oldest one falls out of the deque
+        stacked_state = np.stack(self.frames, axis=0)
+
+        return stacked_state, total_reward, terminated, truncated, info
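+
+# ------------------------------------------------------------
+# Sanity-check sketch (commented out, uses an assumed dummy RGB frame):
+# shows what rgb_to_grayscale produces and what shape the wrapper
+# ultimately hands to the Q-network.
+#
+# if __name__ == "__main__":
+#     dummy_rgb = np.zeros((96, 96, 3), dtype=np.uint8)  # CarRacing frames are 96x96x3
+#     wrapper = Observation_processing(gym.make("CarRacing-v3", render_mode="rgb_array", continuous=False))
+#     gray = wrapper.rgb_to_grayscale(dummy_rgb)
+#     print(gray.shape, gray.dtype)  # (84, 84) uint8
+#     stacked, _ = wrapper.reset()
+#     print(stacked.shape)           # (4, 84, 84)
+# ------------------------------------------------------------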
+
+# ============================================================
+# Usage Example:
+# env = gym.make("CarRacing-v3", render_mode="rgb_array")
+# wrapped_env = Observation_processing(env)
+# state, info = wrapped_env.reset()
+# next_state, reward, terminated, truncated, info = wrapped_env.step(action)
+#
+# This wrapper ensures the input to your neural network is:
+# - [stack_frames, 84, 84] shaped (default: [4, 84, 84])
+# - temporally aware (via stacked grayscale frames)
+# - computationally efficient (via frame skipping)
+#
+# Useful for: Deep Q-Networks, Policy Gradient methods, or any CNN-based RL pipeline.
+# ============================================================
\ No newline at end of file
diff --git a/dql_plots/main.py b/dql_plots/main.py
new file mode 100644
index 0000000..947fa3c
--- /dev/null
+++ b/dql_plots/main.py
@@ -0,0 +1,133 @@
+"""
+DQN training script for our env
+
+This main script initializes and trains our DQN agent, using:
+A preprocessed CarRacing env (grayscale, stacked frames, frame skipping)
+A CNN-based Q-network for learning from visual input
+Experience replay
+Epsilon-greedy exploration with linear decay
+
+Tracks rewards per episode, which can be visualized
+"""
+
+import gymnasium as gym  # type: ignore
+import matplotlib.pyplot as plt
+import numpy as np
+from ImageProcessing import Observation_processing
+from DeepQ import DQN
+from PIL import Image
+import imageio
+import os
+from datetime import datetime
+
+# Create directories for saving outputs
+os.makedirs('training_plots', exist_ok=True)
+os.makedirs('training_gifs', exist_ok=True)
+
+# Create the Car Racing environment
+env = gym.make("CarRacing-v3", render_mode="rgb_array", continuous=False)  # change render_mode to "human" to visualize
+env = Observation_processing(env)
+
+max_steps = int(2e4)
+
+agent = DQN(stacked_input=(4, 84, 84), num_actions=env.action_space.n)
+
+## TRAINING LOOP
+num_episodes = 1000
+episode_rewards = []
+
+moving_avg_rewards = []
+window_size = 20  # for the moving average
+
+# For recording GIFs
+record_every = 50  # record a GIF every N episodes
+gif_frames = []
+
+for episode in range(num_episodes):
+    (current_state, _), done = env.reset(), False
+    total_reward = 0
+
+    # For GIF recording
+    if episode % record_every == 0:
+        episode_frames = []
+
+    while not done:
+        action = agent.act(current_state, training=True)
+        next_state, reward, terminated, truncated, info = env.step(action)
+        done = terminated or truncated
+
+        # Record frame if this is a recording episode
+        if episode % record_every == 0:
+            frame = env.env.render()  # get the RGB frame from the underlying env
+            episode_frames.append(frame)
+
+        agent.process((current_state, [action], [reward], next_state, [done]))
+        current_state = next_state
+        total_reward += reward
+
+    episode_rewards.append(total_reward)
+
+    # Calculate moving average
+    if len(episode_rewards) >= window_size:
+        moving_avg = np.mean(episode_rewards[-window_size:])
+        moving_avg_rewards.append(moving_avg)
+
+    # Save a GIF for this episode if it is a recording episode
+    if episode % record_every == 0 and episode_frames:
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        gif_path = f'training_gifs/episode_{episode}_{timestamp}.gif'
+
+        # Resize frames to make the GIF smaller (optional)
+        resized_frames = [Image.fromarray(frame).resize((400, 300)) for frame in episode_frames]
+
+        # Save as GIF
+        imageio.mimsave(gif_path, resized_frames, duration=0.1)
+        print(f"Saved GIF for episode {episode} at {gif_path}")
+
+    print(f"Episode {episode+1} | Reward: {total_reward:.2f} | Epsilon: {agent.epsilon:.3f}")
+
+    # Plot and save training progress periodically
+    if episode % 10 == 0 or episode == num_episodes - 1:
+        plt.figure(figsize=(12, 6))
+
+        # Plot raw rewards
+        plt.subplot(1, 2, 1)
+        plt.plot(episode_rewards, label='Episode Reward', alpha=0.3)
+        if moving_avg_rewards:
+            plt.plot(range(window_size-1, len(episode_rewards)), moving_avg_rewards, label=f'{window_size}-episode MA', color='red')
+        plt.xlabel('Episode')
+        plt.ylabel('Reward')
+        plt.title('Training Progress')
+        plt.legend()
+        plt.grid(True)
+
+        # Plot histogram of recent rewards
+        plt.subplot(1, 2, 2)
+        recent_rewards = episode_rewards[-min(100, len(episode_rewards)):]
+        plt.hist(recent_rewards, bins=20, edgecolor='black')
+        plt.xlabel('Reward')
+        plt.ylabel('Frequency')
+        plt.title('Recent Reward Distribution')
+        plt.grid(True)
+
+        plt.tight_layout()
+
+        # Save the plot
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        plot_path = f'training_plots/training_progress_{timestamp}.png'
+        plt.savefig(plot_path)
+        plt.close()
+        print(f"Saved training plot at {plot_path}")
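+
+# ------------------------------------------------------------
+# Optional checkpointing sketch (not part of the original loop; the
+# file name is illustrative): the script never persists the learned
+# weights, so nothing survives past the run. Saving the online
+# network's state_dict alongside the periodic plots could look like:
+#
+#     checkpoint_path = f'training_plots/dqn_checkpoint_ep{episode}.pt'
+#     torch.save(agent.network.state_dict(), checkpoint_path)
+#
+# and the weights could be restored later with
+#     agent.network.load_state_dict(torch.load(checkpoint_path))
+# (requires `import torch` at the top of this file).
+# ------------------------------------------------------------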
+
+# Visualize the starting 4 grayscale frames (debugging aid)
+
+# state, _ = env.reset()
+# print("The shape of an observation: ", state.shape)
+
+# fig, axes = plt.subplots(1, 4, figsize=(20, 5))
+# for i in range(4):
+#     axes[i].imshow(state[i], cmap='gray')
+#     axes[i].axis('off')
+# plt.show()
\ No newline at end of file
diff --git a/dql_plots/requirements.txt b/dql_plots/requirements.txt
new file mode 100644
index 0000000..f1e74d8
--- /dev/null
+++ b/dql_plots/requirements.txt
@@ -0,0 +1,22 @@
+Box2D==2.3.10
+cloudpickle==3.1.1
+contourpy==1.3.1
+cycler==0.12.1
+Farama-Notifications==0.0.4
+fonttools==4.56.0
+gymnasium==1.1.1
+kiwisolver==1.4.8
+matplotlib==3.10.1
+numpy==2.2.4
+packaging==24.2
+pillow==11.1.0
+pygame==2.6.1
+pyparsing==3.2.1
+python-dateutil==2.9.0.post0
+setuptools==76.0.0
+six==1.17.0
+swig==4.3.0
+typing_extensions==4.12.2
+opencv-python==4.9.0.80  # Added OpenCV
+torch==2.2.2  # Added PyTorch
+imageio==2.34.1  # Added for GIF creation
\ No newline at end of file
diff --git a/dql_plots/training_gifs/episode_0_20250410_140946.gif b/dql_plots/training_gifs/episode_0_20250410_140946.gif
new file mode 100644
index 0000000..8d414d1
Binary files /dev/null and b/dql_plots/training_gifs/episode_0_20250410_140946.gif differ
diff --git a/dql_plots/training_gifs/episode_100_20250410_151106.gif b/dql_plots/training_gifs/episode_100_20250410_151106.gif
new file mode 100644
index 0000000..52118d4
Binary files /dev/null and b/dql_plots/training_gifs/episode_100_20250410_151106.gif differ
diff --git a/dql_plots/training_gifs/episode_150_20250410_153400.gif b/dql_plots/training_gifs/episode_150_20250410_153400.gif
new file mode 100644
index 0000000..9076791
Binary files /dev/null and b/dql_plots/training_gifs/episode_150_20250410_153400.gif differ
diff --git a/dql_plots/training_gifs/episode_200_20250410_160058.gif b/dql_plots/training_gifs/episode_200_20250410_160058.gif
new file mode 100644
index 0000000..03aa35d
Binary files /dev/null and b/dql_plots/training_gifs/episode_200_20250410_160058.gif differ
diff --git a/dql_plots/training_gifs/episode_250_20250410_163033.gif b/dql_plots/training_gifs/episode_250_20250410_163033.gif
new file mode 100644
index 0000000..ebfc3ec
Binary files /dev/null and b/dql_plots/training_gifs/episode_250_20250410_163033.gif differ
diff --git a/dql_plots/training_gifs/episode_300_20250410_170308.gif b/dql_plots/training_gifs/episode_300_20250410_170308.gif
new file mode 100644
index 0000000..ec5f7dc
Binary files /dev/null and b/dql_plots/training_gifs/episode_300_20250410_170308.gif differ
diff --git a/dql_plots/training_gifs/episode_50_20250410_142923.gif b/dql_plots/training_gifs/episode_50_20250410_142923.gif
new file mode 100644
index 0000000..a314a5a
Binary files /dev/null and b/dql_plots/training_gifs/episode_50_20250410_142923.gif differ
diff --git a/dql_plots/training_plots/training_progress_20250410_140957.png b/dql_plots/training_plots/training_progress_20250410_140957.png
new file mode 100644
index 0000000..f7a4c64
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_140957.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_141222.png b/dql_plots/training_plots/training_progress_20250410_141222.png
new file mode 100644
index 0000000..23fbc4d
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_141222.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_141558.png b/dql_plots/training_plots/training_progress_20250410_141558.png
new file mode 100644
index 0000000..750abb5
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_141558.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_142019.png b/dql_plots/training_plots/training_progress_20250410_142019.png
new file mode 100644
index 0000000..8f82ff9
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_142019.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_142447.png b/dql_plots/training_plots/training_progress_20250410_142447.png
new file mode 100644
index 0000000..3c851b6
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_142447.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_142933.png b/dql_plots/training_plots/training_progress_20250410_142933.png
new file mode 100644
index 0000000..0bb92bd
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_142933.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_143538.png b/dql_plots/training_plots/training_progress_20250410_143538.png
new file mode 100644
index 0000000..89c5cfd
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_143538.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_144213.png b/dql_plots/training_plots/training_progress_20250410_144213.png
new file mode 100644
index 0000000..47338ef
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_144213.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_145709.png b/dql_plots/training_plots/training_progress_20250410_145709.png
new file mode 100644
index 0000000..53c1d9a
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_145709.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_150550.png b/dql_plots/training_plots/training_progress_20250410_150550.png
new file mode 100644
index 0000000..e7bb1ae
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_150550.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_151122.png b/dql_plots/training_plots/training_progress_20250410_151122.png
new file mode 100644
index 0000000..537ae34
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_151122.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_151546.png b/dql_plots/training_plots/training_progress_20250410_151546.png
new file mode 100644
index 0000000..3afcd4c
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_151546.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_152004.png b/dql_plots/training_plots/training_progress_20250410_152004.png
new file mode 100644
index 0000000..2737f84
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_152004.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_152420.png b/dql_plots/training_plots/training_progress_20250410_152420.png
new file mode 100644
index 0000000..0457966
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_152420.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_152847.png b/dql_plots/training_plots/training_progress_20250410_152847.png
new file mode 100644
index 0000000..294bd6a
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_152847.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_153420.png b/dql_plots/training_plots/training_progress_20250410_153420.png
new file mode 100644
index 0000000..ce23120
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_153420.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_153848.png b/dql_plots/training_plots/training_progress_20250410_153848.png
new file mode 100644
index 0000000..7460704
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_153848.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_154250.png b/dql_plots/training_plots/training_progress_20250410_154250.png
new file mode 100644
index 0000000..97880c7
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_154250.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_154830.png b/dql_plots/training_plots/training_progress_20250410_154830.png
new file mode 100644
index 0000000..66e38e1
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_154830.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_155452.png b/dql_plots/training_plots/training_progress_20250410_155452.png
new file mode 100644
index 0000000..45ea740
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_155452.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_160114.png b/dql_plots/training_plots/training_progress_20250410_160114.png
new file mode 100644
index 0000000..8fd85f8
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_160114.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_160655.png b/dql_plots/training_plots/training_progress_20250410_160655.png
new file mode 100644
index 0000000..6cc79b9
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_160655.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_161249.png b/dql_plots/training_plots/training_progress_20250410_161249.png
new file mode 100644
index 0000000..70a590e
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_161249.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_162005.png b/dql_plots/training_plots/training_progress_20250410_162005.png
new file mode 100644
index 0000000..b3fdd85
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_162005.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_162550.png b/dql_plots/training_plots/training_progress_20250410_162550.png
new file mode 100644
index 0000000..5c387c9
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_162550.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_163041.png b/dql_plots/training_plots/training_progress_20250410_163041.png
new file mode 100644
index 0000000..e8e3e42
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_163041.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_163621.png b/dql_plots/training_plots/training_progress_20250410_163621.png
new file mode 100644
index 0000000..524e5ea
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_163621.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_164329.png b/dql_plots/training_plots/training_progress_20250410_164329.png
new file mode 100644
index 0000000..abc8d0c
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_164329.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_165013.png b/dql_plots/training_plots/training_progress_20250410_165013.png
new file mode 100644
index 0000000..b8e12c4
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_165013.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_165740.png b/dql_plots/training_plots/training_progress_20250410_165740.png
new file mode 100644
index 0000000..130ac88
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_165740.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_170317.png b/dql_plots/training_plots/training_progress_20250410_170317.png
new file mode 100644
index 0000000..8197a6b
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_170317.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_170949.png b/dql_plots/training_plots/training_progress_20250410_170949.png
new file mode 100644
index 0000000..204f5a9
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_170949.png differ
diff --git a/dql_plots/training_plots/training_progress_20250410_171617.png b/dql_plots/training_plots/training_progress_20250410_171617.png
new file mode 100644
index 0000000..b433547
Binary files /dev/null and b/dql_plots/training_plots/training_progress_20250410_171617.png differ