#!/usr/bin/env python3
"""
Evaluation script for RL portfolio allocation using Stable Baselines 3.
Compatible with attention-based policies and gymnasium environments.
"""
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from stable_baselines3 import PPO, SAC, TD3
from stable_baselines3.common.monitor import Monitor
from typing import List, Dict, Any
import warnings
warnings.filterwarnings('ignore')
import config
from enviorment import PortfolioEnv
def evaluate_policy_sb3(model, env, num_episodes=1, deterministic=True, render=False):
    """
    Evaluate a Stable Baselines 3 policy and collect detailed episode data.

    Args:
        model: Trained SB3 model
        env: Portfolio environment
        num_episodes: Number of episodes to run
        deterministic: Use deterministic actions
        render: Whether to render the environment

    Returns:
        List of episode data dictionaries
    """
    all_episode_data = []

    for episode in range(num_episodes):
        print(f"Running evaluation episode {episode + 1}/{num_episodes}...")

        # Reset environment
        obs, info = env.reset()
        episode_data = {
            "timestamps": [],
            "values": [],
            "allocations": [],
            "market_data": [],
            "rewards": [],
            "actions": [],
            "observations": []
        }

        # Store initial state
        episode_data["timestamps"].append(env.current_time)
        episode_data["values"].append(env.current_value)
        episode_data["allocations"].append(env.money_split_ratio.copy())
        episode_data["market_data"].append(env.dfslice.copy())
        episode_data["rewards"].append(0.0)  # Initial reward is 0
        episode_data["actions"].append([0.0] * (len(config.COINS) + 1))  # Initial action
        episode_data["observations"].append(obs.copy())

        terminated = truncated = False
        step = 0

        while not (terminated or truncated):
            # Get action from the model
            action, _ = model.predict(obs, deterministic=deterministic)

            # Apply action to environment
            obs, reward, terminated, truncated, info = env.step(action)
            step += 1

            # Store step data
            episode_data["timestamps"].append(env.current_time)
            episode_data["values"].append(env.current_value)
            episode_data["allocations"].append(env.money_split_ratio.copy())
            episode_data["market_data"].append(env.dfslice.copy())
            episode_data["rewards"].append(reward)
            episode_data["actions"].append(action.copy())
            episode_data["observations"].append(obs.copy())

            if render:
                env.render()

            # Progress indicator
            if step % 100 == 0:
                print(f"  Step {step}, Portfolio Value: ${env.current_value:.2f}, Reward: {reward:.4f}")

        # Consolidate market data into a single DataFrame
        market_data_list = episode_data["market_data"]
        if market_data_list:
            episode_data["market_data"] = pd.concat(market_data_list, ignore_index=True)
        else:
            episode_data["market_data"] = pd.DataFrame()

        # Calculate episode statistics
        total_return = episode_data["values"][-1] - episode_data["values"][0]
        percent_return = (total_return / episode_data["values"][0]) * 100
        total_reward = sum(episode_data["rewards"])

        print(f"  Episode {episode + 1} completed:")
        print(f"    Steps: {step}")
        print(f"    Total Return: ${total_return:.2f} ({percent_return:.2f}%)")
        print(f"    Total Reward: {total_reward:.4f}")
        print(f"    Final Portfolio Value: ${episode_data['values'][-1]:.2f}")

        all_episode_data.append(episode_data)

    return all_episode_data
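
# Illustrative usage sketch (not executed by this script): it assumes a PPO checkpoint at
# the example path "models/best_model.zip" and a PortfolioEnv that takes no arguments.
#
#   env = PortfolioEnv()
#   model = PPO.load("models/best_model.zip")
#   episodes = evaluate_policy_sb3(model, env, num_episodes=3, deterministic=True)
#   best = max(episodes, key=lambda ep: ep["values"][-1])  # episode with highest final value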
def plot_evaluation_sb3(episode_data, save_plots=True):
    """
    Plot comprehensive evaluation results with SB3 data.

    Args:
        episode_data: Episode data dictionary from evaluate_policy_sb3
        save_plots: Whether to save plots to files
    """
    timestamps = episode_data["timestamps"]
    portfolio_values = episode_data["values"]
    rewards = episode_data["rewards"]

    # Create timezone-naive datetime index
    datetime_index = pd.to_datetime(pd.Index(timestamps)).tz_localize(None)

    # Create allocations DataFrame
    allocations_df = pd.DataFrame(
        episode_data["allocations"],
        columns=["Cash"] + config.COINS,
        index=datetime_index
    )
    portfolio_value_series = pd.Series(portfolio_values, index=datetime_index)
    rewards_series = pd.Series(rewards, index=datetime_index)

    # --- Plot 1: Portfolio Value and Cumulative Rewards ---
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10), sharex=True)

    # Portfolio value
    portfolio_value_series.plot(ax=ax1, label='Portfolio Value', color='blue', linewidth=2)
    ax1.set_ylabel('Portfolio Value ($)')
    ax1.set_title('Portfolio Performance Over Episode')
    ax1.grid(True, which='both', linestyle='--', linewidth=0.5)
    ax1.legend()

    # Calculate and display performance metrics
    start_val = portfolio_values[0]
    end_val = portfolio_values[-1]
    percent_return = ((end_val - start_val) / start_val) * 100
    max_val = max(portfolio_values)
    min_val = min(portfolio_values)
    # Simple range-based drawdown (overall max vs. overall min), not a running-peak drawdown
    max_drawdown = ((max_val - min_val) / max_val) * 100
    performance_text = f'Return: {percent_return:.2f}%\nMax Drawdown: {max_drawdown:.2f}%'
    ax1.text(0.02, 0.95, performance_text, transform=ax1.transAxes,
             verticalalignment='top', bbox=dict(boxstyle='round,pad=0.5', fc='lightgreen', alpha=0.7))

    # Cumulative rewards
    cumulative_rewards = np.cumsum(rewards)
    ax2.plot(datetime_index, cumulative_rewards, label='Cumulative Reward', color='red', linewidth=2)
    ax2.set_ylabel('Cumulative Reward')
    ax2.set_xlabel('Time')
    ax2.set_title('Cumulative Rewards Over Episode')
    ax2.grid(True, which='both', linestyle='--', linewidth=0.5)
    ax2.legend()

    plt.xticks(rotation=45)
    plt.tight_layout()
    if save_plots:
        output_path = os.path.join(config.LOGDIR, "evaluation_portfolio_performance.png")
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"Portfolio performance plot saved to {output_path}")
    plt.show()

    # --- Plot 2: Portfolio Allocation Strategy ---
    plt.figure(figsize=(15, 8))
    ax = plt.gca()
    allocations_df.plot.area(ax=ax, stacked=True, linewidth=0, alpha=0.8)
    ax.set_ylabel('Allocation (fraction of portfolio)')
    ax.set_xlabel('Time')
    ax.set_title('Portfolio Allocation Strategy Over Time')
    ax.grid(True, which='both', linestyle='--', linewidth=0.5, alpha=0.5)
    ax.legend(title='Assets', loc='upper left', bbox_to_anchor=(1.02, 1))
    ax.set_ylim(0, 1)

    # Add allocation statistics
    avg_allocations = allocations_df.mean()
    allocation_text = "Average Allocations:\n" + "\n".join(
        f"{asset}: {pct:.1%}" for asset, pct in avg_allocations.items()
    )
    ax.text(0.02, 0.98, allocation_text, transform=ax.transAxes,
            verticalalignment='top', bbox=dict(boxstyle='round,pad=0.5', fc='wheat', alpha=0.7))

    plt.xticks(rotation=45)
    plt.tight_layout()
    if save_plots:
        output_path = os.path.join(config.LOGDIR, "evaluation_allocation_strategy.png")
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"Allocation strategy plot saved to {output_path}")
    plt.show()

    # --- Plot 3: Asset Prices with Allocation Overlay ---
    market_data_df = episode_data["market_data"]
    if not market_data_df.empty:
        market_data_df['date'] = pd.to_datetime(market_data_df['date']).dt.tz_localize(None)

        fig, axes = plt.subplots(len(config.COINS), 1, figsize=(15, 4 * len(config.COINS)), sharex=True)
        if len(config.COINS) == 1:
            axes = [axes]
        fig.suptitle('Asset Prices and Allocation Decisions', fontsize=16, y=0.98)

        for ax, coin in zip(axes, config.COINS):
            coin_data = market_data_df[market_data_df['coin'] == coin].set_index('date')
            if not coin_data.empty:
                # Plot OHLC prices
                plot_cols = ['open', 'high', 'low', 'close']
                available_cols = [col for col in plot_cols if col in coin_data.columns]
                coin_data[available_cols].plot(ax=ax, linewidth=1.5, alpha=0.8)
                ax.set_title(f'{coin} Price Movement')
                ax.set_ylabel('Price ($)')
                ax.grid(True, which='both', linestyle='--', linewidth=0.5, alpha=0.5)
                ax.legend(loc='upper left')

                # Overlay allocation on a secondary y-axis
                ax2 = ax.twinx()
                allocations_df[coin].plot(ax=ax2, color='red', linewidth=2, alpha=0.7, label=f'{coin} Allocation')
                ax2.set_ylabel(f'{coin} Allocation', color='red')
                ax2.tick_params(axis='y', labelcolor='red')
                ax2.set_ylim(0, 1)
                ax2.legend(loc='upper right')

        axes[-1].set_xlabel('Time')
        plt.xticks(rotation=45)
        plt.tight_layout()
        if save_plots:
            output_path = os.path.join(config.LOGDIR, "evaluation_prices_and_allocations.png")
            plt.savefig(output_path, dpi=300, bbox_inches='tight')
            print(f"Asset prices and allocations plot saved to {output_path}")
        plt.show()

    # --- Plot 4: Action Distribution ---
    actions_df = pd.DataFrame(
        episode_data["actions"],
        columns=["Cash"] + config.COINS,
        index=datetime_index
    )

    plt.figure(figsize=(15, 6))
    ax = plt.gca()
    actions_df.plot(ax=ax, linewidth=1.5, alpha=0.8)
    ax.set_ylabel('Action Values')
    ax.set_xlabel('Time')
    ax.set_title('Raw Action Values Over Time')
    ax.grid(True, which='both', linestyle='--', linewidth=0.5, alpha=0.5)
    ax.legend(title='Actions', loc='upper right')

    plt.xticks(rotation=45)
    plt.tight_layout()
    if save_plots:
        output_path = os.path.join(config.LOGDIR, "evaluation_actions.png")
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"Actions plot saved to {output_path}")
    plt.show()
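
# The drawdown annotated in Plot 1 above is the simple (max - min) / max over the whole
# episode. The helper below is an optional sketch of the conventional running-peak
# maximum drawdown; it is not called anywhere in this script.
def max_drawdown_running_peak(values):
    """Return the maximum peak-to-trough drawdown (in percent) of a value series."""
    values = np.asarray(values, dtype=float)
    running_peak = np.maximum.accumulate(values)        # highest value seen so far
    drawdowns = (running_peak - values) / running_peak  # drop relative to that peak
    return float(drawdowns.max() * 100)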
def load_sb3_model(model_path):
    """
    Load a Stable Baselines 3 model from file.

    Args:
        model_path: Path to the saved model

    Returns:
        Loaded SB3 model
    """
    print(f"Loading model from: {model_path}")

    # Determine algorithm type from the filename
    if "PPO" in model_path.upper():
        model = PPO.load(model_path)
        print("Loaded PPO model")
    elif "SAC" in model_path.upper():
        model = SAC.load(model_path)
        print("Loaded SAC model")
    elif "TD3" in model_path.upper():
        model = TD3.load(model_path)
        print("Loaded TD3 model")
    else:
        # Unknown filename: fall back to trying each algorithm in turn, starting with PPO
        try:
            model = PPO.load(model_path)
            print("Loaded model as PPO (default)")
        except Exception:
            try:
                model = SAC.load(model_path)
                print("Loaded model as SAC")
            except Exception:
                model = TD3.load(model_path)
                print("Loaded model as TD3")

    return model
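
# Usage sketch: SB3's load() also accepts an env keyword, which can be handy if the loaded
# model should be attached to a fresh environment right away. The path below is only an
# example taken from the model_paths list in __main__ and may not exist on disk.
#
#   model = load_sb3_model("models/PPO_coin_attention_medium_final.zip")
#   # or, when the algorithm is known:
#   # model = PPO.load("models/PPO_coin_attention_medium_final.zip", env=PortfolioEnv())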
def print_evaluation_summary(all_episode_data):
    """Print a summary of evaluation results."""
    print("\n" + "=" * 60)
    print("EVALUATION SUMMARY")
    print("=" * 60)

    for i, episode_data in enumerate(all_episode_data):
        start_value = episode_data["values"][0]
        end_value = episode_data["values"][-1]
        total_return = end_value - start_value
        percent_return = (total_return / start_value) * 100
        total_reward = sum(episode_data["rewards"])
        steps = len(episode_data["timestamps"]) - 1

        print(f"\nEpisode {i + 1}:")
        print(f"  Duration: {steps} steps")
        print(f"  Initial Value: ${start_value:.2f}")
        print(f"  Final Value: ${end_value:.2f}")
        print(f"  Total Return: ${total_return:.2f} ({percent_return:.2f}%)")
        print(f"  Total Reward: {total_reward:.4f}")
        print(f"  Avg Reward per Step: {total_reward / steps:.6f}")

        # Portfolio allocation summary
        allocations_df = pd.DataFrame(episode_data["allocations"], columns=["Cash"] + config.COINS)
        avg_allocations = allocations_df.mean()
        print("  Average Allocations:")
        for asset, allocation in avg_allocations.items():
            print(f"    {asset}: {allocation:.1%}")

    # Overall summary across episodes
    if len(all_episode_data) > 1:
        all_returns = [((ep["values"][-1] - ep["values"][0]) / ep["values"][0]) * 100
                       for ep in all_episode_data]
        print("\nOverall Performance:")
        print(f"  Mean Return: {np.mean(all_returns):.2f}%")
        print(f"  Std Return: {np.std(all_returns):.2f}%")
        print(f"  Min/Max Return: {np.min(all_returns):.2f}% / {np.max(all_returns):.2f}%")
if __name__ == '__main__':
    # Configuration
    print("SB3 Portfolio Evaluation Script")
    print("=" * 50)

    # Create log directory if it doesn't exist
    os.makedirs(config.LOGDIR, exist_ok=True)

    # Model paths to evaluate
    model_paths = [
        "models/PPO_coin_attention_medium_final.zip",
        "models/best_model.zip"
    ]

    # Check which models exist
    available_models = []
    for model_path in model_paths:
        if os.path.exists(model_path):
            available_models.append(model_path)
            print(f"Found model: {model_path}")
        else:
            print(f"Model not found: {model_path}")

    if not available_models:
        print("No models found! Please train a model first.")
        exit(1)

    # Use the first available model
    model_path = available_models[0]
    print(f"\nEvaluating model: {model_path}")

    # Load the model
    try:
        model = load_sb3_model(model_path)
        print("Model loaded successfully")
    except Exception as e:
        print(f"Error loading model: {e}")
        exit(1)

    # Create environment
    print("Creating evaluation environment...")
    eval_env = PortfolioEnv()
    print("Environment created")
    print(f"  Observation space: {eval_env.observation_space}")
    print(f"  Action space: {eval_env.action_space}")

    # Run evaluation
    print("\nRunning evaluation...")
    num_episodes = 1  # Change this to run multiple episodes
    evaluation_data = evaluate_policy_sb3(
        model, eval_env,
        num_episodes=num_episodes,
        deterministic=True,
        render=False
    )

    # Plot results
    print("\nGenerating plots...")
    plot_evaluation_sb3(evaluation_data[0], save_plots=True)

    # Print summary
    print_evaluation_summary(evaluation_data)

    print("\nEvaluation completed!")
    print(f"Plots saved to: {config.LOGDIR}")

    # Close environment
    eval_env.close()