Skip to content

Maia2 not able to run outside of cuda #8

@chip8fan

Description

@chip8fan

I have a training script here:

if __name__ == "__main__":
from maia2 import model, train, utils
maia2_model = model.from_pretrained(type="rapid", device="cpu")
cfg = utils.parse_args(cfg_file_path='./maia2_models/config.yaml')
train.run(cfg)

and the output is here:

me@mac maia2 % python3 run.py
Downloading model for rapid games.
Downloading...
From (original): https://drive.google.com/uc?id=1gbC1-c7c0EQOPPAVpGWubezeEW8grVwc
From (redirected): https://drive.google.com/uc?id=1gbC1-c7c0EQOPPAVpGWubezeEW8grVwc&confirm=t&uuid=1ffaed63-6012-4d70-a8d0-c2657d4180a1
To: /Volumes/Lichess/maia2/maia2_models/rapid_model.pt
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 280M/280M [00:07<00:00, 38.2MB/s]
Downloading...
From: https://drive.google.com/uc?id=1GQTskYMVMubNwZH2Bi6AmevI15CS6gk0
To: /Volumes/Lichess/maia2/maia2_models/config.yaml
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 701/701 [00:00<00:00, 1.86MB/s]
Model for rapid games loaded to cpu.
Configurations:
data_root: /datadrive2/lichess_data
seed: 42
num_workers: 16
verbose: 1
max_epochs: 3
max_ply: 300
clock_threshold: 30
chunk_size: 20000
start_year: 2018
start_month: 5
end_year: 2023
end_month: 11
from_checkpoint: False
checkpoint_epoch: 0
checkpoint_year: 2018
checkpoint_month: 5
num_cpu_left: 16
queue_length: 2
lr: 0.0001
wd: 1e-05
batch_size: 8192
first_n_moves: 10
last_n_moves: 10
dim_cnn: 256
dim_vit: 1024
num_blocks_cnn: 5
num_blocks_vit: 2
input_channels: 18
vit_length: 8
elo_dim: 128
side_info: True
side_info_coefficient: 1.0
value: True
value_coefficient: 1.0
max_games_per_elo_range: 20
MAIA2Model(
(chess_cnn): ChessResNet(
(conv1): Conv2d(18, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(layers): Sequential(
(0): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(dropout): Dropout(p=0.5, inplace=False)
)
(1): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(dropout): Dropout(p=0.5, inplace=False)
)
(2): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(dropout): Dropout(p=0.5, inplace=False)
)
(3): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(dropout): Dropout(p=0.5, inplace=False)
)
(4): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(dropout): Dropout(p=0.5, inplace=False)
)
)
(conv_last): Conv2d(256, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn_last): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(to_patch_embedding): Sequential(
(0): Linear(in_features=64, out_features=1024, bias=True)
(1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
(transformer): Transformer(
(norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(layers): ModuleList()
(elo_layers): ModuleList(
(0-1): 2 x ModuleList(
(0): EloAwareAttention(
(norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(attend): Softmax(dim=-1)
(dropout): Dropout(p=0.1, inplace=False)
(to_qkv): Linear(in_features=1024, out_features=3072, bias=False)
(elo_query): Linear(in_features=256, out_features=1024, bias=False)
(to_out): Sequential(
(0): Linear(in_features=1024, out_features=1024, bias=True)
(1): Dropout(p=0.1, inplace=False)
)
)
(1): FeedForward(
(net): Sequential(
(0): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(1): Linear(in_features=1024, out_features=1024, bias=True)
(2): GELU(approximate='none')
(3): Dropout(p=0.1, inplace=False)
(4): Linear(in_features=1024, out_features=1024, bias=True)
(5): Dropout(p=0.1, inplace=False)
)
)
)
)
)
(fc_1): Linear(in_features=1024, out_features=1880, bias=True)
(fc_2): Linear(in_features=1024, out_features=2021, bias=True)
(fc_3): Linear(in_features=128, out_features=1, bias=True)
(fc_3_1): Linear(in_features=1024, out_features=128, bias=True)
(elo_embedding): Embedding(11, 128)
(dropout): Dropout(p=0.1, inplace=False)
(last_ln): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
Traceback (most recent call last):
File "/Volumes/Lichess/maia2/run.py", line 5, in <module>
train.run(cfg)
File "/Volumes/Lichess/maia2/maia2/train.py", line 33, in run
model = model.cuda()
File "/Users/me/.pyenv/versions/3.10.9/lib/python3.10/site-packages/torch/nn/modules/module.py", line 916, in cuda
return self._apply(lambda t: t.cuda(device))
File "/Users/me/.pyenv/versions/3.10.9/lib/python3.10/site-packages/torch/nn/modules/module.py", line 780, in _apply
module._apply(fn)
File "/Users/me/.pyenv/versions/3.10.9/lib/python3.10/site-packages/torch/nn/modules/module.py", line 780, in _apply
module._apply(fn)
File "/Users/me/.pyenv/versions/3.10.9/lib/python3.10/site-packages/torch/nn/modules/module.py", line 805, in _apply
param_applied = fn(param)
File "/Users/me/.pyenv/versions/3.10.9/lib/python3.10/site-packages/torch/nn/modules/module.py", line 916, in <lambda>
return self._apply(lambda t: t.cuda(device))
File "/Users/me/.pyenv/versions/3.10.9/lib/python3.10/site-packages/torch/cuda/__init__.py", line 305, in _lazy_init
raise AssertionError("Torch not compiled with CUDA enabled")
AssertionError: Torch not compiled with CUDA enabled

Are there any alternatives to having cuda? (on m4 mac)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions