Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions .github/workflows/nv-pre-compile-ops.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,20 @@ jobs:
unit-tests:
runs-on: ubuntu-24.04
container:
image: deepspeed/gh-builder:ubuntu1804-py38-torch1131-cu116
image: nvidia/cuda:12.6.3-devel-ubuntu22.04

steps:
- name: Install system dependencies
run: |
apt-get update && apt-get install -y git python3 python3-pip libaio-dev ninja-build
ln -sf /usr/bin/python3 /usr/bin/python

- uses: actions/checkout@v4

- name: Install PyTorch
run: |
pip install torch==2.10.0 --index-url https://download.pytorch.org/whl/cu126

- name: environment
run: |
which python
Expand All @@ -36,7 +45,7 @@ jobs:
#python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
- name: Compile DeepSpeed Ops
run: |
DS_ACCELERATOR=cuda DS_ENABLE_NINJA=1 TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0" DS_BUILD_OPS=1 DS_BUILD_SPARSE_ATTN=0 DS_BUILD_FP_QUANTIZER=0 DS_BUILD_CUTLASS_OPS=0 DS_BUILD_GDS=0 DS_BUILD_RAGGED_DEVICE_OPS=0 DS_BUILD_EVOFORMER_ATTN=0 DS_BUILD_DEEP_COMPILE=0 pip3 install .
DS_ACCELERATOR=cuda DS_ENABLE_NINJA=1 TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0" DS_BUILD_OPS=1 DS_BUILD_SPARSE_ATTN=0 DS_BUILD_FP_QUANTIZER=0 DS_BUILD_CUTLASS_OPS=0 DS_BUILD_GDS=0 DS_BUILD_RAGGED_DEVICE_OPS=0 DS_BUILD_EVOFORMER_ATTN=0 DS_BUILD_DEEP_COMPILE=0 pip3 install .
- name: DS Report
run: |
ds_report
DS_ACCELERATOR=cuda ds_report
4 changes: 0 additions & 4 deletions accelerator/abstract_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,6 @@ def supported_dtypes(self):
...

# Misc
@abc.abstractmethod
def amp(self):
...

@abc.abstractmethod
def is_available(self):
...
Expand Down
3 changes: 0 additions & 3 deletions accelerator/cpu_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,6 @@ def available_memory(self, device_index=None):
return psutil.virtual_memory().available

# Misc
def amp(self):
return torch.cpu.amp

def is_available(self):
return True

Expand Down
5 changes: 0 additions & 5 deletions accelerator/cuda_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,11 +222,6 @@ def supported_dtypes(self):
return supported_dtypes

# Misc
def amp(self):
if hasattr(torch.cuda, 'amp'):
return torch.cuda.amp
return None

def is_available(self):
return torch.cuda.is_available()

Expand Down
3 changes: 0 additions & 3 deletions accelerator/hpu_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,6 @@ def supported_dtypes(self):
return supported_dtypes

# Misc
def amp(self):
return None

def is_available(self):
return self.hpu.is_available()

Expand Down
5 changes: 0 additions & 5 deletions accelerator/mlu_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,6 @@ def supported_dtypes(self):
return supported_dtypes

# Misc
def amp(self):
if hasattr(torch.mlu, 'amp'):
return torch.mlu.amp
return None

def is_available(self):
return torch.mlu.is_available()

Expand Down
3 changes: 0 additions & 3 deletions accelerator/mps_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,6 @@ def supported_dtypes(self):
return [torch.float]

# Misc
def amp(self):
return

def is_available(self):
return hasattr(torch.backends, "mps") and torch.backends.mps.is_available()

Expand Down
5 changes: 0 additions & 5 deletions accelerator/npu_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,11 +163,6 @@ def supported_dtypes(self):
return [torch.float, torch.half, torch.bfloat16]

# Misc
def amp(self):
if hasattr(torch.npu, 'amp'):
return torch.npu.amp
return None

def is_available(self):
return torch.npu.is_available()

Expand Down
5 changes: 0 additions & 5 deletions accelerator/sdaa_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,11 +192,6 @@ def supported_dtypes(self):
return supported_dtypes

# Misc
def amp(self):
if hasattr(torch.sdaa, 'amp'):
return torch.sdaa.amp
return None

def is_available(self):
return torch.sdaa.is_available()

Expand Down
3 changes: 0 additions & 3 deletions accelerator/xpu_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,6 @@ def available_memory(self, device_index=None):
return self.total_memory(device_index) - self.memory_allocated(device_index)

# Misc
def amp(self):
return torch.amp

def is_available(self):
return torch.xpu.is_available()

Expand Down
15 changes: 2 additions & 13 deletions deepspeed/runtime/zero/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from torch.nn.parameter import Parameter
from torch.nn import init
from torch.nn.modules.module import Module
from deepspeed.runtime.utils import noop_decorator
from deepspeed import comm as dist
from deepspeed.accelerator import get_accelerator

Expand All @@ -33,18 +32,8 @@ def print_rank_0(message, debug=False, force=False):
print(message)


try:
# Fix `torch.[device].amp.custom_fwd/bwd` FutureWarning in torch 2.4
if hasattr(torch, 'amp') and hasattr(torch.amp, 'custom_fwd') and hasattr(torch.amp, 'custom_bwd'):
autocast_custom_fwd = functools.partial(torch.amp.custom_fwd, device_type=get_accelerator().device_name())
autocast_custom_bwd = functools.partial(torch.amp.custom_bwd, device_type=get_accelerator().device_name())
else:
# original implementation
autocast_custom_fwd = get_accelerator().amp().custom_fwd
autocast_custom_bwd = get_accelerator().amp().custom_bwd
except (ImportError, AttributeError) as exp:
autocast_custom_fwd = noop_decorator
autocast_custom_bwd = noop_decorator
autocast_custom_fwd = functools.partial(torch.amp.custom_fwd, device_type=get_accelerator().device_name())
autocast_custom_bwd = functools.partial(torch.amp.custom_bwd, device_type=get_accelerator().device_name())


class LinearFunctionForZeroStage3(torch.autograd.Function):
Expand Down