diff --git a/.ci/helion/install.sh b/.ci/helion/install.sh
index 30d889bbf..b9f366fee 100644
--- a/.ci/helion/install.sh
+++ b/.ci/helion/install.sh
@@ -37,4 +37,4 @@
 cd ${tritonbench_dir}
 python install.py --helion
 # Helion requires tritonbench installed as a library
-pip install -e .
\ No newline at end of file
+pip install -e .
diff --git a/benchmarks/gen_metadata/run.py b/benchmarks/gen_metadata/run.py
index e8b91784a..d570df404 100644
--- a/benchmarks/gen_metadata/run.py
+++ b/benchmarks/gen_metadata/run.py
@@ -87,7 +87,7 @@ def run(args: argparse.Namespace):
         DTYPE_OPERATORS[op] = op_bench.DEFAULT_PRECISION
         if baseline := op_bench.has_baseline():
             BASELINE_OPERATORS[op] = baseline
-        if op_bench.has_metric("tflops") and not op in TFLOPS_SKIP_OPERATORS:
+        if op_bench.has_metric("tflops") and op not in TFLOPS_SKIP_OPERATORS:
             TFLOPS_OPERATORS.append(op)
         if op_bench.has_bwd():
             BACKWARD_OPERATORS.append(op)
diff --git a/benchmarks/mojo_matmul/run.py b/benchmarks/mojo_matmul/run.py
index f981ed496..9c36f54d6 100644
--- a/benchmarks/mojo_matmul/run.py
+++ b/benchmarks/mojo_matmul/run.py
@@ -4,14 +4,10 @@
 pip install --pre modular --index-url https://dl.modular.com/public/nightly/python/simple/
 """

-import argparse
-import json
-import logging
 import os
 import sys
 from os.path import abspath, exists
-from typing import Dict, List


 def setup_tritonbench_cwd():
@@ -36,15 +32,13 @@ def setup_tritonbench_cwd():
 from typing import Callable

 import max.graph as mg
-import torch
 from max import driver, engine
-from max.graph import DeviceRef, Graph, ops, TensorType, TensorValue
-from max.graph.type import DType, Shape, ShapeLike
+from max.graph import DeviceRef, ops, TensorType
+from max.graph.type import DType

 from tritonbench.operators import load_opbench_by_name
 from tritonbench.utils.parser import get_parser
-from tritonbench.utils.triton_op import register_benchmark


 def promote_mojo_tensor_to_fp32(mojo_tensor, dtype):
diff --git a/benchmarks/nightly/gen.py b/benchmarks/nightly/gen.py
index 1f9f383b5..1ec5acd37 100644
--- a/benchmarks/nightly/gen.py
+++ b/benchmarks/nightly/gen.py
@@ -59,7 +59,7 @@ def gen_run(operators: List[str], bwd: bool = False) -> Dict[str, Any]:
             cmd.append("--bwd")
         # add backends
         run_backends = list(TRITON_OPS[op].keys())
-        if _has_meaningful_baseline(op) and not BASELINE_OPS[op] in run_backends:
+        if _has_meaningful_baseline(op) and BASELINE_OPS[op] not in run_backends:
             run_backends.append(BASELINE_OPS[op])
         cmd.extend(["--only", ",".join(run_backends)])
         out[run_name] = {}
diff --git a/benchmarks/nightly/run.py b/benchmarks/nightly/run.py
index 831c6bdd6..dcd53e3a0 100644
--- a/benchmarks/nightly/run.py
+++ b/benchmarks/nightly/run.py
@@ -107,7 +107,7 @@ def run():
     logger.info(f"[nightly] logging result json file to {result_json_file}.")
     if args.log_scuba:
         log_benchmark(aggregated_obj)
-        logger.info(f"[nightly] logging results to scuba.")
+        logger.info("[nightly] logging results to scuba.")


 if __name__ == "__main__":
diff --git a/benchmarks/power_analysis/run.py b/benchmarks/power_analysis/run.py
index 179f928a3..8cc2757ff 100644
--- a/benchmarks/power_analysis/run.py
+++ b/benchmarks/power_analysis/run.py
@@ -7,7 +7,6 @@
 import os
 import sys

-import torch

 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
diff --git a/pyproject.toml b/pyproject.toml
index a5c7264be..cd1e47df0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,6 +15,10 @@ dependencies = [
 [tool.setuptools.packages.find]
 include = ["tritonbench*"]

+[tool.ruff]
+fix = true
+exclude = ["submodules"]
+
 [tool.ufmt]
 formatter = "ruff-api"
 sorter = "usort"
diff --git a/tools/flash_attn/install.py b/tools/flash_attn/install.py
index 8179db670..c804fed64 100644
--- a/tools/flash_attn/install.py
+++ b/tools/flash_attn/install.py
@@ -1,6 +1,5 @@
 import os
 import subprocess
-import sys
 from pathlib import Path

diff --git a/tritonbench/components/power/charts.py b/tritonbench/components/power/charts.py
index cfb907a36..38a340c1a 100644
--- a/tritonbench/components/power/charts.py
+++ b/tritonbench/components/power/charts.py
@@ -1,9 +1,6 @@
 import csv
 import logging
 import os
-import signal
-import subprocess
-import time

 import matplotlib.pyplot as plt
diff --git a/tritonbench/components/power/power_manager.py b/tritonbench/components/power/power_manager.py
index 0ea01440d..cd1e4c71c 100644
--- a/tritonbench/components/power/power_manager.py
+++ b/tritonbench/components/power/power_manager.py
@@ -1,5 +1,4 @@
 import csv
-import dataclasses
 import os
 import threading
 import time
@@ -14,12 +13,10 @@
     NVML_CLOCK_SM,
     NVML_FI_DEV_POWER_CURRENT_LIMIT,
     NVML_FI_DEV_POWER_INSTANT,
-    NVML_SUCCESS,
     NVML_TEMPERATURE_GPU,
     nvmlDeviceGetClock,
     nvmlDeviceGetFieldValues,
     nvmlDeviceGetHandleByIndex,
-    nvmlDeviceGetPerformanceState,
     nvmlDeviceGetTemperature,
     nvmlInit,
     nvmlShutdown,
diff --git a/tritonbench/components/tasks/manager.py b/tritonbench/components/tasks/manager.py
index 84872c70b..ea70c74dc 100644
--- a/tritonbench/components/tasks/manager.py
+++ b/tritonbench/components/tasks/manager.py
@@ -35,11 +35,8 @@ def make_instance(
         class_name: str,
     ) -> None:
         import importlib
-        import os
-        import traceback

         # required as this is in child process
-        from tritonbench.components.power.power_manager import PowerManager

         module = importlib.import_module(module_path, package=package)
         Ctor = getattr(module, class_name)
diff --git a/tritonbench/kernels/blackwell_attention_utils.py b/tritonbench/kernels/blackwell_attention_utils.py
index f22707da1..3e6ddc133 100644
--- a/tritonbench/kernels/blackwell_attention_utils.py
+++ b/tritonbench/kernels/blackwell_attention_utils.py
@@ -3,9 +3,6 @@
 generic attention kernels.
 """

-import os
-from functools import lru_cache
-
 import torch
 import triton
diff --git a/tritonbench/kernels/blackwell_triton_fused_attention.py b/tritonbench/kernels/blackwell_triton_fused_attention.py
index 03174c616..e432eb29c 100644
--- a/tritonbench/kernels/blackwell_triton_fused_attention.py
+++ b/tritonbench/kernels/blackwell_triton_fused_attention.py
@@ -24,7 +24,6 @@
     is_blackwell,
     is_cuda,
     is_hip,
-    is_hopper,
     supports_host_descriptor,
 )
diff --git a/tritonbench/kernels/gluon_attention_forward.py b/tritonbench/kernels/gluon_attention_forward.py
index d9e5336f0..f39ba3ada 100644
--- a/tritonbench/kernels/gluon_attention_forward.py
+++ b/tritonbench/kernels/gluon_attention_forward.py
@@ -1,5 +1,3 @@
-import itertools
-
 import torch
 import triton
 import triton.language as tl
diff --git a/tritonbench/kernels/gluon_attention_persistent_forward.py b/tritonbench/kernels/gluon_attention_persistent_forward.py
index a03de4492..0b0770d5e 100644
--- a/tritonbench/kernels/gluon_attention_persistent_forward.py
+++ b/tritonbench/kernels/gluon_attention_persistent_forward.py
@@ -1,5 +1,4 @@
 import copy
-import itertools

 import torch
 import triton
diff --git a/tritonbench/operators/addmm/hstu.py b/tritonbench/operators/addmm/hstu.py
index 2aeed9f85..3be3355c3 100644
--- a/tritonbench/operators/addmm/hstu.py
+++ b/tritonbench/operators/addmm/hstu.py
@@ -1,5 +1,3 @@
-import importlib
-
 from typing import Tuple

 import torch
diff --git a/tritonbench/operators/addmm/operator.py b/tritonbench/operators/addmm/operator.py
index e5f9f8dc8..493f722e5 100644
--- a/tritonbench/operators/addmm/operator.py
+++ b/tritonbench/operators/addmm/operator.py
@@ -16,7 +16,6 @@
 except ImportError:
     streamk_matmul = None

-from tritonbench.operators.gemm import stream_k
 from tritonbench.utils.triton_op import (
     BenchmarkOperator,
     BenchmarkOperatorMetrics,
diff --git a/tritonbench/operators/bf16xint16_gemm/bf16xint16_gemm.py b/tritonbench/operators/bf16xint16_gemm/bf16xint16_gemm.py
index 17ce423ea..528d3d690 100644
--- a/tritonbench/operators/bf16xint16_gemm/bf16xint16_gemm.py
+++ b/tritonbench/operators/bf16xint16_gemm/bf16xint16_gemm.py
@@ -10,13 +10,11 @@
 """

 import argparse
-import statistics
 from typing import Any, List, Optional

 import torch
 import triton
-import triton.language as tl

 from tritonbench.utils.triton_op import (
     BenchmarkOperator,
diff --git a/tritonbench/operators/blackwell_attentions/operator.py b/tritonbench/operators/blackwell_attentions/operator.py
index e82578221..60eb79845 100644
--- a/tritonbench/operators/blackwell_attentions/operator.py
+++ b/tritonbench/operators/blackwell_attentions/operator.py
@@ -187,7 +187,7 @@ def _is_sdpa_cudnn_attention_available():
     try:
         _sdpa_cudnn_attention(q, k, v)
         return True
-    except RuntimeError as e:
+    except RuntimeError:
         return False
diff --git a/tritonbench/operators/flex_attention/operator.py b/tritonbench/operators/flex_attention/operator.py
index d8fbc1b58..9166b7dc7 100644
--- a/tritonbench/operators/flex_attention/operator.py
+++ b/tritonbench/operators/flex_attention/operator.py
@@ -26,7 +26,6 @@
 except ImportError:
     pass

-from tritonbench.utils.input import input_filter
 from tritonbench.utils.triton_op import (
     BenchmarkOperator,
     BenchmarkOperatorMetrics,
@@ -379,7 +378,7 @@ def flash_v3(
             )
         elif mod_type == "document_mask":
             # Document mask requires special handling with varlen function
-            print(f"[SKIP] Flash Attention v3 document_mask not implemented yet")
+            print("[SKIP] Flash Attention v3 document_mask not implemented yet")
             raise NotImplementedError(
                 "Flash Attention v3 document_mask not implemented yet"
             )
diff --git a/tritonbench/operators/fp8_attention/operator.py b/tritonbench/operators/fp8_attention/operator.py
index c5384e81b..c7fdce036 100644
--- a/tritonbench/operators/fp8_attention/operator.py
+++ b/tritonbench/operators/fp8_attention/operator.py
@@ -6,7 +6,7 @@

 import argparse
 import math
-from typing import Any, Callable, Generator, List, Optional, Tuple
+from typing import Any, Callable, Generator, List, Optional

 import torch
diff --git a/tritonbench/operators/fp8_gemm/persistent.py b/tritonbench/operators/fp8_gemm/persistent.py
index 9c44e5a69..ad280df6f 100644
--- a/tritonbench/operators/fp8_gemm/persistent.py
+++ b/tritonbench/operators/fp8_gemm/persistent.py
@@ -1,5 +1,3 @@
-from functools import lru_cache
-
 from typing import Optional

 import torch
diff --git a/tritonbench/operators/fp8_gemm_blockwise/operator.py b/tritonbench/operators/fp8_gemm_blockwise/operator.py
index 8ce3a2810..e771adca9 100644
--- a/tritonbench/operators/fp8_gemm_blockwise/operator.py
+++ b/tritonbench/operators/fp8_gemm_blockwise/operator.py
@@ -42,14 +42,10 @@ def parse_args(args: List[str]) -> argparse.Namespace:
 HAS_CUTLASS = False
 if is_cuda():
     try:
-        import fbgemm_gpu.experimental.gen_ai
-
         cutlass_fp8_block = torch.ops.llama_cpp.fp8_blockwise_matmul
         HAS_CUTLASS = True
     except:
         try:
-            import fbgemm_gpu.experimental.gen_ai
-
             cutlass_fp8_block = torch.ops.fbgemm.f8f8bf16_blockwise
             HAS_CUTLASS = True
         except:
diff --git a/tritonbench/operators/fp8_gemm_rowwise_grouped/operator.py b/tritonbench/operators/fp8_gemm_rowwise_grouped/operator.py
index 1cd8e44b6..913aa3a15 100644
--- a/tritonbench/operators/fp8_gemm_rowwise_grouped/operator.py
+++ b/tritonbench/operators/fp8_gemm_rowwise_grouped/operator.py
@@ -48,14 +48,11 @@
 # Import necessary libraries and modules
 import argparse
-import random
 from typing import Any, Callable, Generator, List, Optional, Tuple

 import torch
 import triton

-from tritonbench.utils.data_utils import get_production_shapes
-
 from tritonbench.utils.triton_op import (
     BenchmarkOperator,
     BenchmarkOperatorMetrics,
diff --git a/tritonbench/operators/gdpa/gdpa.py b/tritonbench/operators/gdpa/gdpa.py
index ffb59c39c..a0fea4d1c 100644
--- a/tritonbench/operators/gdpa/gdpa.py
+++ b/tritonbench/operators/gdpa/gdpa.py
@@ -19,7 +19,6 @@
 from typing import Tuple

 import torch
-import torch.nn.functional as F
 import triton  # @manual=//triton:triton
 import triton.language as tl  # @manual=//triton:triton
diff --git a/tritonbench/operators/gdpa/gdpa_blackwell_tlx.py b/tritonbench/operators/gdpa/gdpa_blackwell_tlx.py
index 3cc5be1da..acfd2abb3 100644
--- a/tritonbench/operators/gdpa/gdpa_blackwell_tlx.py
+++ b/tritonbench/operators/gdpa/gdpa_blackwell_tlx.py
@@ -10,7 +10,7 @@
 from triton.tools.tensor_descriptor import TensorDescriptor

 from .gdpa_utils import get_num_sms
-from .math import activation_string_to_int, fast_gelu_grad, gelu, gelu_grad
+from .math import activation_string_to_int, gelu, gelu_grad


 def _host_descriptor_pre_hook(nargs):
diff --git a/tritonbench/operators/gdpa/gdpa_utils.py b/tritonbench/operators/gdpa/gdpa_utils.py
index 77ef95948..35d2e3ffa 100644
--- a/tritonbench/operators/gdpa/gdpa_utils.py
+++ b/tritonbench/operators/gdpa/gdpa_utils.py
@@ -3,7 +3,7 @@
 # pyre-strict
 import math
 from functools import lru_cache
-from typing import Any, List, Optional
+from typing import Any, Optional

 import torch
 import triton  # @manual=//triton:triton
diff --git a/tritonbench/operators/gdpa/operator.py b/tritonbench/operators/gdpa/operator.py
index 8fa69e538..41c287282 100644
--- a/tritonbench/operators/gdpa/operator.py
+++ b/tritonbench/operators/gdpa/operator.py
@@ -133,7 +133,7 @@ def parse_args(args):
         "--kv_len",
         default=None,
         type=int,
-        help=f"Sequence length for K/V, if None, the tensor will be jagged and have the same length as Q",
+        help="Sequence length for K/V, if None, the tensor will be jagged and have the same length as Q",
     )
     parser.add_argument(
         "--activation",
diff --git a/tritonbench/operators/grouped_gemm/cutedsl/kernels.py b/tritonbench/operators/grouped_gemm/cutedsl/kernels.py
index 93952f269..133bce5a0 100644
--- a/tritonbench/operators/grouped_gemm/cutedsl/kernels.py
+++ b/tritonbench/operators/grouped_gemm/cutedsl/kernels.py
@@ -1996,7 +1996,7 @@ def compile_cutedsl_grouped_gemm(
         C_cpu = torch.zeros((m, n, 1), dtype=torch.float32)
         torch_fp32_tensors_abc_seed.append([A_cpu, B_cpu, C_cpu])

-    print(f"Running Blackwell Grouped GEMM test with:")
+    print("Running Blackwell Grouped GEMM test with:")
     print(f"{num_groups} groups")
     for i, (m, n, k, l) in enumerate(problem_sizes_mnkl):
         print(f"Group {i}: {m}x{n}x{k}x{l}")
diff --git a/tritonbench/operators/int4_gemm/int4_gemm.py b/tritonbench/operators/int4_gemm/int4_gemm.py
index de1040940..095231190 100644
--- a/tritonbench/operators/int4_gemm/int4_gemm.py
+++ b/tritonbench/operators/int4_gemm/int4_gemm.py
@@ -6,13 +6,11 @@
 """

 import argparse
-import statistics
 from typing import Any, List, Optional

 import torch
 import triton
-import triton.language as tl

 from tritonbench.utils.triton_op import (
     BenchmarkOperator,
@@ -21,7 +19,7 @@
     register_metric,
 )

-from .kernel import _group_quantize_tensor, matmul, matmul_kernel, pack_2xint4
+from .kernel import _group_quantize_tensor, matmul, pack_2xint4


 class Operator(BenchmarkOperator):
diff --git a/tritonbench/operators/jagged_layer_norm/operator.py b/tritonbench/operators/jagged_layer_norm/operator.py
index e43a9af3d..632818f38 100644
--- a/tritonbench/operators/jagged_layer_norm/operator.py
+++ b/tritonbench/operators/jagged_layer_norm/operator.py
@@ -1,8 +1,5 @@
 import argparse
-import itertools
-import math
 import os
-import random
 from typing import Callable, Generator, List, Optional, Tuple

 import torch
diff --git a/tritonbench/operators/jagged_softmax/operator.py b/tritonbench/operators/jagged_softmax/operator.py
index 57c3a166a..b64ad511c 100644
--- a/tritonbench/operators/jagged_softmax/operator.py
+++ b/tritonbench/operators/jagged_softmax/operator.py
@@ -1,8 +1,5 @@
 import argparse
-import itertools
-import math
 import os
-import random
 from typing import Callable, Generator, List, Optional, Tuple

 import torch
diff --git a/tritonbench/operators/launch_latency/operator.py b/tritonbench/operators/launch_latency/operator.py
index a2099ce40..a800945e3 100644
--- a/tritonbench/operators/launch_latency/operator.py
+++ b/tritonbench/operators/launch_latency/operator.py
@@ -1,16 +1,9 @@
-import triton.language as tl
 from torch import zeros
-from torch._C import _cuda_getCurrentRawStream as get_raw_stream
 from torch._inductor.utils import triton_version_uses_attrs_dict
 from triton.compiler import CompiledKernel

-from tritonbench.utils.triton_op import (
-    BenchmarkOperator,
-    BenchmarkOperatorMetrics,
-    register_benchmark,
-    register_metric,
-)
+from tritonbench.utils.triton_op import BenchmarkOperator, register_benchmark

 from .kernels import get_trivial_add_kernel, nop_kernel, nop_with_args_kernel
diff --git a/tritonbench/operators/low_mem_dropout/kernels.py b/tritonbench/operators/low_mem_dropout/kernels.py
index 3278b9464..c01a43d88 100644
--- a/tritonbench/operators/low_mem_dropout/kernels.py
+++ b/tritonbench/operators/low_mem_dropout/kernels.py
@@ -1,6 +1,3 @@
-import tabulate
-import torch
-
 import triton
 import triton.language as tl
diff --git a/tritonbench/operators/mamba2_chunk_scan/operator.py b/tritonbench/operators/mamba2_chunk_scan/operator.py
index c5cddcf1d..3795891d8 100644
--- a/tritonbench/operators/mamba2_chunk_scan/operator.py
+++ b/tritonbench/operators/mamba2_chunk_scan/operator.py
@@ -1,22 +1,17 @@
 import argparse
-import functools
 import itertools
 import os
 import sys
-from contextlib import nullcontext
-from itertools import chain
-from typing import Any, Callable, Generator, List, Optional
+from typing import Any, Generator, List, Optional

 import torch

-from tritonbench.utils.input import input_filter
 from tritonbench.utils.python_utils import try_import
 from tritonbench.utils.triton_op import (
     BenchmarkOperator,
     BenchmarkOperatorMetrics,
-    Mode as BenchmarkMode,
     register_benchmark,
     register_metric,
     register_x_val,
diff --git a/tritonbench/operators/mamba2_chunk_state/operator.py b/tritonbench/operators/mamba2_chunk_state/operator.py
index 0e2376c55..949f4c0c4 100644
--- a/tritonbench/operators/mamba2_chunk_state/operator.py
+++ b/tritonbench/operators/mamba2_chunk_state/operator.py
@@ -3,20 +3,16 @@
 import itertools
 import os
 import sys
-from contextlib import nullcontext
-from itertools import chain
-from typing import Any, Callable, Generator, List, Optional
+from typing import Any, Generator, List, Optional

 import torch

-from tritonbench.utils.input import input_filter
 from tritonbench.utils.python_utils import try_import
 from tritonbench.utils.triton_op import (
     BenchmarkOperator,
     BenchmarkOperatorMetrics,
-    Mode as BenchmarkMode,
     register_benchmark,
     register_metric,
     register_x_val,
diff --git a/tritonbench/operators/op_task.py b/tritonbench/operators/op_task.py
index 413025b69..d4a84d315 100644
--- a/tritonbench/operators/op_task.py
+++ b/tritonbench/operators/op_task.py
@@ -96,10 +96,6 @@ def worker(self) -> subprocess_worker.SubprocessWorker:
     @base_task.run_in_worker(scoped=True)
     @staticmethod
     def _maybe_import_operator(package: str, op_name: str) -> Dict[str, Any]:
-        import importlib
-        import os
-        import traceback
-
         from tritonbench.operators import load_opbench_by_name

         Operator = load_opbench_by_name(op_name)
diff --git a/tritonbench/operators/ragged_attention/operator.py b/tritonbench/operators/ragged_attention/operator.py
index ff2b25664..ec68e3045 100644
--- a/tritonbench/operators/ragged_attention/operator.py
+++ b/tritonbench/operators/ragged_attention/operator.py
@@ -1,12 +1,11 @@
 import argparse
-from typing import Any, Callable, List, Optional
+from typing import List, Optional

 import torch

 from tritonbench.utils.env_utils import get_nvidia_gpu_model, is_cuda, is_fbcode
-from tritonbench.utils.input import input_filter
 from tritonbench.utils.triton_op import (
     BenchmarkOperator,
     BenchmarkOperatorMetrics,
diff --git a/tritonbench/operators/softmax/operator.py b/tritonbench/operators/softmax/operator.py
index e258630e6..665bef75d 100644
--- a/tritonbench/operators/softmax/operator.py
+++ b/tritonbench/operators/softmax/operator.py
@@ -1,5 +1,5 @@
 import argparse
-from typing import Generator, List, Optional
+from typing import List, Optional

 import torch
 import triton
diff --git a/tritonbench/operators/sum/kernels.py b/tritonbench/operators/sum/kernels.py
index 65b865ef6..eb7795bfd 100644
--- a/tritonbench/operators/sum/kernels.py
+++ b/tritonbench/operators/sum/kernels.py
@@ -1,6 +1,5 @@
 import itertools

-import torch
 import triton
 import triton.language as tl
diff --git a/tritonbench/operators/sum/operator.py b/tritonbench/operators/sum/operator.py
index 6ea86f01a..ecad273bb 100644
--- a/tritonbench/operators/sum/operator.py
+++ b/tritonbench/operators/sum/operator.py
@@ -2,11 +2,10 @@
 import itertools
 import math
 import os
-from typing import Callable, Generator, List, Optional, Tuple
+from typing import Callable, Generator, List, Optional

 import torch
 import triton
-import triton.language as tl

 from tritonbench.utils.triton_op import (
@@ -18,7 +17,6 @@
 from .kernels import (
     triton_sum_kernel_1D_result_buffer_then_sum,
     triton_sum_kernel_1D_result_sum_then_buffer,
-    triton_sum_kernel_2D_result_dim_1,
     triton_sum_kernel_2D_result_dim_1_sum_then_buffer,
     triton_sum_kernel_scalar_result,
 )
diff --git a/tritonbench/operators/template_attention/operator.py b/tritonbench/operators/template_attention/operator.py
index eff94ca8e..efd4b6962 100644
--- a/tritonbench/operators/template_attention/operator.py
+++ b/tritonbench/operators/template_attention/operator.py
@@ -1,20 +1,10 @@
 import argparse
-import csv
-import os
-import statistics
-from typing import Any, Callable, Generator, List, Optional
+from typing import Callable, Generator, List, Optional

-import numpy
 import torch
-import triton
 from torch._dynamo.testing import rand_strided, same

-from tritonbench.utils.triton_op import (
-    BenchmarkOperator,
-    BenchmarkOperatorMetrics,
-    register_benchmark,
-    register_metric,
-)
+from tritonbench.utils.triton_op import BenchmarkOperator, register_benchmark

 from .triton_attention import (
     triton_attention_no_exp2 as triton_test_no_exp2,
diff --git a/tritonbench/operators/template_attention/triton_attention.py b/tritonbench/operators/template_attention/triton_attention.py
index 45c7864f4..e0eb5129a 100644
--- a/tritonbench/operators/template_attention/triton_attention.py
+++ b/tritonbench/operators/template_attention/triton_attention.py
@@ -7,9 +7,6 @@
 import triton
 import triton.language as tl
-from torch._C import _cuda_getCurrentRawStream as get_raw_stream
-from torch._inductor.runtime import triton_helpers, triton_heuristics
-from torch._inductor.runtime.triton_helpers import libdevice

 empty_strided_cuda = torch._C._dynamo.guards._empty_strided_cuda
 reinterpret_tensor = torch.ops.inductor._reinterpret_tensor
diff --git a/tritonbench/operators/vector_add/kernels.py b/tritonbench/operators/vector_add/kernels.py
index 40aa3734f..28e0b2513 100644
--- a/tritonbench/operators/vector_add/kernels.py
+++ b/tritonbench/operators/vector_add/kernels.py
@@ -1,4 +1,3 @@
-import torch
 import triton
 import triton.language as tl
diff --git a/tritonbench/operators/vector_exp/operator.py b/tritonbench/operators/vector_exp/operator.py
index 97160955a..1967d00b7 100644
--- a/tritonbench/operators/vector_exp/operator.py
+++ b/tritonbench/operators/vector_exp/operator.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Generator, List
+from typing import Callable, Generator, List

 import torch
 import triton
diff --git a/tritonbench/operators/welford/operator.py b/tritonbench/operators/welford/operator.py
index 6deb43bad..c170cb3c1 100644
--- a/tritonbench/operators/welford/operator.py
+++ b/tritonbench/operators/welford/operator.py
@@ -1,14 +1,10 @@
 import argparse
-from typing import Any, Callable, Generator, List, Optional
+from typing import Callable, Generator, List, Optional

 import torch
 from torch._dynamo.testing import rand_strided

-from tritonbench.utils.triton_op import (
-    BenchmarkOperator,
-    register_benchmark,
-    register_metric,
-)
+from tritonbench.utils.triton_op import BenchmarkOperator, register_benchmark

 from .triton_welford import (
     fused_native_layer_norm as triton_welford,
diff --git a/tritonbench/operators/welford/triton_welford.py b/tritonbench/operators/welford/triton_welford.py
index b248cfe71..648d666c0 100644
--- a/tritonbench/operators/welford/triton_welford.py
+++ b/tritonbench/operators/welford/triton_welford.py
@@ -8,7 +8,7 @@
 import triton
 import triton.language as tl
 from torch._C import _cuda_getCurrentRawStream as get_raw_stream
-from torch._inductor.runtime import triton_helpers, triton_heuristics
+from torch._inductor.runtime import triton_helpers
 from torch._inductor.runtime.triton_helpers import libdevice

 empty_strided_cuda = torch._C._dynamo.guards._empty_strided_cuda
diff --git a/tritonbench/utils/list_operator_details.py b/tritonbench/utils/list_operator_details.py
index d11accb42..ebd29cb15 100644
--- a/tritonbench/utils/list_operator_details.py
+++ b/tritonbench/utils/list_operator_details.py
@@ -2,7 +2,6 @@
 Utilities for listing operator details including metrics and backends in tritonbench.
 """

-import sys
 from dataclasses import fields
 from typing import Dict, List, Optional

@@ -10,7 +9,6 @@
 INDENT = "  "  # Base indentation unit (2 spaces)
 INDENT2 = INDENT * 2  # Double indentation (4 spaces)

-from tritonbench.operators_collection import list_operators_by_collection
 from tritonbench.utils.operator_utils import (
     batch_load_operators,
     get_backends_for_operator,
@@ -18,7 +16,7 @@
     get_overridden_metrics_for_operator,
     is_operator_loaded,
 )
-from tritonbench.utils.triton_op import BenchmarkOperatorMetrics, REGISTERED_BENCHMARKS
+from tritonbench.utils.triton_op import BenchmarkOperatorMetrics


 def get_builtin_metrics() -> List[str]:
diff --git a/tritonbench/utils/python_utils.py b/tritonbench/utils/python_utils.py
index 568e7d9fd..d7b477a2d 100644
--- a/tritonbench/utils/python_utils.py
+++ b/tritonbench/utils/python_utils.py
@@ -9,5 +9,5 @@ def try_import(cond_name: str):
     try:
         yield
         _caller_globals[cond_name] = True
-    except (ImportError, ModuleNotFoundError) as e:
+    except (ImportError, ModuleNotFoundError):
         _caller_globals[cond_name] = False
diff --git a/tritonbench/utils/run_utils.py b/tritonbench/utils/run_utils.py
index df598eafc..5006362c7 100644
--- a/tritonbench/utils/run_utils.py
+++ b/tritonbench/utils/run_utils.py
@@ -96,7 +96,7 @@ def run_in_helion(op: str, op_args: Dict[str, str], extra_envs: Dict[str, str]):
     environ.update(extra_envs)
     cmd = [sys.executable, "benchmarks/run.py"] + op_args
     print(
-        f"[tritonbench] Running helion benchmark: " + " ".join(cmd),
+        "[tritonbench] Running helion benchmark: " + " ".join(cmd),
         flush=True,
     )
     subprocess.check_call(
@@ -303,7 +303,7 @@ def load_operator_by_args(task_args: List[str]):
 def run_one_operator(task_args: List[str], with_bwd: bool = False):
     op = load_operator_by_args(task_args)
     op.run()
-    if with_bwd and op.has_bwd() and not op.name in FWD_ONLY_OPS:
+    if with_bwd and op.has_bwd() and op.name not in FWD_ONLY_OPS:
         op_name = copy.deepcopy(op.name)
         del op
         if op_name in BWD_ARGS_OPS:
diff --git a/tritonbench/utils/triton_op.py b/tritonbench/utils/triton_op.py
index eaefeb2f2..5448b24e1 100644
--- a/tritonbench/utils/triton_op.py
+++ b/tritonbench/utils/triton_op.py
@@ -819,8 +819,8 @@ def __post__init__(self):
                 )
             if self._input_sample_mode == "equally-spaced-k":
                 raise ValueError(
-                    f"Cannot use --input-sample-mode equally-spaced-k with multiple input IDs. "
-                    f"Either specify multiple IDs directly or use equally-spaced-k with --num-inputs."
+                    "Cannot use --input-sample-mode equally-spaced-k with multiple input IDs. "
+                    "Either specify multiple IDs directly or use equally-spaced-k with --num-inputs."
                 )
             # Validate that all IDs are within range
             invalid_ids = [