Skip to content

Commit 8508819

Browse files
authored
bypass fused_ln instantiation in architectures below sm_70 (#76628)
1 parent 1d7d4c1 commit 8508819

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

paddle/phi/kernels/legacy/gpu/ln_bwd_semi_cuda_kernel.cu

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ void launch_(LaunchParams<BwdParams> &launch_params, // NOLINT
115115
// Create backward launch function and register. Macro signature:
116116
// HIDDEN_SIZE, WTYPE, ITYPE, OTYPE, CTYPE, CTAS_PER_ROW, WARPS_M, WARPS_N,
117117
// BYTES_PER_LDG, BYTES_PER_LDG_FINAL
118-
118+
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700)
119119
REGISTER_BWD_LAUNCHER(768, fp32, fp32, fp32, fp32, 1, 4, 1, 16, 4);
120120
REGISTER_BWD_LAUNCHER(768, fp16, fp16, fp16, fp32, 1, 4, 1, 16, 4);
121121
REGISTER_BWD_LAUNCHER(768, fp16, fp32, fp16, fp32, 1, 4, 1, 16, 4);
@@ -271,3 +271,4 @@ REGISTER_BWD_LAUNCHER(65536, fp16, fp16, fp16, fp32, 8, 1, 8, 16, 4);
271271
REGISTER_BWD_LAUNCHER(65536, fp16, fp32, fp16, fp32, 8, 1, 8, 16, 4);
272272
REGISTER_BWD_LAUNCHER(65536, bf16, bf16, bf16, fp32, 8, 1, 8, 16, 4);
273273
REGISTER_BWD_LAUNCHER(65536, bf16, fp32, bf16, fp32, 8, 1, 8, 16, 4);
274+
#endif // (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700)

paddle/phi/kernels/legacy/gpu/ln_fwd_cuda_kernel.cu

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ void launch_(LaunchParams<FwdParams> &launch_params, // NOLINT
100100
// Create forward launch function and register. Macro signature:
101101
// HIDDEN_SIZE, WTYPE, ITYPE, OTYPE, CTYPE, CTAS_PER_ROW, WARPS_M, WARPS_N,
102102
// BYTES_PER_LDG
103-
103+
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700)
104104
REGISTER_FWD_LAUNCHER(768, fp32, fp32, fp32, fp32, 1, 4, 1, 16);
105105
REGISTER_FWD_LAUNCHER(768, fp16, fp16, fp16, fp32, 1, 4, 1, 16);
106106
REGISTER_FWD_LAUNCHER(768, fp16, fp32, fp16, fp32, 1, 4, 1, 16);
@@ -256,3 +256,4 @@ REGISTER_FWD_LAUNCHER(65536, fp16, fp16, fp16, fp32, 8, 1, 4, 16);
256256
REGISTER_FWD_LAUNCHER(65536, fp16, fp32, fp16, fp32, 8, 1, 4, 16);
257257
REGISTER_FWD_LAUNCHER(65536, bf16, bf16, bf16, fp32, 8, 1, 4, 16);
258258
REGISTER_FWD_LAUNCHER(65536, bf16, fp32, bf16, fp32, 8, 1, 4, 16);
259+
#endif // (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700)

0 commit comments

Comments
 (0)