We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 1d7d4c1 commit 8508819 (Copy full SHA for 8508819)
paddle/phi/kernels/legacy/gpu/ln_bwd_semi_cuda_kernel.cu
@@ -115,7 +115,7 @@ void launch_(LaunchParams<BwdParams> &launch_params, // NOLINT
115
// Create backward launch function and register. Macro signature:
116
// HIDDEN_SIZE, WTYPE, ITYPE, OTYPE, CTYPE, CTAS_PER_ROW, WARPS_M, WARPS_N,
117
// BYTES_PER_LDG, BYTES_PER_LDG_FINAL
118
-
+#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700)
119
REGISTER_BWD_LAUNCHER(768, fp32, fp32, fp32, fp32, 1, 4, 1, 16, 4);
120
REGISTER_BWD_LAUNCHER(768, fp16, fp16, fp16, fp32, 1, 4, 1, 16, 4);
121
REGISTER_BWD_LAUNCHER(768, fp16, fp32, fp16, fp32, 1, 4, 1, 16, 4);
@@ -271,3 +271,4 @@ REGISTER_BWD_LAUNCHER(65536, fp16, fp16, fp16, fp32, 8, 1, 8, 16, 4);
271
REGISTER_BWD_LAUNCHER(65536, fp16, fp32, fp16, fp32, 8, 1, 8, 16, 4);
272
REGISTER_BWD_LAUNCHER(65536, bf16, bf16, bf16, fp32, 8, 1, 8, 16, 4);
273
REGISTER_BWD_LAUNCHER(65536, bf16, fp32, bf16, fp32, 8, 1, 8, 16, 4);
274
+#endif // (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700)
paddle/phi/kernels/legacy/gpu/ln_fwd_cuda_kernel.cu
@@ -100,7 +100,7 @@ void launch_(LaunchParams<FwdParams> &launch_params, // NOLINT
100
// Create forward launch function and register. Macro signature:
101
// HIDDEN_SIZE, WTYPE, ITYPE, OTYPE, CTYPE, CTAS_PER_ROW, WARPS_M, WARPS_N,
102
// BYTES_PER_LDG
103
104
REGISTER_FWD_LAUNCHER(768, fp32, fp32, fp32, fp32, 1, 4, 1, 16);
105
REGISTER_FWD_LAUNCHER(768, fp16, fp16, fp16, fp32, 1, 4, 1, 16);
106
REGISTER_FWD_LAUNCHER(768, fp16, fp32, fp16, fp32, 1, 4, 1, 16);
@@ -256,3 +256,4 @@ REGISTER_FWD_LAUNCHER(65536, fp16, fp16, fp16, fp32, 8, 1, 4, 16);
256
REGISTER_FWD_LAUNCHER(65536, fp16, fp32, fp16, fp32, 8, 1, 4, 16);
257
REGISTER_FWD_LAUNCHER(65536, bf16, bf16, bf16, fp32, 8, 1, 4, 16);
258
REGISTER_FWD_LAUNCHER(65536, bf16, fp32, bf16, fp32, 8, 1, 4, 16);
259
0 commit comments