@@ -135,7 +135,6 @@ struct SearchAlgorithmBase<ConvKind::kForward> {
135135 size_t workspace_size_limit =
136136 CalcWorkspaceLimitInBytes (UseFixedWorkspace ());
137137
138- #if CUDNN_VERSION >= 7001
139138 int actual_perf_count;
140139 int best_algo_idx = 0 ;
141140 std::vector<PerfT> perf_results (kNUM_CUDNN_FWD_ALGS );
@@ -153,43 +152,15 @@ struct SearchAlgorithmBase<ConvKind::kForward> {
153152 result.workspace_size = perf_results[best_algo_idx].memory ;
154153
155154 if (result.workspace_size > workspace_size_limit) {
156- #if CUDNN_VERSION >= 8000
157155 VLOG (4 ) << GetPerfResultString<PerfT>(" [Heuristic] FwdAlgo Perf result" ,
158156 perf_results,
159157 actual_perf_count,
160158 workspace_size_limit);
161159 // cudnnGetConvolutionForwardAlgorithm is removed in CUDNN-8
162160 ChooseAlgoByWorkspace<PerfT, AlgoT>(
163161 perf_results, workspace_size_limit, &result);
164- #else
165- VLOG (3 ) << " Fallback to non-v7 method to find conv algorithm "
166- " because the workspace size request("
167- << result.workspace_size << " ) exceeds the limit("
168- << workspace_size_limit << " )" ;
169- PADDLE_ENFORCE_GPU_SUCCESS (
170- phi::dynload::cudnnGetConvolutionForwardAlgorithm (
171- args.handle ,
172- args.idesc .desc (),
173- args.wdesc .desc (),
174- args.cdesc .desc (),
175- args.odesc .desc (),
176- CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
177- workspace_size_limit,
178- &(result.algo )));
179- #endif
180162 }
181- #else
182- PADDLE_ENFORCE_GPU_SUCCESS (
183- phi::dynload::cudnnGetConvolutionForwardAlgorithm (
184- args.handle ,
185- args.idesc .desc (),
186- args.wdesc .desc (),
187- args.cdesc .desc (),
188- args.odesc .desc (),
189- CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
190- workspace_size_limit,
191- &(result.algo )));
192- #endif
163+
193164 result.workspace_size = GetWorkspaceSize (args, result.algo );
194165 return result;
195166 }
@@ -311,7 +282,6 @@ struct SearchAlgorithmBase<ConvKind::kBackwardData> {
311282 size_t workspace_size_limit =
312283 CalcWorkspaceLimitInBytes (UseFixedWorkspace ());
313284
314- #if CUDNN_VERSION >= 7001
315285 int actual_perf_count;
316286 int best_algo_idx = 0 ;
317287 std::vector<PerfT> perf_results (kNUM_CUDNN_BWD_DATA_ALGS );
@@ -327,53 +297,12 @@ struct SearchAlgorithmBase<ConvKind::kBackwardData> {
327297 perf_results.data ()));
328298 result.algo = perf_results[best_algo_idx].algo ;
329299
330- #if CUDNN_VERSION < 7500
331- int stride_dim = args.x ->dims ().size () - 2 ;
332- bool blacklist = std::any_of (args.s .begin (),
333- args.s .begin () + stride_dim,
334- [=](int n) { return n != 1 ; });
335- if (blacklist && (perf_results[best_algo_idx].algo ==
336- CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING ||
337- perf_results[best_algo_idx].algo ==
338- CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT)) {
339- result.algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
340- }
341- #endif
342300 result.workspace_size = GetWorkspaceSize (args, result.algo );
343301 if (result.workspace_size > workspace_size_limit) {
344- #if CUDNN_VERSION >= 8000
345302 // cudnnGetConvolutionBackwardDataAlgorithm is removed in CUDNN-8
346303 ChooseAlgoByWorkspace<PerfT, AlgoT>(
347304 perf_results, workspace_size_limit, &result);
348- #else
349- VLOG (1 ) << " Fallback to non-v7 method to find conv algorithm because "
350- " the workspace size request("
351- << result.workspace_size << " ) exceeds the limit("
352- << workspace_size_limit << " )" ;
353- PADDLE_ENFORCE_GPU_SUCCESS (
354- phi::dynload::cudnnGetConvolutionBackwardDataAlgorithm (
355- args.handle ,
356- args.wdesc .desc (),
357- args.odesc .desc (),
358- args.cdesc .desc (),
359- args.idesc .desc (),
360- CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
361- workspace_size_limit,
362- &(result.algo )));
363- #endif
364305 }
365- #else
366- PADDLE_ENFORCE_GPU_SUCCESS (
367- phi::dynload::cudnnGetConvolutionBackwardDataAlgorithm (
368- args.handle ,
369- args.wdesc .desc (),
370- args.odesc .desc (),
371- args.cdesc .desc (),
372- args.idesc .desc (),
373- CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
374- workspace_size_limit,
375- &(result.algo )));
376- #endif
377306 result.workspace_size = GetWorkspaceSize (args, result.algo );
378307 return result;
379308 }
@@ -495,7 +424,6 @@ struct SearchAlgorithmBase<ConvKind::kBackwardFilter> {
495424 size_t workspace_size_limit =
496425 CalcWorkspaceLimitInBytes (UseFixedWorkspace ());
497426
498- #if CUDNN_VERSION >= 7001
499427 int actual_perf_count;
500428 int best_algo_idx = 0 ;
501429 std::vector<PerfT> perf_results (kNUM_CUDNN_BWD_FILTER_ALGS );
@@ -513,39 +441,10 @@ struct SearchAlgorithmBase<ConvKind::kBackwardFilter> {
513441 result.workspace_size = perf_results[best_algo_idx].memory ;
514442
515443 if (result.workspace_size > workspace_size_limit) {
516- #if CUDNN_VERSION >= 8000
517444 // cudnnGetConvolutionBackwardFilterAlgorithm is removed in CUDNN-8
518445 ChooseAlgoByWorkspace<PerfT, AlgoT>(
519446 perf_results, workspace_size_limit, &result);
520- #else
521- VLOG (1 ) << " Fallback to non-v7 method to find conv algorithm because "
522- " the workspace size request("
523- << result.workspace_size << " ) exceeds the limit("
524- << workspace_size_limit << " )" ;
525- PADDLE_ENFORCE_GPU_SUCCESS (
526- phi::dynload::cudnnGetConvolutionBackwardFilterAlgorithm (
527- args.handle ,
528- args.idesc .desc (),
529- args.odesc .desc (),
530- args.cdesc .desc (),
531- args.wdesc .desc (),
532- CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
533- workspace_size_limit,
534- &(result.algo )));
535- #endif
536447 }
537- #else
538- PADDLE_ENFORCE_GPU_SUCCESS (
539- phi::dynload::cudnnGetConvolutionBackwardFilterAlgorithm (
540- args.handle ,
541- args.idesc .desc (),
542- args.odesc .desc (),
543- args.cdesc .desc (),
544- args.wdesc .desc (),
545- CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
546- workspace_size_limit,
547- &(result.algo )));
548- #endif
549448
550449 result.workspace_size = GetWorkspaceSize (args, result.algo );
551450 return result;
@@ -622,7 +521,6 @@ struct SearchAlgorithmBase<ConvKind::kBackwardFilter> {
622521 }
623522
624523 static int GetAlgorithmMaxCount (cudnnHandle_t handle) {
625- #if CUDNN_VERSION_MIN(7, 0, 1)
626524 int max_algos = 0 ;
627525 auto status =
628526 phi::dynload::cudnnGetConvolutionBackwardFilterAlgorithmMaxCount (
@@ -632,7 +530,6 @@ struct SearchAlgorithmBase<ConvKind::kBackwardFilter> {
632530 << kNUM_CUDNN_BWD_FILTER_ALGS << " , actual=" << max_algos;
633531 return max_algos;
634532 }
635- #endif
636533 return kNUM_CUDNN_BWD_FILTER_ALGS ;
637534 }
638535
@@ -736,12 +633,10 @@ struct SearchAlgorithm : public SearchAlgorithmBase<CK> {
736633 const phi::GPUContext& dev_ctx,
737634 cudnnDataType_t dtype,
738635 const phi::backends::gpu::ConvolutionDescriptor& cdesc) {
739- #if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1)
740636 if (dev_ctx.GetComputeCapability () >= 70 && dtype == CUDNN_DATA_HALF) {
741637 PADDLE_ENFORCE_GPU_SUCCESS (phi::dynload::cudnnSetConvolutionMathType (
742638 cdesc.desc (), CUDNN_TENSOR_OP_MATH));
743639 VLOG (5 ) << " Enable Tensor Core for FLOAT16" ;
744- #if CUDA_VERSION >= 11000
745640#if CUDNN_VERSION_MIN(8, 1, 0)
746641 } else if (dev_ctx.GetComputeCapability () >= 80 &&
747642 dtype == CUDNN_DATA_BFLOAT16) {
@@ -753,12 +648,10 @@ struct SearchAlgorithm : public SearchAlgorithmBase<CK> {
753648 VLOG (5 ) << " Disable TensorFloat (Tensor Core) for FLOAT" ;
754649 PADDLE_ENFORCE_GPU_SUCCESS (phi::dynload::cudnnSetConvolutionMathType (
755650 cdesc.desc (), CUDNN_FMA_MATH));
756- #endif // CUDA_VERSION >= 11000
757651 } else {
758652 PADDLE_ENFORCE_GPU_SUCCESS (phi::dynload::cudnnSetConvolutionMathType (
759653 cdesc.desc (), CUDNN_DEFAULT_MATH));
760654 }
761- #endif
762655 }
763656};
764657
0 commit comments