Skip to content

Commit ae259fe

Browse files
authored
fix cudnn check in conv_cudnn_v7.h (#76176)
1 parent c9811d6 commit ae259fe

File tree

1 file changed

+1
-108
lines changed

1 file changed

+1
-108
lines changed

paddle/phi/kernels/gpudnn/conv_cudnn_v7.h

Lines changed: 1 addition & 108 deletions
Original file line number | Diff line number | Diff line change
@@ -135,7 +135,6 @@ struct SearchAlgorithmBase<ConvKind::kForward> {
135135
size_t workspace_size_limit =
136136
CalcWorkspaceLimitInBytes(UseFixedWorkspace());
137137

138-
#if CUDNN_VERSION >= 7001
139138
int actual_perf_count;
140139
int best_algo_idx = 0;
141140
std::vector<PerfT> perf_results(kNUM_CUDNN_FWD_ALGS);
@@ -153,43 +152,15 @@ struct SearchAlgorithmBase<ConvKind::kForward> {
153152
result.workspace_size = perf_results[best_algo_idx].memory;
154153

155154
if (result.workspace_size > workspace_size_limit) {
156-
#if CUDNN_VERSION >= 8000
157155
VLOG(4) << GetPerfResultString<PerfT>("[Heuristic] FwdAlgo Perf result",
158156
perf_results,
159157
actual_perf_count,
160158
workspace_size_limit);
161159
// cudnnGetConvolutionForwardAlgorithm is removed in CUDNN-8
162160
ChooseAlgoByWorkspace<PerfT, AlgoT>(
163161
perf_results, workspace_size_limit, &result);
164-
#else
165-
VLOG(3) << "Fallback to non-v7 method to find conv algorithm "
166-
"because the workspace size request("
167-
<< result.workspace_size << ") exceeds the limit("
168-
<< workspace_size_limit << ")";
169-
PADDLE_ENFORCE_GPU_SUCCESS(
170-
phi::dynload::cudnnGetConvolutionForwardAlgorithm(
171-
args.handle,
172-
args.idesc.desc(),
173-
args.wdesc.desc(),
174-
args.cdesc.desc(),
175-
args.odesc.desc(),
176-
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
177-
workspace_size_limit,
178-
&(result.algo)));
179-
#endif
180162
}
181-
#else
182-
PADDLE_ENFORCE_GPU_SUCCESS(
183-
phi::dynload::cudnnGetConvolutionForwardAlgorithm(
184-
args.handle,
185-
args.idesc.desc(),
186-
args.wdesc.desc(),
187-
args.cdesc.desc(),
188-
args.odesc.desc(),
189-
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
190-
workspace_size_limit,
191-
&(result.algo)));
192-
#endif
163+
193164
result.workspace_size = GetWorkspaceSize(args, result.algo);
194165
return result;
195166
}
@@ -311,7 +282,6 @@ struct SearchAlgorithmBase<ConvKind::kBackwardData> {
311282
size_t workspace_size_limit =
312283
CalcWorkspaceLimitInBytes(UseFixedWorkspace());
313284

314-
#if CUDNN_VERSION >= 7001
315285
int actual_perf_count;
316286
int best_algo_idx = 0;
317287
std::vector<PerfT> perf_results(kNUM_CUDNN_BWD_DATA_ALGS);
@@ -327,53 +297,12 @@ struct SearchAlgorithmBase<ConvKind::kBackwardData> {
327297
perf_results.data()));
328298
result.algo = perf_results[best_algo_idx].algo;
329299

330-
#if CUDNN_VERSION < 7500
331-
int stride_dim = args.x->dims().size() - 2;
332-
bool blacklist = std::any_of(args.s.begin(),
333-
args.s.begin() + stride_dim,
334-
[=](int n) { return n != 1; });
335-
if (blacklist && (perf_results[best_algo_idx].algo ==
336-
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING ||
337-
perf_results[best_algo_idx].algo ==
338-
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT)) {
339-
result.algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
340-
}
341-
#endif
342300
result.workspace_size = GetWorkspaceSize(args, result.algo);
343301
if (result.workspace_size > workspace_size_limit) {
344-
#if CUDNN_VERSION >= 8000
345302
// cudnnGetConvolutionBackwardDataAlgorithm is removed in CUDNN-8
346303
ChooseAlgoByWorkspace<PerfT, AlgoT>(
347304
perf_results, workspace_size_limit, &result);
348-
#else
349-
VLOG(1) << "Fallback to non-v7 method to find conv algorithm because "
350-
"the workspace size request("
351-
<< result.workspace_size << ") exceeds the limit("
352-
<< workspace_size_limit << ")";
353-
PADDLE_ENFORCE_GPU_SUCCESS(
354-
phi::dynload::cudnnGetConvolutionBackwardDataAlgorithm(
355-
args.handle,
356-
args.wdesc.desc(),
357-
args.odesc.desc(),
358-
args.cdesc.desc(),
359-
args.idesc.desc(),
360-
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
361-
workspace_size_limit,
362-
&(result.algo)));
363-
#endif
364305
}
365-
#else
366-
PADDLE_ENFORCE_GPU_SUCCESS(
367-
phi::dynload::cudnnGetConvolutionBackwardDataAlgorithm(
368-
args.handle,
369-
args.wdesc.desc(),
370-
args.odesc.desc(),
371-
args.cdesc.desc(),
372-
args.idesc.desc(),
373-
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
374-
workspace_size_limit,
375-
&(result.algo)));
376-
#endif
377306
result.workspace_size = GetWorkspaceSize(args, result.algo);
378307
return result;
379308
}
@@ -495,7 +424,6 @@ struct SearchAlgorithmBase<ConvKind::kBackwardFilter> {
495424
size_t workspace_size_limit =
496425
CalcWorkspaceLimitInBytes(UseFixedWorkspace());
497426

498-
#if CUDNN_VERSION >= 7001
499427
int actual_perf_count;
500428
int best_algo_idx = 0;
501429
std::vector<PerfT> perf_results(kNUM_CUDNN_BWD_FILTER_ALGS);
@@ -513,39 +441,10 @@ struct SearchAlgorithmBase<ConvKind::kBackwardFilter> {
513441
result.workspace_size = perf_results[best_algo_idx].memory;
514442

515443
if (result.workspace_size > workspace_size_limit) {
516-
#if CUDNN_VERSION >= 8000
517444
// cudnnGetConvolutionBackwardFilterAlgorithm is removed in CUDNN-8
518445
ChooseAlgoByWorkspace<PerfT, AlgoT>(
519446
perf_results, workspace_size_limit, &result);
520-
#else
521-
VLOG(1) << "Fallback to non-v7 method to find conv algorithm because "
522-
"the workspace size request("
523-
<< result.workspace_size << ") exceeds the limit("
524-
<< workspace_size_limit << ")";
525-
PADDLE_ENFORCE_GPU_SUCCESS(
526-
phi::dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
527-
args.handle,
528-
args.idesc.desc(),
529-
args.odesc.desc(),
530-
args.cdesc.desc(),
531-
args.wdesc.desc(),
532-
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
533-
workspace_size_limit,
534-
&(result.algo)));
535-
#endif
536447
}
537-
#else
538-
PADDLE_ENFORCE_GPU_SUCCESS(
539-
phi::dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
540-
args.handle,
541-
args.idesc.desc(),
542-
args.odesc.desc(),
543-
args.cdesc.desc(),
544-
args.wdesc.desc(),
545-
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
546-
workspace_size_limit,
547-
&(result.algo)));
548-
#endif
549448

550449
result.workspace_size = GetWorkspaceSize(args, result.algo);
551450
return result;
@@ -622,7 +521,6 @@ struct SearchAlgorithmBase<ConvKind::kBackwardFilter> {
622521
}
623522

624523
static int GetAlgorithmMaxCount(cudnnHandle_t handle) {
625-
#if CUDNN_VERSION_MIN(7, 0, 1)
626524
int max_algos = 0;
627525
auto status =
628526
phi::dynload::cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(
@@ -632,7 +530,6 @@ struct SearchAlgorithmBase<ConvKind::kBackwardFilter> {
632530
<< kNUM_CUDNN_BWD_FILTER_ALGS << ", actual=" << max_algos;
633531
return max_algos;
634532
}
635-
#endif
636533
return kNUM_CUDNN_BWD_FILTER_ALGS;
637534
}
638535

@@ -736,12 +633,10 @@ struct SearchAlgorithm : public SearchAlgorithmBase<CK> {
736633
const phi::GPUContext& dev_ctx,
737634
cudnnDataType_t dtype,
738635
const phi::backends::gpu::ConvolutionDescriptor& cdesc) {
739-
#if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1)
740636
if (dev_ctx.GetComputeCapability() >= 70 && dtype == CUDNN_DATA_HALF) {
741637
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType(
742638
cdesc.desc(), CUDNN_TENSOR_OP_MATH));
743639
VLOG(5) << "Enable Tensor Core for FLOAT16";
744-
#if CUDA_VERSION >= 11000
745640
#if CUDNN_VERSION_MIN(8, 1, 0)
746641
} else if (dev_ctx.GetComputeCapability() >= 80 &&
747642
dtype == CUDNN_DATA_BFLOAT16) {
@@ -753,12 +648,10 @@ struct SearchAlgorithm : public SearchAlgorithmBase<CK> {
753648
VLOG(5) << "Disable TensorFloat (Tensor Core) for FLOAT";
754649
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType(
755650
cdesc.desc(), CUDNN_FMA_MATH));
756-
#endif // CUDA_VERSION >= 11000
757651
} else {
758652
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType(
759653
cdesc.desc(), CUDNN_DEFAULT_MATH));
760654
}
761-
#endif
762655
}
763656
};
764657

0 commit comments

Comments
 (0)