Skip to content

Commit 6aaf6ff

Browse files
authored
Merge pull request #6521 from haugenlabs/mixed-pr
Added a serial mixed-precision solver with support for bicstab with ilu and dilu on the CPU.
2 parents 338144c + c2d0c5f commit 6aaf6ff

18 files changed

+1992
-4
lines changed

CMakeLists.txt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -750,8 +750,15 @@ endif()
750750

751751
add_custom_target(extra_test ${CMAKE_CTEST_COMMAND} -C ExtraTests)
752752

753-
# must link libraries after target 'opmsimulators' has been defined
753+
# must activate avx2 flags and C11 for mixed precision.
754+
if(HAVE_AVX2_EXTENSION)
755+
# Trying to set the standard using `set(CMAKE_C_STANDARD 11)` had no effect, still used -std=c99
756+
# Hence we are requesting C17. (-std=c99 -D_ISOC11_SOURCE also seems to work)
757+
set_property(SOURCE ${AVX2_SOURCE_FILES} PROPERTY COMPILE_FLAGS "${AVX2_FLAGS}" APPEND_STRING)
758+
set_property(TARGET opmsimulators PROPERTY C_STANDARD 17)
759+
endif()
754760

761+
# must link libraries after target 'opmsimulators' has been defined
755762
if(CUDA_FOUND)
756763
if (NOT USE_HIP)
757764
target_link_libraries(opmsimulators

CMakeLists_files.cmake

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,15 @@ list (APPEND MAIN_SOURCE_FILES
267267
opm/simulators/wells/WGState.cpp
268268
)
269269

270+
if (HAVE_AVX2_EXTENSION)
271+
set (AVX2_SOURCE_FILES
272+
opm/simulators/linalg/mixed/bsr.c
273+
opm/simulators/linalg/mixed/prec.c
274+
opm/simulators/linalg/mixed/bslv.c)
275+
list (APPEND MAIN_SOURCE_FILES
276+
${AVX2_SOURCE_FILES})
277+
endif()
278+
270279
if (HAVE_ECL_INPUT)
271280
list (APPEND MAIN_SOURCE_FILES
272281
opm/simulators/utils/satfunc/GasPhaseConsistencyChecks.cpp
@@ -1264,6 +1273,16 @@ if (HAVE_ECL_INPUT)
12641273
)
12651274
endif()
12661275

1276+
if (HAVE_AVX2_EXTENSION)
1277+
list (APPEND PUBLIC_HEADER_FILES
1278+
opm/simulators/linalg/mixed/bslv.h
1279+
opm/simulators/linalg/mixed/bsr.h
1280+
opm/simulators/linalg/mixed/prec.h
1281+
opm/simulators/linalg/mixed/vec.h
1282+
opm/simulators/linalg/mixed/wrapper.hpp
1283+
)
1284+
endif()
1285+
12671286
if (Damaris_FOUND AND MPI_FOUND AND USE_DAMARIS_LIB)
12681287
list (APPEND PUBLIC_HEADER_FILES
12691288
opm/simulators/utils/DamarisKeywords.hpp

opm-simulators-prereqs.cmake

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ set (opm-simulators_CONFIG_VAR
66
HAVE_MPI
77
HAVE_PETSC
88
COMPILE_GPU_BRIDGE
9+
HAVE_AVX2_EXTENSION
910
HAVE_CUDA
1011
HAVE_OPENCL
1112
HAVE_OPENCL_HPP
@@ -37,6 +38,9 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.30.0)
3738
set(_Boost_CONFIG_MODE CONFIG)
3839
endif()
3940

41+
include(CheckAVX2)
42+
check_for_avx2()
43+
4044
# dependencies
4145
set (opm-simulators_DEPS
4246
# Compile with C99 support if available

opm/models/discretization/common/fvbaselinearizer.hh

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -287,8 +287,55 @@ public:
287287
GlobalEqVector& residual()
288288
{ return residual_; }
289289

290-
void setLinearizationType(LinearizationType linearizationType)
291-
{ linearizationType_ = linearizationType; }
290+
void printVector(GlobalEqVector&, const char *name="x")
291+
{
292+
return;
293+
}
294+
295+
void printResidual(const char *name="r")
296+
{
297+
printVector(residual_);
298+
}
299+
300+
void printSparsity(const char *name="s")
301+
{
302+
return;
303+
}
304+
305+
void printNonzeros(const char *name="d")
306+
{
307+
return;
308+
}
309+
310+
void printJacobian()
311+
{
312+
return;
313+
}
314+
315+
void exportSystem(int idx, char *tag, const char *path="export")
316+
{
317+
return;
318+
}
319+
320+
void exportVector(GlobalEqVector &x, const char *tag="", const char *name="export/x")
321+
{
322+
printf("n = %lu\n",x.dim());
323+
}
324+
325+
void exportSparsity(const char *path=".")
326+
{
327+
return;
328+
}
329+
330+
void exportNonzeros(const char *tag="", const char *path=".")
331+
{
332+
return;
333+
}
334+
335+
336+
void setLinearizationType(LinearizationType linearizationType){
337+
linearizationType_ = linearizationType;
338+
};
292339

293340
const LinearizationType& getLinearizationType() const
294341
{ return linearizationType_; }

opm/models/discretization/common/tpfalinearizer.hh

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@
6060
#include <unordered_map>
6161
#include <vector>
6262

63+
#include <fmt/format.h>
64+
6365
#include <opm/common/utility/gpuDecorators.hpp>
6466
#if HAVE_CUDA
6567
#if USE_HIP
@@ -236,6 +238,8 @@ public:
236238
{
237239
simulatorPtr_ = nullptr;
238240
separateSparseSourceTerms_ = Parameters::Get<Parameters::SeparateSparseSourceTerms>();
241+
exportIndex_=-1;
242+
exportCount_=-1;
239243
}
240244

241245
/*!
@@ -414,6 +418,201 @@ public:
414418
GlobalEqVector& residual()
415419
{ return residual_; }
416420

421+
/*!
422+
* \brief Print first 16 block elements of a vector.
423+
*/
424+
void printVector(GlobalEqVector &x, const char *name="x")
425+
{
426+
int count = 1;
427+
fmt::print("{} =\n[\n",name);
428+
for (auto block = x.begin(); block != x.end(); block++)
429+
{
430+
for (auto i=block->begin(); i!=block->end(); i++)
431+
{
432+
fmt::print(" {:+.4e}",*i);
433+
}
434+
fmt::print("\n");
435+
count++;
436+
if(count>16) break;
437+
}
438+
fmt::print("]\n");
439+
}
440+
441+
/*!
442+
* \brief Print first 16 block elements of residual.
443+
*/
444+
void printResidual(const char *name="r")
445+
{
446+
printVector(residual_, name);
447+
}
448+
449+
/*!
450+
* \brief Print sparsity pattern of the first 16 rows
451+
* of the jacobian block matrix
452+
*/
453+
void printSparsity(const char *name="s")
454+
{
455+
auto& A = jacobian_->istlMatrix();
456+
457+
fmt::print("nrows = {:d}\n",A.N());
458+
fmt::print("ncols = {:d}\n",A.M());
459+
fmt::print("nnz = {:d}\n",A.nonzeroes());
460+
461+
fmt::print("{} =\n[\n",name);
462+
int count=1;
463+
int offset=0;
464+
for(auto row=A.begin(); row!=A.end(); row++)
465+
{
466+
fmt::print("{:4d}: ",offset);
467+
for(unsigned int i=0;i<row->getsize();i++)
468+
{
469+
fmt::print(" {:4d}",row->getindexptr()[i]);
470+
}
471+
fmt::print("\n");
472+
offset+=row->getsize();
473+
count++;
474+
if(count>16) break;
475+
}
476+
fmt::print("]\n");
477+
}
478+
479+
/*!
480+
* \brief Print block elements of the first 6 rows of the
481+
* j acobian block matrix
482+
*/
483+
void printNonzeros(const char *name="d")
484+
{
485+
auto& A = jacobian_->istlMatrix();
486+
fmt::print("{} =\n[\n",name);
487+
int count=1;
488+
for(auto row=A.begin();row!=A.end();row++)
489+
{
490+
for(unsigned int j=0;j<row->getsize();j++)
491+
{
492+
fmt::print("|");
493+
auto mat = row->getptr()[j];
494+
for(auto vec=mat.begin();vec!=mat.end();vec++)
495+
{
496+
for(auto k=vec->begin();k!=vec->end();k++)
497+
{
498+
fmt::print(" {:+.4e}",*k);
499+
}
500+
fmt::print(" |");
501+
}
502+
fmt::print("\n");
503+
}
504+
count++;
505+
if(count>6) break;
506+
fmt::print("\n");
507+
}
508+
fmt::print("]\n");
509+
}
510+
511+
/*!
512+
* \brief Print sparsity pattern and nonzeros of jacobian block matrix
513+
*/
514+
void printJacobian()
515+
{
516+
printSparsity();
517+
printNonzeros();
518+
}
519+
520+
/*!
521+
* \brief Export blocks-sparse linear system.
522+
*/
523+
void exportSystem(int idx, char *tag, const char *path="export")
524+
{
525+
// export sparsity only once
526+
if(exportIndex_==-1) exportSparsity(path);
527+
528+
// increment indices and generate tag
529+
exportCount_ = exportIndex_==idx ? ++exportCount_ : 0;
530+
exportIndex_ = idx;
531+
sprintf(tag,"_%03d_%02d",exportIndex_, exportCount_);
532+
533+
fmt::print("index = {:d}\n", exportIndex_);
534+
fmt::print("count = {:d}\n", exportCount_);
535+
536+
// export matrix
537+
exportNonzeros(tag,path);
538+
539+
// export residual
540+
char name[256];
541+
sprintf(name,"%s/r",path);
542+
exportVector(residual_,tag,name);
543+
}
544+
545+
/*!
546+
* \brief Export block vector.
547+
*/
548+
void exportVector(GlobalEqVector &x, const char *tag="", const char *name="export/x")
549+
{
550+
// assume double precision and contiguous data
551+
const double *data = &x[0][0];
552+
553+
char filename[512];
554+
sprintf(filename,"%s%s.f64",name,tag);
555+
FILE *out =fopen(filename,"w");
556+
fwrite(data, sizeof(double), x.dim(),out);
557+
fclose(out);
558+
}
559+
560+
/*!
561+
* \brief Export nonzero blocks of jacobian block-sparse matrix
562+
*/
563+
void exportNonzeros(const char *tag="", const char *path=".")
564+
{
565+
auto& A = jacobian_->istlMatrix();
566+
567+
// assume double precision and contiguous data
568+
const double *data = &A[0][0][0][0];
569+
size_t dim = A[0][0].N()*A[0][0].M()*A.nonzeroes();
570+
571+
char filename[256];
572+
sprintf(filename,"%s/data%s.f64",path,tag);
573+
FILE *out =fopen(filename,"w");
574+
fwrite(data, sizeof(double), dim,out);
575+
fclose(out);
576+
}
577+
578+
/*!
579+
* \brief Export sparsity pattern of jacobian block-sparse matrix
580+
*/
581+
void exportSparsity(const char *path=".")
582+
{
583+
//assemble csr graph
584+
auto& A = jacobian_->istlMatrix();
585+
auto rows = std::make_unique<int[]>(A.N()+1);
586+
auto cols = std::make_unique<int[]>(A.nonzeroes());
587+
588+
int irow=0;
589+
int icol=0;
590+
rows[0]=0;
591+
for(auto row=A.begin(); row!=A.end(); row++)
592+
{
593+
for(unsigned int i=0;i<row->getsize();i++)
594+
{
595+
cols[icol++]=row->getindexptr()[i];
596+
}
597+
rows[irow+1]= rows[irow]+row->getsize();
598+
irow++;
599+
}
600+
601+
//export arrays
602+
FILE *out;
603+
char filename[256];
604+
605+
sprintf(filename,"%s/rows.i32",path);
606+
out=fopen(filename,"w");
607+
fwrite(rows, sizeof(int), A.N()+1,out);
608+
fclose(out);
609+
610+
sprintf(filename,"%s/cols.i32",path);
611+
out=fopen(filename,"w");
612+
fwrite(cols, sizeof(int), A.nonzeroes(),out);
613+
fclose(out);
614+
}
615+
417616
void setLinearizationType(LinearizationType linearizationType)
418617
{ linearizationType_ = linearizationType; }
419618

@@ -1067,6 +1266,9 @@ private:
10671266
bool separateSparseSourceTerms_ = false;
10681267

10691268
FullDomain<> fullDomain_;
1269+
1270+
int exportIndex_;
1271+
int exportCount_;
10701272
};
10711273
} // namespace Opm
10721274

opm/simulators/linalg/FlexibleSolver_impl.hpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
#include <opm/simulators/linalg/PreconditionerFactoryGPUIncludeWrapper.hpp>
3333
#include <opm/simulators/linalg/is_gpu_operator.hpp>
3434

35+
#if HAVE_AVX2_EXTENSION
36+
#include <opm/simulators/linalg/mixed/wrapper.hpp>
37+
#endif
38+
3539
#include <dune/common/fmatrix.hh>
3640
#include <dune/istl/bcrsmatrix.hh>
3741
#include <dune/istl/solvers.hh>
@@ -181,6 +185,24 @@ namespace Dune
181185
tol, // desired residual reduction factor
182186
maxiter, // maximum number of iterations
183187
verbosity);
188+
#if HAVE_AVX2_EXTENSION
189+
} else if (solver_type == "mixed-bicgstab") {
190+
if constexpr (Opm::is_gpu_operator_v<Operator>) {
191+
OPM_THROW(std::invalid_argument, "mixed-bicgstab solver not supported for GPU operatorsg");
192+
} else if constexpr (std::is_same_v<typename VectorType::field_type, float>){
193+
OPM_THROW(std::invalid_argument, "mixed-bicgstab solver not supported for single precision.");
194+
} else {
195+
const std::string prec_type = prm.get<std::string>("preconditioner.type", "error");
196+
bool use_mixed_dilu= (prec_type=="mixed-dilu");
197+
using MatrixType = decltype(linearoperator_for_solver_->getmat());
198+
linsolver_ = std::make_shared<Dune::MixedSolver<VectorType,MatrixType>>(
199+
linearoperator_for_solver_->getmat(),
200+
tol,
201+
maxiter,
202+
use_mixed_dilu
203+
);
204+
}
205+
#endif
184206
} else if (solver_type == "loopsolver") {
185207
linsolver_ = std::make_shared<Dune::LoopSolver<VectorType>>(*linearoperator_for_solver_,
186208
*scalarproduct_,
@@ -197,7 +219,6 @@ namespace Dune
197219
restart,
198220
maxiter, // maximum number of iterations
199221
verbosity);
200-
201222
} else {
202223
if constexpr (!Opm::is_gpu_operator_v<Operator>) {
203224
if (solver_type == "flexgmres") {

0 commit comments

Comments
 (0)