Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
0d37b86
export: poc inline implementation for matrices and vectors
Jul 9, 2025
341e092
export: move poc implementation to tpfalinearizer.hh
Jul 10, 2025
0b11a63
export: seemingly working implementation
Jul 11, 2025
29008b7
export: deactivate export code for now
Aug 20, 2025
02538ed
bench: a handful of norne benchmark results
Aug 20, 2025
969e957
solve: establishing trivial entry point
Aug 21, 2025
4c2a177
solve: populating bsr_matrix object
Aug 22, 2025
baffd19
solve: sparsity of LDU components of bildu preconditioner
Aug 23, 2025
a530e48
solve: compiling implementation of block ildu factorization and appli…
Aug 25, 2025
bb2d311
solve: bugfix and optimizing block ildu factorization
Sep 5, 2025
7ac5239
solve: introduce solver memory
Sep 5, 2025
0679b69
solve: compiling version of mixed precision solver
Sep 8, 2025
24251d2
solve: investigating residual discrepancy
Sep 11, 2025
494dcd8
solve: running version of mixed precision solver
Sep 12, 2025
70e23d6
solve: mixed precision solver as flexible solver
Sep 18, 2025
a79dd5d
solve: introduce a trivial preconditioner
Sep 18, 2025
7bf956d
solve: support for full block ILDU0
Sep 22, 2025
6a50f7b
solve: removing commented out code
Sep 24, 2025
69dba35
solve: comannd line option for mixed-dilu
Sep 24, 2025
0507726
Add missing code for gcc-12 build
SoilRos Sep 25, 2025
7f75310
solve: print linear convergence information
Sep 30, 2025
f9e56de
solve: use residual as shadow space
Sep 30, 2025
c2530c5
reorg: pull bsr into separate source files
Oct 1, 2025
87fbfd4
reorg: pull bilu into separate source files
Oct 1, 2025
ca68e2c
reorg: pull bslv into separate source files
Oct 2, 2025
980dd5f
reorg: getting rid of original monolithic files
Oct 2, 2025
16623fb
reorg: pull mixed solver wrapper into separate header file
Oct 2, 2025
6a39be6
reorg: add double precision bicgstab
Oct 2, 2025
614d16e
reorg: rename bildu struct to prec
Oct 3, 2025
d057e46
reorg: rename factorization routines
Oct 3, 2025
f3d312b
reorg: remove dev/test files
Oct 7, 2025
eb4a52a
reorg: delete obsolete ISTSolver modofications
Oct 7, 2025
d0e71d1
reorg: delete development printf statements
Oct 7, 2025
440b48a
mixed: add README with basic usage information
Oct 7, 2025
cd1b9c3
mixed: properly freeing memory allocations
Oct 8, 2025
4b01b82
mixed: rename <obj>_new() to <obj>_alloc()
Oct 8, 2025
d2aa913
export: properly delete temporary arrays
Oct 8, 2025
80f5bb7
mixed: avoid copying matrix in MixedSolver constructor
Nov 19, 2025
590e4af
mixed: address unused variables compilation warnings
Nov 19, 2025
4251657
mixed: fix char vs string error
Nov 19, 2025
3777ce1
mixed: remove deactivated code
Nov 19, 2025
5a53331
mixed: use unique_ptr instead of new/delete
Nov 19, 2025
8d04daf
mixed: prevent mixed-bicgstab from trying to run on gpus
Nov 20, 2025
a2a024e
mixed: make sure posix_memalign is visible
Nov 20, 2025
e25863f
mixed: document solver related functions
Nov 21, 2025
16e2192
mixed: document bsr matrix related functions
Nov 21, 2025
d29a989
mixed: document preconditioner related functions
Nov 24, 2025
ab8aa15
mixed: remove unused function
Nov 24, 2025
107672e
mixed: document export related functions
Nov 24, 2025
e75a90e
mixed: replace posix_memalign with aligned_alloc
Nov 25, 2025
2d7520b
mixed: assert all (m)allocated arrays
Nov 25, 2025
de38990
mixed: remove random shadow space
Nov 25, 2025
d1efe94
mixed: fix indentation
Nov 25, 2025
3b62234
mixed: throw if well contributions are not added to matrix
Nov 26, 2025
28eedc3
mixed: prepare for excplicit checking for avx2 support
Nov 27, 2025
f4a8ff7
mixed: add public header files
Nov 27, 2025
f34f3be
Check for AVX2 support.
blattms Nov 25, 2025
40e8839
Compile mixed precision code only if AVX2 is supported.
blattms Nov 25, 2025
651d71f
Force a C standard that provides aligned_alloc
blattms Nov 27, 2025
8322000
mixed: throw if using single-precision vectors
Dec 1, 2025
72fce2d
mixed: add header guard
Dec 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -750,8 +750,15 @@ endif()

add_custom_target(extra_test ${CMAKE_CTEST_COMMAND} -C ExtraTests)

# must link libraries after target 'opmsimulators' has been defined
# must activate avx2 flags and C11 for mixed precision.
if(HAVE_AVX2_EXTENSION)
# Trying to set the standard using `set(CMAKE_C_STANDARD 11)` had no effect, still used -std=c99
# Hence we are requesting C17. (-std=c99 -D_ISOC11_SOURCE also seems to work)
set_property(SOURCE ${AVX2_SOURCE_FILES} PROPERTY COMPILE_FLAGS "${AVX2_FLAGS}" APPEND_STRING)
set_property(TARGET opmsimulators PROPERTY C_STANDARD 17)
endif()

# must link libraries after target 'opmsimulators' has been defined
if(CUDA_FOUND)
if (NOT USE_HIP)
target_link_libraries(opmsimulators
Expand Down
19 changes: 19 additions & 0 deletions CMakeLists_files.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,15 @@ list (APPEND MAIN_SOURCE_FILES
opm/simulators/wells/WGState.cpp
)

if (HAVE_AVX2_EXTENSION)
set (AVX2_SOURCE_FILES
opm/simulators/linalg/mixed/bsr.c
opm/simulators/linalg/mixed/prec.c
opm/simulators/linalg/mixed/bslv.c)
list (APPEND MAIN_SOURCE_FILES
${AVX2_SOURCE_FILES})
endif()

if (HAVE_ECL_INPUT)
list (APPEND MAIN_SOURCE_FILES
opm/simulators/utils/satfunc/GasPhaseConsistencyChecks.cpp
Expand Down Expand Up @@ -1252,6 +1261,16 @@ if (HAVE_ECL_INPUT)
)
endif()

if (HAVE_AVX2_EXTENSION)
list (APPEND PUBLIC_HEADER_FILES
opm/simulators/linalg/mixed/bslv.h
opm/simulators/linalg/mixed/bsr.h
opm/simulators/linalg/mixed/prec.h
opm/simulators/linalg/mixed/vec.h
opm/simulators/linalg/mixed/wrapper.hpp
)
endif()

if (Damaris_FOUND AND MPI_FOUND AND USE_DAMARIS_LIB)
list (APPEND PUBLIC_HEADER_FILES
opm/simulators/utils/DamarisKeywords.hpp
Expand Down
4 changes: 4 additions & 0 deletions opm-simulators-prereqs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ set (opm-simulators_CONFIG_VAR
HAVE_MPI
HAVE_PETSC
COMPILE_GPU_BRIDGE
HAVE_AVX2_EXTENSION
HAVE_CUDA
HAVE_OPENCL
HAVE_OPENCL_HPP
Expand Down Expand Up @@ -37,6 +38,9 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.30.0)
set(_Boost_CONFIG_MODE CONFIG)
endif()

include(CheckAVX2)
check_for_avx2()

# dependencies
set (opm-simulators_DEPS
# Compile with C99 support if available
Expand Down
51 changes: 49 additions & 2 deletions opm/models/discretization/common/fvbaselinearizer.hh
Original file line number Diff line number Diff line change
Expand Up @@ -288,8 +288,55 @@ public:
GlobalEqVector& residual()
{ return residual_; }

void setLinearizationType(LinearizationType linearizationType)
{ linearizationType_ = linearizationType; }
void printVector(GlobalEqVector&, const char *name="x")
{
return;
}

void printResidual(const char *name="r")
{
printVector(residual_);
}

void printSparsity(const char *name="s")
{
return;
}

void printNonzeros(const char *name="d")
{
return;
}

void printJacobian()
{
return;
}

void exportSystem(int idx, char *tag, const char *path="export")
{
return;
}

void exportVector(GlobalEqVector &x, const char *tag="", const char *name="export/x")
{
printf("n = %lu\n",x.dim());
}

void exportSparsity(const char *path=".")
{
return;
}

void exportNonzeros(const char *tag="", const char *path=".")
{
return;
}


void setLinearizationType(LinearizationType linearizationType){
linearizationType_ = linearizationType;
};

const LinearizationType& getLinearizationType() const
{ return linearizationType_; }
Expand Down
200 changes: 200 additions & 0 deletions opm/models/discretization/common/tpfalinearizer.hh
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ public:
{
simulatorPtr_ = nullptr;
separateSparseSourceTerms_ = Parameters::Get<Parameters::SeparateSparseSourceTerms>();
exportIndex_=-1;
exportCount_=-1;
}

/*!
Expand Down Expand Up @@ -412,6 +414,201 @@ public:
GlobalEqVector& residual()
{ return residual_; }

/*!
* \brief Print first 16 block elements of a vector.
*/
void printVector(GlobalEqVector &x, const char *name="x")
{
int count = 1;
printf("%s =\n[\n",name);
for (auto block = x.begin(); block != x.end(); block++)
{
for (auto i=block->begin(); i!=block->end(); i++)
{
printf(" %+.4e",*i);
}
printf("\n");
count++;
if(count>16) break;
}
printf("]\n");
}

/*!
* \brief Print first 16 block elements of residual.
*/
void printResidual(const char *name="r")
{
printVector(residual_, name);
}

/*!
* \brief Print sparsity pattern of the first 16 rows
* of the jacobian block matrix
*/
void printSparsity(const char *name="s")
{
auto& A = jacobian_->istlMatrix();

printf("nrows = %lu\n",A.N());
printf("ncols = %lu\n",A.M());
printf("nnz = %lu\n",A.nonzeroes());

printf("%s =\n[\n",name);
int count=1;
int offset=0;
for(auto row=A.begin(); row!=A.end(); row++)
{
printf("%4d: ",offset);
for(unsigned int i=0;i<row->getsize();i++)
{
printf(" %4lu",row->getindexptr()[i]);
}
printf("\n");
offset+=row->getsize();
count++;
if(count>16) break;
}
printf("]\n");
}

/*!
* \brief Print block elements of the first 6 rows of the
* j acobian block matrix
*/
void printNonzeros(const char *name="d")
{
auto& A = jacobian_->istlMatrix();
printf("%s =\n[\n",name);
int count=1;
for(auto row=A.begin();row!=A.end();row++)
{
for(unsigned int j=0;j<row->getsize();j++)
{
printf("|");
auto mat = row->getptr()[j];
for(auto vec=mat.begin();vec!=mat.end();vec++)
{
for(auto k=vec->begin();k!=vec->end();k++)
{
printf(" %+.4e",*k);
}
printf(" |");
}
printf("\n");
}
count++;
if(count>6) break;
printf("\n");
}
printf("]\n");
}

/*!
* \brief Print sparsity pattern and nonzeros of jacobian block matrix
*/
void printJacobian()
{
printSparsity();
printNonzeros();
}

/*!
* \brief Export blocks-sparse linear system.
*/
void exportSystem(int idx, char *tag, const char *path="export")
{
// export sparsity only once
if(exportIndex_==-1) exportSparsity(path);

// increment indices and generate tag
exportCount_ = exportIndex_==idx ? ++exportCount_ : 0;
exportIndex_ = idx;
sprintf(tag,"_%03d_%02d",exportIndex_, exportCount_);

printf("index = %d\n", exportIndex_);
printf("count = %d\n", exportCount_);

// export matrix
exportNonzeros(tag,path);

// export residual
char name[256];
sprintf(name,"%s/r",path);
exportVector(residual_,tag,name);
}

/*!
* \brief Export block vector.
*/
void exportVector(GlobalEqVector &x, const char *tag="", const char *name="export/x")
{
// assume double precision and contiguous data
const double *data = &x[0][0];

char filename[512];
sprintf(filename,"%s%s.f64",name,tag);
FILE *out =fopen(filename,"w");
fwrite(data, sizeof(double), x.dim(),out);
fclose(out);
}

/*!
* \brief Export nonzero blocks of jacobian block-sparse matrix
*/
void exportNonzeros(const char *tag="", const char *path=".")
{
auto& A = jacobian_->istlMatrix();

// assume double precision and contiguous data
const double *data = &A[0][0][0][0];
size_t dim = A[0][0].N()*A[0][0].M()*A.nonzeroes();

char filename[256];
sprintf(filename,"%s/data%s.f64",path,tag);
FILE *out =fopen(filename,"w");
fwrite(data, sizeof(double), dim,out);
fclose(out);
}

/*!
* \brief Export sparsity pattern of jacobian block-sparse matrix
*/
void exportSparsity(const char *path=".")
{
//assemble csr graph
auto& A = jacobian_->istlMatrix();
auto rows = std::make_unique<int[]>(A.N()+1);
auto cols = std::make_unique<int[]>(A.nonzeroes());

int irow=0;
int icol=0;
rows[0]=0;
for(auto row=A.begin(); row!=A.end(); row++)
{
for(unsigned int i=0;i<row->getsize();i++)
{
cols[icol++]=row->getindexptr()[i];
}
rows[irow+1]= rows[irow]+row->getsize();
irow++;
}

//export arrays
FILE *out;
char filename[256];

sprintf(filename,"%s/rows.i32",path);
out=fopen(filename,"w");
fwrite(rows, sizeof(int), A.N()+1,out);
fclose(out);

sprintf(filename,"%s/cols.i32",path);
out=fopen(filename,"w");
fwrite(cols, sizeof(int), A.nonzeroes(),out);
fclose(out);
}

void setLinearizationType(LinearizationType linearizationType)
{ linearizationType_ = linearizationType; }

Expand Down Expand Up @@ -1063,6 +1260,9 @@ private:
bool separateSparseSourceTerms_ = false;

FullDomain<> fullDomain_;

int exportIndex_;
int exportCount_;
};
} // namespace Opm

Expand Down
23 changes: 22 additions & 1 deletion opm/simulators/linalg/FlexibleSolver_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
#include <opm/simulators/linalg/PreconditionerFactoryGPUIncludeWrapper.hpp>
#include <opm/simulators/linalg/is_gpu_operator.hpp>

#if HAVE_AVX2_EXTENSION
#include <opm/simulators/linalg/mixed/wrapper.hpp>
#endif

#include <dune/common/fmatrix.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/solvers.hh>
Expand Down Expand Up @@ -181,6 +185,24 @@ namespace Dune
tol, // desired residual reduction factor
maxiter, // maximum number of iterations
verbosity);
#if HAVE_AVX2_EXTENSION
} else if (solver_type == "mixed-bicgstab") {
if constexpr (Opm::is_gpu_operator_v<Operator>) {
OPM_THROW(std::invalid_argument, "mixed-bicgstab solver not supported for GPU operatorsg");
} else if constexpr (std::is_same_v<typename VectorType::field_type, float>){
OPM_THROW(std::invalid_argument, "mixed-bicgstab solver not supported for single precision.");
} else {
const std::string prec_type = prm.get<std::string>("preconditioner.type", "error");
bool use_mixed_dilu= (prec_type=="mixed-dilu");
using MatrixType = decltype(linearoperator_for_solver_->getmat());
linsolver_ = std::make_shared<Dune::MixedSolver<VectorType,MatrixType>>(
linearoperator_for_solver_->getmat(),
tol,
maxiter,
use_mixed_dilu
);
}
#endif
} else if (solver_type == "loopsolver") {
linsolver_ = std::make_shared<Dune::LoopSolver<VectorType>>(*linearoperator_for_solver_,
*scalarproduct_,
Expand All @@ -197,7 +219,6 @@ namespace Dune
restart,
maxiter, // maximum number of iterations
verbosity);

} else {
if constexpr (!Opm::is_gpu_operator_v<Operator>) {
if (solver_type == "flexgmres") {
Expand Down
Loading