OPM · nrseman · Jul 9, 2025 · Jul 10, 2025 · Jul 11, 2025 · Aug 20, 2025
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -750,8 +750,15 @@ endif()
 
 add_custom_target(extra_test ${CMAKE_CTEST_COMMAND} -C ExtraTests)
 
-# must link libraries after target 'opmsimulators' has been defined
+# Mixed precision needs the AVX2 flags on its C sources and C11 or newer.
+if(HAVE_AVX2_EXTENSION)
+  # Leading blank: APPEND_STRING glues the value verbatim onto flags that are already set.
+  set_property(SOURCE ${AVX2_SOURCE_FILES} APPEND_STRING PROPERTY COMPILE_FLAGS " ${AVX2_FLAGS}")
+  # `set(CMAKE_C_STANDARD 11)` had no effect (still -std=c99), hence C17; -std=c99 -D_ISOC11_SOURCE also seems to work
+  set_property(TARGET opmsimulators PROPERTY C_STANDARD 17)
+endif()
 
+# must link libraries after target 'opmsimulators' has been defined
 if(CUDA_FOUND)
   if (NOT USE_HIP)
     target_link_libraries(opmsimulators

diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake
@@ -262,6 +262,15 @@ list (APPEND MAIN_SOURCE_FILES
   opm/simulators/wells/WGState.cpp
   )
 
+if (HAVE_AVX2_EXTENSION)
+  # C sources of the mixed-precision code; kept in their own list besides MAIN_SOURCE_FILES
+  set (AVX2_SOURCE_FILES
+    opm/simulators/linalg/mixed/bsr.c
+    opm/simulators/linalg/mixed/prec.c
+    opm/simulators/linalg/mixed/bslv.c)
+  list (APPEND MAIN_SOURCE_FILES ${AVX2_SOURCE_FILES})
+endif()
+
 if (HAVE_ECL_INPUT)
   list (APPEND MAIN_SOURCE_FILES
     opm/simulators/utils/satfunc/GasPhaseConsistencyChecks.cpp
@@ -1252,6 +1261,16 @@ if (HAVE_ECL_INPUT)
   )
 endif()
 
+if (HAVE_AVX2_EXTENSION)
+  # headers of the mixed-precision code, only published when AVX2 is available
+  list (APPEND PUBLIC_HEADER_FILES
+    opm/simulators/linalg/mixed/bslv.h
+    opm/simulators/linalg/mixed/bsr.h
+    opm/simulators/linalg/mixed/prec.h
+    opm/simulators/linalg/mixed/vec.h
+    opm/simulators/linalg/mixed/wrapper.hpp)
+endif()
+
 if (Damaris_FOUND AND MPI_FOUND AND USE_DAMARIS_LIB)
   list (APPEND PUBLIC_HEADER_FILES
     opm/simulators/utils/DamarisKeywords.hpp

diff --git a/opm-simulators-prereqs.cmake b/opm-simulators-prereqs.cmake
@@ -6,6 +6,7 @@ set (opm-simulators_CONFIG_VAR
   HAVE_MPI
   HAVE_PETSC
   COMPILE_GPU_BRIDGE
+  HAVE_AVX2_EXTENSION
   HAVE_CUDA
   HAVE_OPENCL
   HAVE_OPENCL_HPP
@@ -37,6 +38,9 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.30.0)
 	set(_Boost_CONFIG_MODE CONFIG)
 endif()
 
+include(CheckAVX2)
+check_for_avx2()
+
 # dependencies
 set (opm-simulators_DEPS
   # Compile with C99 support if available

diff --git a/opm/models/discretization/common/fvbaselinearizer.hh b/opm/models/discretization/common/fvbaselinearizer.hh
@@ -288,8 +288,55 @@ public:
     GlobalEqVector& residual()
     { return residual_; }
 
-    void setLinearizationType(LinearizationType linearizationType)
-    { linearizationType_ = linearizationType; }
+    /*! \brief No-op stub: this linearizer prints nothing; both
+     *         arguments are ignored. */
+    void printVector(GlobalEqVector&, const char *name="x")
+    { (void)name; }
+
+    /*! \brief Pass the residual and its label \p name on to printVector()
+     *         (which prints nothing in this linearizer). */
+    void printResidual(const char *name="r")
+    { printVector(residual_, name); }
+
+    /*! \brief No-op stub: this linearizer does not print a sparsity
+     *         pattern; \p name is ignored. */
+    void printSparsity(const char *name="s")
+    { (void)name; }
+
+    /*! \brief No-op stub: this linearizer does not print matrix
+     *         blocks; \p name is ignored. */
+    void printNonzeros(const char *name="d")
+    { (void)name; }
+
+    /*! \brief No-op stub: this linearizer does not print the
+     *         jacobian. */
+    void printJacobian()
+    { }
+
+    /*! \brief No-op stub: nothing is exported and \p tag is left
+     *         untouched; all arguments are ignored. */
+    void exportSystem(int idx, char *tag, const char *path="export")
+    { (void)idx; (void)tag; (void)path; }
+
+    /*! \brief Stub: no file is written, only the dimension of \p x is printed (cast so the
+     *         argument matches '%lu'). NOTE(review): the only stub with output - confirm intended. */
+    void exportVector(GlobalEqVector &x, const char *tag="", const char *name="export/x")
+    { (void)tag; (void)name; printf("n = %lu\n",static_cast<unsigned long>(x.dim())); }
+
+    /*! \brief No-op stub: this linearizer does not export a sparsity
+     *         pattern; \p path is ignored. */
+    void exportSparsity(const char *path=".")
+    { (void)path; }
+
+    /*! \brief No-op stub: this linearizer does not export matrix
+     *         blocks; both arguments are ignored. */
+    void exportNonzeros(const char *tag="", const char *path=".")
+    { (void)tag; (void)path; }
+
+
+    /*! \brief Store the linearization type that is handed in. */
+    void setLinearizationType(LinearizationType linearizationType)
+    { linearizationType_ = linearizationType; }
 
     const LinearizationType& getLinearizationType() const
     { return linearizationType_; }

diff --git a/opm/models/discretization/common/tpfalinearizer.hh b/opm/models/discretization/common/tpfalinearizer.hh
@@ -236,6 +236,8 @@ public:
     {
         simulatorPtr_ = nullptr;
         separateSparseSourceTerms_ = Parameters::Get<Parameters::SeparateSparseSourceTerms>();
+        exportIndex_=-1;
+        exportCount_=-1;
     }
 
     /*!
@@ -412,6 +414,201 @@ public:
     GlobalEqVector& residual()
     { return residual_; }
 
+    /*!
+     * \brief Print at most the first 16 blocks of a block vector to stdout.
+     *
+     * \param x    Vector to print; every entry is formatted with "%+.4e".
+     * \param name Label printed in front of the listing.
+     */
+    void printVector(GlobalEqVector &x, const char *name="x")
+    {
+        constexpr int maxBlocks = 16;
+        int printed = 0;
+        printf("%s =\n[\n",name);
+        for (const auto& block : x) {
+            if (printed == maxBlocks) break;
+            for (const auto& entry : block) printf(" %+.4e",entry);
+            printf("\n");
+            ++printed;
+        }
+        printf("]\n");
+    }
+
+    /*!
+     * \brief Print the residual through printVector().
+     *
+     * \param name Label printed in front of the listing.
+     */
+    void printResidual(const char *name="r")
+    { printVector(residual_, name); }
+
+    /*!
+     * \brief Print sparsity pattern of the first 16 rows
+     *        of the jacobian block matrix
+     *
+     * Each row is prefixed with the offset of its first block.
+     */
+    void printSparsity(const char *name="s")
+    {
+        auto& A = jacobian_->istlMatrix();
+        // cast explicitly so that the arguments match '%lu' on every platform
+        printf("nrows = %lu\n",static_cast<unsigned long>(A.N()));
+        printf("ncols = %lu\n",static_cast<unsigned long>(A.M()));
+        printf("nnz   = %lu\n",static_cast<unsigned long>(A.nonzeroes()));
+
+        printf("%s =\n[\n",name);
+        int offset=0;
+        int count=0;
+        for(auto row=A.begin(); row!=A.end() && count<16; ++row, ++count)
+        {
+            printf("%4d: ",offset);
+            for(unsigned int i=0;i<row->getsize();i++)
+            {
+                printf(" %4lu",static_cast<unsigned long>(row->getindexptr()[i]));
+            }
+            printf("\n");
+            offset+=row->getsize();
+        }
+        printf("]\n");
+    }
+
+    /*!
+     * \brief Print block elements of the first 6 rows of the
+     *        jacobian block matrix
+     * Every stored block is printed on one line, its rows separated by '|';
+     * consecutive matrix rows are separated by an empty line.
+     */
+    void printNonzeros(const char *name="d")
+    {
+        auto& A = jacobian_->istlMatrix();
+        printf("%s =\n[\n",name);
+        int count=1;
+        for(auto row=A.begin();row!=A.end();row++)
+        {
+            for(unsigned int j=0;j<row->getsize();j++)
+            {
+                printf("|");
+                // reference instead of a copy: the block is only read
+                const auto& mat = row->getptr()[j];
+                for(const auto& vec : mat)
+                {
+                    for(const auto& k : vec) printf(" %+.4e",k);
+                    printf(" |");
+                }
+                printf("\n");
+            }
+            count++;
+            if(count>6) break;
+            printf("\n");
+        }
+        printf("]\n");
+    }
+
+    /*!
+     * \brief Print the jacobian block matrix: printSparsity() followed by printNonzeros()
+     */
+    void printJacobian()
+    {
+        printSparsity();
+        printNonzeros();
+    }
+
+    /*!
+     * \brief Export block-sparse linear system.
+     * \param idx  Index of the system; repeated exports with the same index are counted.
+     * \param tag  Output buffer receiving "_<idx>_<count>"; 25 bytes hold any int pair.
+     * \param path Directory the files are written to.
+     */
+    void exportSystem(int idx, char *tag, const char *path="export")
+    {
+        // export sparsity only once
+        if(exportIndex_==-1) exportSparsity(path);
+
+        // count consecutive exports for the same index, restart at 0 for a new one
+        if(exportIndex_==idx) ++exportCount_; else exportCount_=0;
+        exportIndex_ = idx;
+        sprintf(tag,"_%03d_%02d",exportIndex_, exportCount_);
+        printf("index = %d\n", exportIndex_);
+        printf("count = %d\n", exportCount_);
+
+        // export matrix, then the residual under the base name "<path>/r"
+        exportNonzeros(tag,path);
+        char name[256];
+        snprintf(name,sizeof(name),"%s/r",path); // bounded: path is caller-supplied
+        exportVector(residual_,tag,name);
+    }
+
+    /*!
+     * \brief Export block vector as raw doubles to the file "<name><tag>.f64".
+     *        Nothing is written if the file cannot be opened.
+     */
+    void exportVector(GlobalEqVector &x, const char *tag="", const char *name="export/x")
+    {
+        char filename[512];
+        snprintf(filename,sizeof(filename),"%s%s.f64",name,tag);
+        FILE *out = fopen(filename,"wb"); // binary mode: raw doubles, no newline translation
+        if(out==nullptr) return; // e.g. the export directory does not exist
+        // assume double precision and contiguous data; x[0] is only touched if x is non-empty
+        if(x.dim()>0) fwrite(&x[0][0], sizeof(double), x.dim(),out);
+        fclose(out);
+    }
+
+    /*!
+     * \brief Export nonzero blocks of jacobian block-sparse matrix as raw doubles
+     *        to "<path>/data<tag>.f64"; nothing is written if the file cannot be opened
+     */
+    void exportNonzeros(const char *tag="", const char *path=".")
+    {
+        auto& A = jacobian_->istlMatrix();
+        char filename[256];
+        snprintf(filename,sizeof(filename),"%s/data%s.f64",path,tag);
+        FILE *out = fopen(filename,"wb"); // binary mode: raw doubles
+        if(out==nullptr) return;
+
+        // assume double precision and contiguous data starting at block (0,0)
+        const size_t dim = A.nonzeroes()>0 ? A[0][0].N()*A[0][0].M()*A.nonzeroes() : 0;
+        if(dim>0) fwrite(&A[0][0][0][0], sizeof(double), dim,out);
+        fclose(out);
+    }
+
+    /*!
+     * \brief Export sparsity pattern of jacobian block-sparse matrix: CSR row
+     *        offsets go to "<path>/rows.i32", column indices to "<path>/cols.i32"
+     */
+    void exportSparsity(const char *path=".")
+    {
+        //assemble csr graph
+        auto& A = jacobian_->istlMatrix();
+        auto rows = std::make_unique<int[]>(A.N()+1);
+        auto cols = std::make_unique<int[]>(A.nonzeroes());
+
+        int irow=0;
+        int icol=0;
+        rows[0]=0;
+        for(auto row=A.begin(); row!=A.end(); row++)
+        {
+            for(unsigned int i=0;i<row->getsize();i++)
+            {
+                cols[icol++]=row->getindexptr()[i];
+            }
+            rows[irow+1]= rows[irow]+row->getsize();
+            irow++;
+        }
+
+        //export arrays; fwrite needs the raw pointers, and unopenable files are skipped
+        char filename[256];
+        snprintf(filename,sizeof(filename),"%s/rows.i32",path);
+        if(FILE *out=fopen(filename,"wb")) {
+            fwrite(rows.get(), sizeof(int), A.N()+1,out);
+            fclose(out);
+        }
+        snprintf(filename,sizeof(filename),"%s/cols.i32",path);
+        if(FILE *out=fopen(filename,"wb")) {
+            fwrite(cols.get(), sizeof(int), A.nonzeroes(),out);
+            fclose(out);
+        }
+    }
+
     void setLinearizationType(LinearizationType linearizationType)
     { linearizationType_ = linearizationType; }
 
@@ -1063,6 +1260,9 @@ private:
     bool separateSparseSourceTerms_ = false;
 
     FullDomain<> fullDomain_;
+
+    int exportIndex_ = -1; // index of the last exported system, -1 before the first export
+    int exportCount_ = -1; // number of repeated exports for exportIndex_, starting at 0
 };
 } // namespace Opm
 

diff --git a/opm/simulators/linalg/FlexibleSolver_impl.hpp b/opm/simulators/linalg/FlexibleSolver_impl.hpp
@@ -32,6 +32,10 @@
 #include <opm/simulators/linalg/PreconditionerFactoryGPUIncludeWrapper.hpp>
 #include <opm/simulators/linalg/is_gpu_operator.hpp>
 
+#if HAVE_AVX2_EXTENSION
+#include <opm/simulators/linalg/mixed/wrapper.hpp>
+#endif
+
 #include <dune/common/fmatrix.hh>
 #include <dune/istl/bcrsmatrix.hh>
 #include <dune/istl/solvers.hh>
@@ -181,6 +185,24 @@ namespace Dune
                                                                             tol, // desired residual reduction factor
                                                                             maxiter, // maximum number of iterations
                                                                             verbosity);
+#if HAVE_AVX2_EXTENSION
+        } else if (solver_type == "mixed-bicgstab") {
+            // rejected for GPU operators and float vectors; "mixed-dilu" is enabled only if requested
+            if constexpr (Opm::is_gpu_operator_v<Operator>) {
+                OPM_THROW(std::invalid_argument, "mixed-bicgstab solver not supported for GPU operators");
+            } else if constexpr (std::is_same_v<typename VectorType::field_type, float>) {
+                OPM_THROW(std::invalid_argument, "mixed-bicgstab solver not supported for single precision.");
+            } else {
+                const std::string prec_type = prm.get<std::string>("preconditioner.type", "error");
+                const bool use_mixed_dilu = (prec_type == "mixed-dilu");
+                using MatrixType = decltype(linearoperator_for_solver_->getmat());
+                linsolver_ = std::make_shared<Dune::MixedSolver<VectorType,MatrixType>>(
+                                                                            linearoperator_for_solver_->getmat(),
+                                                                            tol,
+                                                                            maxiter,
+                                                                            use_mixed_dilu);
+            }
+#endif
         } else if (solver_type == "loopsolver") {
             linsolver_ = std::make_shared<Dune::LoopSolver<VectorType>>(*linearoperator_for_solver_,
                                                                         *scalarproduct_,
@@ -197,7 +219,6 @@ namespace Dune
                                                                                   restart,
                                                                                   maxiter, // maximum number of iterations
                                                                                   verbosity);
-
         } else {
             if constexpr (!Opm::is_gpu_operator_v<Operator>) {
                 if (solver_type == "flexgmres") {