diff --git a/CMakeLists.txt b/CMakeLists.txt index c82883cd93c..cb123d3bd32 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -750,8 +750,15 @@ endif() add_custom_target(extra_test ${CMAKE_CTEST_COMMAND} -C ExtraTests) -# must link libraries after target 'opmsimulators' has been defined +# must activate avx2 flags and C11 for mixed precision. +if(HAVE_AVX2_EXTENSION) + # Trying to set the standard using `set(CMAKE_C_STANDARD 11)` had no effect, still used -std=c99 + # Hence we are requesting C17. (-std=c99 -D_ISOC11_SOURCE also seems to work) + set_property(SOURCE ${AVX2_SOURCE_FILES} PROPERTY COMPILE_FLAGS "${AVX2_FLAGS}" APPEND_STRING) + set_property(TARGET opmsimulators PROPERTY C_STANDARD 17) +endif() +# must link libraries after target 'opmsimulators' has been defined if(CUDA_FOUND) if (NOT USE_HIP) target_link_libraries(opmsimulators diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index c1254cc1f40..bca21262dde 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -262,6 +262,15 @@ list (APPEND MAIN_SOURCE_FILES opm/simulators/wells/WGState.cpp ) +if (HAVE_AVX2_EXTENSION) + set (AVX2_SOURCE_FILES + opm/simulators/linalg/mixed/bsr.c + opm/simulators/linalg/mixed/prec.c + opm/simulators/linalg/mixed/bslv.c) + list (APPEND MAIN_SOURCE_FILES + ${AVX2_SOURCE_FILES}) +endif() + if (HAVE_ECL_INPUT) list (APPEND MAIN_SOURCE_FILES opm/simulators/utils/satfunc/GasPhaseConsistencyChecks.cpp @@ -1252,6 +1261,16 @@ if (HAVE_ECL_INPUT) ) endif() +if (HAVE_AVX2_EXTENSION) + list (APPEND PUBLIC_HEADER_FILES + opm/simulators/linalg/mixed/bslv.h + opm/simulators/linalg/mixed/bsr.h + opm/simulators/linalg/mixed/prec.h + opm/simulators/linalg/mixed/vec.h + opm/simulators/linalg/mixed/wrapper.hpp + ) +endif() + if (Damaris_FOUND AND MPI_FOUND AND USE_DAMARIS_LIB) list (APPEND PUBLIC_HEADER_FILES opm/simulators/utils/DamarisKeywords.hpp diff --git a/opm-simulators-prereqs.cmake b/opm-simulators-prereqs.cmake index 58bb27ec9ef..826ae0a912e 100644 --- a/opm-simulators-prereqs.cmake +++ b/opm-simulators-prereqs.cmake @@ -6,6 +6,7 @@ set (opm-simulators_CONFIG_VAR HAVE_MPI HAVE_PETSC COMPILE_GPU_BRIDGE + HAVE_AVX2_EXTENSION HAVE_CUDA HAVE_OPENCL HAVE_OPENCL_HPP @@ -37,6 +38,9 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.30.0) set(_Boost_CONFIG_MODE CONFIG) endif() +include(CheckAVX2) +check_for_avx2() + # dependencies set (opm-simulators_DEPS # Compile with C99 support if available diff --git a/opm/models/discretization/common/fvbaselinearizer.hh b/opm/models/discretization/common/fvbaselinearizer.hh index 1e34f1a4358..c5c4e33dfe0 100644 --- a/opm/models/discretization/common/fvbaselinearizer.hh +++ b/opm/models/discretization/common/fvbaselinearizer.hh @@ -288,8 +288,55 @@ public: GlobalEqVector& residual() { return residual_; } - void setLinearizationType(LinearizationType linearizationType) - { linearizationType_ = linearizationType; } + void printVector(GlobalEqVector&, const char *name="x") + { + return; + } + + void printResidual(const char *name="r") + { + printVector(residual_); + } + + void printSparsity(const char *name="s") + { + return; + } + + void printNonzeros(const char *name="d") + { + return; + } + + void printJacobian() + { + return; + } + + void exportSystem(int idx, char *tag, const char *path="export") + { + return; + } + + void exportVector(GlobalEqVector &x, const char *tag="", const char *name="export/x") + { + printf("n = %lu\n",x.dim()); + } + + void exportSparsity(const char *path=".") + { + return; + } + + void exportNonzeros(const char *tag="", const 
char *path=".") + { + return; + } + + + void setLinearizationType(LinearizationType linearizationType){ + linearizationType_ = linearizationType; + }; const LinearizationType& getLinearizationType() const { return linearizationType_; } diff --git a/opm/models/discretization/common/tpfalinearizer.hh b/opm/models/discretization/common/tpfalinearizer.hh index 4da966e1071..0a727c43231 100644 --- a/opm/models/discretization/common/tpfalinearizer.hh +++ b/opm/models/discretization/common/tpfalinearizer.hh @@ -236,6 +236,8 @@ public: { simulatorPtr_ = nullptr; separateSparseSourceTerms_ = Parameters::Get(); + exportIndex_=-1; + exportCount_=-1; } /*! @@ -412,6 +414,201 @@ public: GlobalEqVector& residual() { return residual_; } + /*! + * \brief Print first 16 block elements of a vector. + */ + void printVector(GlobalEqVector &x, const char *name="x") + { + int count = 1; + printf("%s =\n[\n",name); + for (auto block = x.begin(); block != x.end(); block++) + { + for (auto i=block->begin(); i!=block->end(); i++) + { + printf(" %+.4e",*i); + } + printf("\n"); + count++; + if(count>16) break; + } + printf("]\n"); + } + + /*! + * \brief Print first 16 block elements of residual. + */ + void printResidual(const char *name="r") + { + printVector(residual_, name); + } + + /*! + * \brief Print sparsity pattern of the first 16 rows + * of the jacobian block matrix + */ + void printSparsity(const char *name="s") + { + auto& A = jacobian_->istlMatrix(); + + printf("nrows = %lu\n",A.N()); + printf("ncols = %lu\n",A.M()); + printf("nnz = %lu\n",A.nonzeroes()); + + printf("%s =\n[\n",name); + int count=1; + int offset=0; + for(auto row=A.begin(); row!=A.end(); row++) + { + printf("%4d: ",offset); + for(unsigned int i=0;igetsize();i++) + { + printf(" %4lu",row->getindexptr()[i]); + } + printf("\n"); + offset+=row->getsize(); + count++; + if(count>16) break; + } + printf("]\n"); + } + + /*! + * \brief Print block elements of the first 6 rows of the + * j acobian block matrix + */ + void printNonzeros(const char *name="d") + { + auto& A = jacobian_->istlMatrix(); + printf("%s =\n[\n",name); + int count=1; + for(auto row=A.begin();row!=A.end();row++) + { + for(unsigned int j=0;jgetsize();j++) + { + printf("|"); + auto mat = row->getptr()[j]; + for(auto vec=mat.begin();vec!=mat.end();vec++) + { + for(auto k=vec->begin();k!=vec->end();k++) + { + printf(" %+.4e",*k); + } + printf(" |"); + } + printf("\n"); + } + count++; + if(count>6) break; + printf("\n"); + } + printf("]\n"); + } + + /*! + * \brief Print sparsity pattern and nonzeros of jacobian block matrix + */ + void printJacobian() + { + printSparsity(); + printNonzeros(); + } + + /*! + * \brief Export blocks-sparse linear system. + */ + void exportSystem(int idx, char *tag, const char *path="export") + { + // export sparsity only once + if(exportIndex_==-1) exportSparsity(path); + + // increment indices and generate tag + exportCount_ = exportIndex_==idx ? ++exportCount_ : 0; + exportIndex_ = idx; + sprintf(tag,"_%03d_%02d",exportIndex_, exportCount_); + + printf("index = %d\n", exportIndex_); + printf("count = %d\n", exportCount_); + + // export matrix + exportNonzeros(tag,path); + + // export residual + char name[256]; + sprintf(name,"%s/r",path); + exportVector(residual_,tag,name); + } + + /*! + * \brief Export block vector. 
+ */ + void exportVector(GlobalEqVector &x, const char *tag="", const char *name="export/x") + { + // assume double precision and contiguous data + const double *data = &x[0][0]; + + char filename[512]; + sprintf(filename,"%s%s.f64",name,tag); + FILE *out =fopen(filename,"w"); + fwrite(data, sizeof(double), x.dim(),out); + fclose(out); + } + + /*! + * \brief Export nonzero blocks of jacobian block-sparse matrix + */ + void exportNonzeros(const char *tag="", const char *path=".") + { + auto& A = jacobian_->istlMatrix(); + + // assume double precision and contiguous data + const double *data = &A[0][0][0][0]; + size_t dim = A[0][0].N()*A[0][0].M()*A.nonzeroes(); + + char filename[256]; + sprintf(filename,"%s/data%s.f64",path,tag); + FILE *out =fopen(filename,"w"); + fwrite(data, sizeof(double), dim,out); + fclose(out); + } + + /*! + * \brief Export sparsity pattern of jacobian block-sparse matrix + */ + void exportSparsity(const char *path=".") + { + //assemble csr graph + auto& A = jacobian_->istlMatrix(); + auto rows = std::make_unique(A.N()+1); + auto cols = std::make_unique(A.nonzeroes()); + + int irow=0; + int icol=0; + rows[0]=0; + for(auto row=A.begin(); row!=A.end(); row++) + { + for(unsigned int i=0;igetsize();i++) + { + cols[icol++]=row->getindexptr()[i]; + } + rows[irow+1]= rows[irow]+row->getsize(); + irow++; + } + + //export arrays + FILE *out; + char filename[256]; + + sprintf(filename,"%s/rows.i32",path); + out=fopen(filename,"w"); + fwrite(rows, sizeof(int), A.N()+1,out); + fclose(out); + + sprintf(filename,"%s/cols.i32",path); + out=fopen(filename,"w"); + fwrite(cols, sizeof(int), A.nonzeroes(),out); + fclose(out); + } + void setLinearizationType(LinearizationType linearizationType) { linearizationType_ = linearizationType; } @@ -1063,6 +1260,9 @@ private: bool separateSparseSourceTerms_ = false; FullDomain<> fullDomain_; + + int exportIndex_; + int exportCount_; }; } // namespace Opm diff --git a/opm/simulators/linalg/FlexibleSolver_impl.hpp b/opm/simulators/linalg/FlexibleSolver_impl.hpp index 4e1d0d1e9c2..b67fc0b12f1 100644 --- a/opm/simulators/linalg/FlexibleSolver_impl.hpp +++ b/opm/simulators/linalg/FlexibleSolver_impl.hpp @@ -32,6 +32,10 @@ #include #include +#if HAVE_AVX2_EXTENSION +#include +#endif + #include #include #include @@ -181,6 +185,22 @@ namespace Dune tol, // desired residual reduction factor maxiter, // maximum number of iterations verbosity); +#if HAVE_AVX2_EXTENSION + } else if (solver_type == "mixed-bicgstab") { + if constexpr (!Opm::is_gpu_operator_v) { + const std::string prec_type = prm.get("preconditioner.type", "error"); + bool use_mixed_dilu= (prec_type=="mixed-dilu"); + using MatrixType = decltype(linearoperator_for_solver_->getmat()); + linsolver_ = std::make_shared>( + linearoperator_for_solver_->getmat(), + tol, + maxiter, + use_mixed_dilu + ); + } else { + OPM_THROW(std::invalid_argument, "mixed-bicgstab solver not supported for GPU operators."); + } +#endif } else if (solver_type == "loopsolver") { linsolver_ = std::make_shared>(*linearoperator_for_solver_, *scalarproduct_, @@ -197,7 +217,6 @@ namespace Dune restart, maxiter, // maximum number of iterations verbosity); - } else { if constexpr (!Opm::is_gpu_operator_v) { if (solver_type == "flexgmres") { diff --git a/opm/simulators/linalg/StandardPreconditioners_serial.hpp b/opm/simulators/linalg/StandardPreconditioners_serial.hpp index b45c7bfb802..0fe680f957d 100644 --- a/opm/simulators/linalg/StandardPreconditioners_serial.hpp +++ 
b/opm/simulators/linalg/StandardPreconditioners_serial.hpp @@ -31,6 +31,19 @@ namespace Opm { +template +class TrivialPreconditioner : public Dune::PreconditionerWithUpdate +{ + public: + TrivialPreconditioner(){}; + virtual void update() override {}; + virtual bool hasPerfectUpdate() const override {return true;} + virtual void pre ([[maybe_unused]] X& x, [[maybe_unused]] Y& y) override {}; + virtual void post ([[maybe_unused]] X& x) override {}; + virtual void apply ([[maybe_unused]] X& x, [[maybe_unused]] const Y& y) override {}; + virtual Dune::SolverCategory::Category category() const override { return Dune::SolverCategory::sequential; }; +}; + template struct StandardPreconditioners>> @@ -71,6 +84,16 @@ struct StandardPreconditioners>(op.getmat()); }); + F::addCreator("mixed-ilu0", [](const O& op, const P& prm, const std::function&, std::size_t) { + DUNE_UNUSED_PARAMETER(prm); + DUNE_UNUSED_PARAMETER(op); + return std::make_shared>(); + }); + F::addCreator("mixed-dilu", [](const O& op, const P& prm, const std::function&, std::size_t) { + DUNE_UNUSED_PARAMETER(prm); + DUNE_UNUSED_PARAMETER(op); + return std::make_shared>(); + }); F::addCreator("jac", [](const O& op, const P& prm, const std::function&, std::size_t) { const int n = prm.get("repeats", 1); const double w = prm.get("relaxation", 1.0); diff --git a/opm/simulators/linalg/mixed/README.md b/opm/simulators/linalg/mixed/README.md new file mode 100644 index 00000000000..988ff58e913 --- /dev/null +++ b/opm/simulators/linalg/mixed/README.md @@ -0,0 +1,23 @@ +# Mixed-precision linear solvers +This folder contains mixed-precision building blocks for Krylov subspace methods +and a highly optimized mixed-precision implementation of ILU0 preconditioned bicgstab. +Hopefully, this will inspire the exploration of mixed-precision algorithms in OPM. + +The initial implementation is specialized for 3x3 block-sparse matrices due to their +importance in reservoir simulation and restricted to serial runs. Extending the work +to parallel runs is a matter of extending OPM's parallel infrastructure and should be +relatively straight-forward. + +The mixed-precision solver is selected by the command-line option `--linear-solver=mixed-ilu0`. +The command-line option `--matrix-add-well-contributions=true` must also be set as the +mixed-precision solver operates directly on block-sparse matrices, not on linear operators as +other OPM solvers do. For convenience, a wrapper similar to the one below can be used. 
+ +``` bash +OMP_NUM_THREADS=1 mpirun -np 1 --map-by numa --bind-to core build/bin/flow \ + --matrix-add-well-contributions=true \ + --linear-solver=mixed-ilu0 \ + --linear-solver-reduction=1e-3 \ + --linear-solver-max-iter=1024 \ + $@ +``` diff --git a/opm/simulators/linalg/mixed/bslv.c b/opm/simulators/linalg/mixed/bslv.c new file mode 100644 index 00000000000..97264bed7f8 --- /dev/null +++ b/opm/simulators/linalg/mixed/bslv.c @@ -0,0 +1,243 @@ +#include "bslv.h" +#include "vec.h" + +#include +#include +#include +#include + + +bslv_memory *bslv_alloc() +{ + bslv_memory *mem = malloc(sizeof(bslv_memory)); + assert(mem); + mem->e = NULL; + mem->dtmp = NULL; + return mem; +} + +void bslv_free(bslv_memory *mem) +{ + if(mem == NULL) return; + + if(mem->e !=NULL) free(mem->e); + if(mem->dtmp !=NULL) + { + int narrays=7; + for(int i=0;idtmp[i] != NULL) free(mem->dtmp[i]); + } + free(mem->dtmp); + } + if(mem->P !=NULL) prec_free(mem->P); + + free(mem); + mem=NULL; +} + +void bslv_info(bslv_memory *mem, int count) +{ + double * restrict e = mem->e; + printf("bslv_info: iterations=%d reduction=%.2e\n",count,e[count]); +} + + +void bslv_init(bslv_memory *mem, double tol, int max_iter, bsr_matrix const *A, bool use_dilu) +{ + int n=(A->nrows * A->b); + + mem->use_dilu = use_dilu; + + mem->tol = tol; + mem->max_iter = max_iter; + mem->n = n; + + mem->e = (double*) malloc(max_iter*sizeof(double)); + assert(mem->e); + + int narrays=7; + mem->dtmp = (double**) malloc(narrays*sizeof(double*)); + assert(mem->dtmp); + + int np = 8*((n+7)/8); // padded to nearest multiple of 8 + for(int i=0;idtmp[i] = aligned_alloc(64,np*sizeof(double)); + assert(mem->dtmp[i]); + for(int k=8*(n/8);kdtmp[i][k] = 0.0; //zeroing out padded section + } + + mem->P = prec_alloc(); + prec_init(mem->P, A); // initialize structure of L,D,U components of P +} + +double __attribute__((noinline)) vec_inner2(const double *a, const double *b, int n) +{ + const double *x = __builtin_assume_aligned(a,64); + const double *y = __builtin_assume_aligned(b,64); + + int const N=8; + double agg[N]; + for(int i=0;itol; + int max_iter = mem->max_iter; + int n = mem->n; + + double * restrict e = mem->e; + const double *r0 = b; + //const double * restrict r0 = mem->dtmp[0]; //access randomly initialized one-dimensional shadow space + double * restrict p_j = mem->dtmp[1]; + double * restrict q_j = mem->dtmp[2]; + double * restrict r_j = mem->dtmp[3]; + double * restrict s_j = mem->dtmp[4]; + double * restrict t_j = mem->dtmp[5]; + double * restrict v_j = mem->dtmp[6]; + double * restrict x_j = x; + + prec_t * restrict P = mem->P; + mem->use_dilu ? 
prec_dilu_factorize(P,A) : prec_ilu0_factorize(P,A); // choose dilu or ilu0 + prec_downcast(P); + + vec_fill(x_j,0.0,n); + vec_copy(r_j,b,n); + vec_copy(p_j,b,n); + + vec_copy(q_j,p_j,n); + //double norm_0 = sqrt(vec_inner(r_j,r_j,n)); + double norm_0 = sqrt(vec_inner2(r_j,r_j,n)); + + //double rho_j = vec_inner(r0,r_j,n); + double rho_j = vec_inner2(r0,r_j,n); + int j; + for(j=0;jtol; + int max_iter = mem->max_iter; + int n = mem->n; + + double * restrict e = mem->e; + const double *r0 = b; + //const double * restrict r0 = mem->dtmp[0]; //access randomly initialized one-dimensional shadow space + double * restrict p_j = mem->dtmp[1]; + double * restrict q_j = mem->dtmp[2]; + double * restrict r_j = mem->dtmp[3]; + double * restrict s_j = mem->dtmp[4]; + double * restrict t_j = mem->dtmp[5]; + double * restrict v_j = mem->dtmp[6]; + double * restrict x_j = x; + + prec_t * restrict P = mem->P; + mem->use_dilu ? prec_dilu_factorize(P,A) : prec_ilu0_factorize(P,A); // choose dilu or ilu0 + prec_downcast(P); + + vec_fill(x_j,0.0,n); + vec_copy(r_j,b,n); + vec_copy(p_j,b,n); + + vec_copy(q_j,p_j,n); + //double norm_0 = sqrt(vec_inner(r_j,r_j,n)); + double norm_0 = sqrt(vec_inner2(r_j,r_j,n)); + + //double rho_j = vec_inner(r0,r_j,n); + double rho_j = vec_inner2(r0,r_j,n); + int j; + for(j=0;j + +// Solver memory struct +typedef +struct bslv_memory +{ + bool use_dilu; + + double tol; + int max_iter; + double *e; + + int n; + double **dtmp; + + prec_t *P; +} +bslv_memory; + +/** + * @brief Create empty solver memory object. + * + * @return Pointer to solver memory object. + */ +bslv_memory *bslv_alloc(); + +/** + * @brief Delete solver memory object. + * + * @param mem Pointer to solver memory object. + */ +void bslv_free(bslv_memory *mem); + +/** + * @brief Display convergence information. + * + * @param mem Pointer to solver memory object. + * @param count Number of linear iterations (returned by linear solver) + */ +void bslv_info(bslv_memory *mem, int count); + +/** + * @brief Initialize solver memory object. + * + * @param mem Pointer to solver memory object. + * @param tol Linear solver tolerance. + * @param max_iter Maximum number of linear iterations. + * @param A Pointer to coefficient matrix in bsr format. + * @param use_dilu Select DILU preconditioner if true. Otherwise use ILU0. + */ +void bslv_init(bslv_memory *mem, double tol, int max_iter, bsr_matrix const *A, bool use_dilu); + +/** + * @brief Preconditioned bicgstab in mixed-precision. + * + * @note Preconditioner is either ILU0 or DILU based on value of mem->use_dilu + * + * @param mem Pointer to solver memory object. + * @param A Pointer to coefficient matrix in bsr format + * @param b Pointer to right-hand side vector + * @param x Pointer to solution vector + * + * @return Number of linear iterations. + */ +int bslv_pbicgstab3m(bslv_memory *mem, bsr_matrix *A, const double *b, double *x); + +/** + * @brief Preconditioned bicgstab in double-precision. + * + * @note Preconditioner is either ILU0 or DILU based on value of mem->use_dilu + * + * @param mem Pointer to solver memory object. + * @param A Pointer to coefficient matrix in bsr format + * @param b Pointer to right-hand side vector + * @param x Pointer to solution vector + * + * @return Number of linear iterations.
+ */ +int bslv_pbicgstab3d(bslv_memory *mem, bsr_matrix *A, const double *b, double *x); + +#ifdef __cplusplus +} +#endif diff --git a/opm/simulators/linalg/mixed/bsr.c b/opm/simulators/linalg/mixed/bsr.c new file mode 100644 index 00000000000..ead89998575 --- /dev/null +++ b/opm/simulators/linalg/mixed/bsr.c @@ -0,0 +1,206 @@ +#include "bsr.h" + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <assert.h> + +#include <immintrin.h> + + +bsr_matrix* bsr_alloc() +{ + bsr_matrix *A=malloc(sizeof(bsr_matrix)); + assert(A); + A->nrows = 0; + A->ncols = 0; + A->nnz = 0; + A->b = 0; + + A->rowptr = NULL; + A->colidx = NULL; + A->dbl = NULL; + A->flt = NULL; + + return A; +} + +void bsr_free(bsr_matrix *A) +{ + if(A==NULL) return; + + if(A->flt != NULL) free(A->flt); + if(A->dbl != NULL) free(A->dbl); + if(A->colidx != NULL) free(A->colidx); + if(A->rowptr != NULL) free(A->rowptr); + + free(A); + A=NULL; +} + +void bsr_init(bsr_matrix *A, int nrows, int nnz, int b) +{ + A->nrows=nrows; + A->ncols=nrows; + A->nnz=nnz; + A->b=b; + + A->rowptr = malloc((nrows+1)*sizeof(int)); + A->colidx = malloc(nnz*sizeof(int)); + A->dbl = malloc(b*b*nnz*sizeof(double)); + A->flt = malloc(b*b*nnz*sizeof(float)); + + assert(A->rowptr); + assert(A->colidx); + assert(A->dbl); + assert(A->flt); +} + +void bsr_info(bsr_matrix *A) +{ + printf("nrows = %d\n",A->nrows); + printf("ncols = %d\n",A->ncols); + printf("nnz = %d\n",A->nnz); + printf("b = %d\n",A->b); + + printf("rowptr= 0x%08lX\n",(uint64_t)A->rowptr); + printf("colidx= 0x%08lX\n",(uint64_t)A->colidx); + printf("dbl = 0x%08lX\n",(uint64_t)A->dbl); + + printf("\n"); +} + +void bsr_vmspmv3(bsr_matrix *A, const double *x, double *y) +{ + int nrows = A->nrows; + int *rowptr=A->rowptr; + int *colidx=A->colidx; + const float *data=A->flt; + + const int b=3; + + __m256d mm_zeros =_mm256_setzero_pd(); + for(int i=0;inrows; + int *rowptr=A->rowptr; + int *colidx=A->colidx; + const double *data=A->dbl; + + const int b=3; + + __m256d mm_zeros =_mm256_setzero_pd(); + for(int i=0;innz; + int b = M->b; + + assert(M->flt); + for(int i=0;i<b*b*nnz;i++) M->flt[i]=M->dbl[i]; +} + + +void bsr_sparsity(const bsr_matrix *A, const char *name) +{ + printf("%s =\n[\n",name); + int count=1; + int offset=0; + for(int i=0; i<A->nrows; i++) + { + printf("%4d: ",offset); + for(int j=A->rowptr[i];j<A->rowptr[i+1];j++) + { + printf(" %4d",A->colidx[j]); + offset++; + } + printf("\n"); + count++; + if(count>16) break; + } + printf("]\n"); +} + +void bsr_nonzeros(bsr_matrix *A, const char *name) +{ + printf("%s =\n[\n",name); + int count=1; + int b=A->b; + int bb=b*b; + for(int i=0; i<A->nrows; i++) + { + for(int j=A->rowptr[i];j<A->rowptr[i+1];j++) + { + printf("|"); + for(int m=0;m<b;m++) + { + for(int n=0;n<b;n++) + { + printf(" %+.4e",A->dbl[j*bb + m*b + n]); + } + printf(" |"); + } + printf("\n"); + } + count++; + if(count>6) break; + printf("\n"); + } + printf("]\n"); +} + diff --git a/opm/simulators/linalg/mixed/bsr.h b/opm/simulators/linalg/mixed/bsr.h new file mode 100644 index 00000000000..f58a0221790 --- /dev/null +++ b/opm/simulators/linalg/mixed/bsr.h @@ -0,0 +1,103 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +// bsr matrix struct +typedef +struct bsr_matrix +{ + int nrows; + int ncols; + int nnz; + int b; + + int *rowptr; + int *colidx; + double *dbl; + float *flt; + +} bsr_matrix; + +/** + * @brief Create empty bsr matrix. + * + * @return Pointer to bsr matrix. + */ +bsr_matrix* bsr_alloc(); + +/** + * @brief Delete bsr matrix. + * + * @param A Pointer to bsr matrix. + */ +void bsr_free(bsr_matrix *A); + +/** + * @brief Initialize bsr matrix. + * + * @param A Pointer to bsr matrix. 
+ * @param nrows Number of rows. + * @param nnz Number of nonzero blocks. + * @param b Block size. + */ +void bsr_init(bsr_matrix *A, int nrows, int nnz, int b); + +/** + * @brief Sparse matrix-vector multiplication in mixed precision. + * + * @note Function is specialized for 3x3 block-sparse matrices. + * @note Function uses AVX2 intrinsics. + * + * @param A Pointer to bsr matrix. + * @param x Pointer to input vector. + * @param y Pointer to output vector. + */ +void bsr_vmspmv3(bsr_matrix *A, const double *x, double *y); + +/** + * @brief Sparse matrix-vector multiplication in double precision. + * + * @note Function is specialized for 3x3 block-sparse matrices. + * @note Function uses AVX2 intrinsics. + * + * @param A Pointer to bsr matrix. + * @param x Pointer to input vector. + * @param y Pointer to output vector. + */ +void bsr_vdspmv3(bsr_matrix *A, const double *x, double *y); + +/** + * @brief Make single-precision copy of double-precision values. + * + * @param M Pointer to bsr matrix + */ +void bsr_downcast(bsr_matrix *M); + +/** + * @brief Display matrix statistics. + * + * @param A Pointer to bsr matrix + */ +void bsr_info(bsr_matrix *A); + +/** + * @brief Display sparsity pattern of first few rows. + * + * @param A Pointer to bsr matrix + * @param name String with desired name of bsr_matrix + */ +void bsr_sparsity(const bsr_matrix *A, const char *name); + +/** + * @brief Display nonzero blocks of first few rows. + * + * @param A Pointer to bsr matrix + * @param name String with desired name of bsr_matrix + */ +void bsr_nonzeros(bsr_matrix *A, const char *name); + +#ifdef __cplusplus +} +#endif diff --git a/opm/simulators/linalg/mixed/prec.c b/opm/simulators/linalg/mixed/prec.c new file mode 100644 index 00000000000..f0b9ea6d010 --- /dev/null +++ b/opm/simulators/linalg/mixed/prec.c @@ -0,0 +1,627 @@ +#include "prec.h" + +#include +#include +#include + + +prec_t *prec_alloc() +{ + prec_t *P = malloc(sizeof(prec_t)); + assert(P); + P->L=bsr_alloc(); + P->D=bsr_alloc(); + P->U=bsr_alloc(); + + P->noffsets=-1; + P->offsets=NULL; + + return P; +} + +void prec_free(prec_t *P) +{ + if(P==NULL) return; + + if(P->offsets!=NULL) free(P->offsets); + bsr_free(P->U); + bsr_free(P->D); + bsr_free(P->L); + + free(P); + P=NULL; +} + +int prec_analyze(bsr_matrix *M, int (*offsets)[3]) +{ + int count=0; + for(int i=0;i<M->nrows;i++) + { + for(int z=M->rowptr[i];z<M->rowptr[i+1];z++) + { + int j = M->colidx[z]; + int match=0; + for(int m=M->rowptr[j];m<M->rowptr[j+1];m++) + { + int k = M->colidx[m]; + for(int n=M->rowptr[i];n<M->rowptr[i+1];n++) + { + int jjj=M->colidx[n]; + match += (k==jjj); + if(k==jjj) + { + if(offsets) + { + offsets[count][0]=z;//ij + offsets[count][1]=n;//ik + offsets[count][2]=m;//jk + } + count++; + } + } + } + } + } + return count; +} + + +void prec_init(prec_t *P, bsr_matrix const *A) +{ + int b = A->b; + int nrows = A->nrows; + int nnz = A->nnz; + + bsr_matrix *L=P->L; + bsr_matrix *D=P->D; + bsr_matrix *U=P->U; + + bsr_init(P->L, nrows, (nnz-nrows)/2, b); + bsr_init(P->D, nrows, nrows, b); + bsr_init(P->U, nrows, (nnz-nrows)/2, b); + + // Sparsity of L,D, and U components of matrix A. + // Sparsity of L is expressed for its transpose + // which is identical to the sparsity of U due to + // the assumption of structural symmetry. 
nnz=0; + for (int i=0;i<nrows;i++) + { + L->rowptr[i]=nnz; + D->rowptr[i]=i; + U->rowptr[i]=nnz; + for (int k=A->rowptr[i];k<A->rowptr[i+1];k++) + { + int j=A->colidx[k]; + if(j>i) + { + L->colidx[nnz]=j; + U->colidx[nnz]=j; + nnz++; + } + } + D->colidx[i]=i; + } + L->rowptr[nrows]=nnz; + D->rowptr[nrows]=nrows; + U->rowptr[nrows]=nnz; + + // replace with asserts? + L->nnz=nnz; + D->nnz=nrows; + U->nnz=nnz; + + + // offsets for off-diagonal updates + int count; + count = prec_analyze(U,NULL); + P->offsets = malloc(3*(count+1)*sizeof(int)); + assert(P->offsets); + count = prec_analyze(U,P->offsets); + P->offsets[count][0]=U->nnz; + P->noffsets=count; +} + +void mat3_matmul(double *C, const double *A, const double *B) +{ + // assume 3x3 column-major matrices + // account for possibility of C and A referring to same memory location + double M[9]; + for(int k=0;k<9;k++) M[k]=0.0; + for(int j=0;j<3;j++) + { + double *m_j = M+3*j; // j-th column of M + for(int k=0;k<3;k++) + { + double b_kj = B[k+3*j]; // kj-th element of B + double const *a_k = A+3*k; // k-th column of A + for(int i=0;i<3;i++) m_j[i] += a_k[i]*b_kj; // |m_j> += |a_k> * b_kj; + } + } + for(int k=0;k<9;k++) C[k]=M[k]; +} + +void mat3_matfms(double *C, const double *A, const double *B) +{ + // assume 3x3 column-major matrices + // account for possibility of C and A referring to same memory location + double M[9]; + for(int k=0;k<9;k++) M[k]=0.0; + for(int j=0;j<3;j++) + { + double *m_j = M+3*j; // j-th column of M + for(int k=0;k<3;k++) + { + double b_kj = B[k+3*j]; // kj-th element of B + double const *a_k = A+3*k; // k-th column of A + for(int i=0;i<3;i++) m_j[i] += a_k[i]*b_kj; // |m_j> += |a_k> * b_kj; + } + } + for(int k=0;k<9;k++) C[k]-=M[k]; +} + + +void mat3_inv(double *invA, const double *A) +{ + double M[9]; + M[0] = +(A[4]*A[8]-A[5]*A[7]); + M[3] = -(A[3]*A[8]-A[5]*A[6]); + M[6] = +(A[3]*A[7]-A[4]*A[6]); + M[1] = -(A[1]*A[8]-A[2]*A[7]); + M[4] = +(A[0]*A[8]-A[2]*A[6]); + M[7] = -(A[0]*A[7]-A[1]*A[6]); + M[2] = +(A[1]*A[5]-A[2]*A[4]); + M[5] = -(A[0]*A[5]-A[2]*A[3]); + M[8] = +(A[0]*A[4]-A[1]*A[3]); + + double detA = A[0]*M[0]+A[1]*M[3]+A[2]*M[6]; + for(int k=0;k<9;k++) invA[k]=M[k]/detA; +} + +inline void vec_copy9(double *y, double const *x) +{ + for(int i=0;i<9;i++) y[i]=x[i]; +} + +void mat3_rmul(double *A, double const *B) +{ + // load left hand matrix + __m256d vA[3]; + vA[0] = _mm256_loadu_pd(A+0); + vA[1] = _mm256_loadu_pd(A+3); + vA[2] = _mm256_loadu_pd(A+6); + + for(int j=0;j<3;j++) + { + // load column j of B matrix + __m256d vbj = _mm256_loadu_pd(B+3*j); + + // multiply matrix A with column j of matrix B + __m256d vAB[3]; + vAB[0] = vA[0]*_mm256_permute4x64_pd(vbj,0b00000000); //0b01010101 + vAB[1] = vA[1]*_mm256_permute4x64_pd(vbj,0b01010101); //0b01010101 + vAB[2] = vA[2]*_mm256_permute4x64_pd(vbj,0b10101010); //0b01010101 + + __m256d vz = vAB[0] + vAB[1] + vAB[2]; + + // Store result in column j of matrix A + double z[4]; + _mm256_store_pd(z,vz); + for(int k=0;k<3;k++) A[3*j+k]=z[k]; + } +} + +void mat3_lmul(double const *A, double *B) +{ + // load left hand matrix + __m256d vA[3]; + vA[0] = _mm256_loadu_pd(A+0); + vA[1] = _mm256_loadu_pd(A+3); + vA[2] = _mm256_loadu_pd(A+6); + + for(int j=0;j<3;j++) + { + // load column j of B matrix + __m256d vbj = _mm256_loadu_pd(B+3*j); + + // multiply matrix A with column j of matrix B + __m256d vAB[3]; + vAB[0] = vA[0]*_mm256_permute4x64_pd(vbj,0b00000000); //0b01010101 + vAB[1] = vA[1]*_mm256_permute4x64_pd(vbj,0b01010101); //0b01010101 + vAB[2] = 
vA[2]*_mm256_permute4x64_pd(vbj,0b10101010); //0b01010101 + + __m256d vz = vAB[0] + vAB[1] + vAB[2]; + + // Store result in column j of matrix B + double z[4]; + _mm256_store_pd(z,vz); + for(int k=0;k<3;k++) B[3*j+k]=z[k]; + } +} + +void mat3_vfms(double *C, double const *A, double const *B) +{ + // load left hand matrix + __m256d vA[3]; + vA[0] = _mm256_loadu_pd(A+0); + vA[1] = _mm256_loadu_pd(A+3); + vA[2] = _mm256_loadu_pd(A+6); + + for(int j=0;j<3;j++) + { + // load column j of B matrix + __m256d vbj = _mm256_loadu_pd(B+3*j); + + // multiply matrix A with column j of matrix B + __m256d vAB[3]; + vAB[0] = vA[0]*_mm256_permute4x64_pd(vbj,0b00000000); //0b01010101 + vAB[1] = vA[1]*_mm256_permute4x64_pd(vbj,0b01010101); //0b01010101 + vAB[2] = vA[2]*_mm256_permute4x64_pd(vbj,0b10101010); //0b01010101 + + __m256d vz = vAB[0] + vAB[1] + vAB[2]; + + // Store result in column j of matrix A + double z[4]; + _mm256_store_pd(z,vz); + for(int k=0;k<3;k++) C[3*j+k]-=z[k]; + } +} + + +void prec_dilu_factorize(prec_t *P, bsr_matrix *A) +{ + int nrows = A->nrows; + int b = A->b; + int bb = b*b; + + bsr_matrix *L=P->L; + bsr_matrix *D=P->D; + bsr_matrix *U=P->U; + + // Splitting values of A into L, D, and U, respectively + int kU=0; + for(int i=0;irowptr[i];krowptr[i+1];k++) + { + int j=A->colidx[k]; + if(jrowptr[j]; + vec_copy9(L->dbl + bb*kL, A->dbl + bb*k); + L->rowptr[j]++; + } + else if(j==i) // struct-copy of D + { + vec_copy9(D->dbl + bb*i, A->dbl + bb*k); + } + else if(j>i) // struct-copy of U + { + vec_copy9(U->dbl + bb*kU, A->dbl + bb*k); + kU++; + } + } + } + // reset rowptr of L + for(int i=nrows;i>0;i--) L->rowptr[i]=L->rowptr[i-1]; + L->rowptr[0]=0; + + // Factorizing + double scale[9]; //hard-coded to 3x3 blocks for now + for(int i=0;inrows;i++) + { + mat3_inv(scale,D->dbl+i*bb); + vec_copy9(D->dbl+bb*i, scale); //store inverse instead to simplify application + for(int k=L->rowptr[i];krowptr[i+1];k++) + { + //scale column i of L + mat3_rmul(L->dbl+k*bb,scale); + + //update diagonal of U + int j=L->colidx[k]; + mat3_vfms(D->dbl+j*bb,L->dbl+k*bb,U->dbl+k*bb); + + //scale row i of U + mat3_lmul(scale,U->dbl+k*bb); + + //NOT IMPLEMENTED! 
+ for(int m=L->rowptr[j];mrowptr[j+1];m++) + { + if(L->colidx[m]==j) + { + printf("ILU OFF_DIAGONALS NOT IMPLEMENTED!\n"); + printf("(%d,%d)",m,j); + getchar(); + } + } + } + } +} + +void prec_ilu0_factorize(prec_t *P, bsr_matrix *A) +{ + int nrows = A->nrows; + int b = A->b; + int bb = b*b; + + bsr_matrix *L=P->L; + bsr_matrix *D=P->D; + bsr_matrix *U=P->U; + + // Splitting values of A into L, D, and U, respectively + int kU=0; + for(int i=0;irowptr[i];krowptr[i+1];k++) + { + int j=A->colidx[k]; + if(jrowptr[j]; + vec_copy9(L->dbl + bb*kL, A->dbl + bb*k); + L->rowptr[j]++; + } + else if(j==i) // struct-copy of D + { + vec_copy9(D->dbl + bb*i, A->dbl + bb*k); + } + else if(j>i) // struct-copy of U + { + vec_copy9(U->dbl + bb*kU, A->dbl + bb*k); + kU++; + } + } + } + // reset rowptr of L + for(int i=nrows;i>0;i--) L->rowptr[i]=L->rowptr[i-1]; + L->rowptr[0]=0; + + // Factorizing + int idx=0; + int next = P->offsets[idx][0]; + double scale[9]; //hard-coded to 3x3 blocks for now + for(int i=0;inrows;i++) + { + mat3_inv(scale,D->dbl+i*bb); + vec_copy9(D->dbl+bb*i, scale); //store inverse instead to simplify application + for(int k=L->rowptr[i];krowptr[i+1];k++) + { + //scale column i of L + mat3_rmul(L->dbl+k*bb,scale); + + //update diagonal D + int j=L->colidx[k]; + mat3_vfms(D->dbl+j*bb,L->dbl+k*bb,U->dbl+k*bb); + } + + while(nextrowptr[i+1]) + { + int ij = P->offsets[idx][0]; + int ik = P->offsets[idx][1]; + int jk = P->offsets[idx][2]; + + //update off-diagonals L and U + mat3_vfms(U->dbl+jk*bb,L->dbl+ij*bb,U->dbl+ik*bb); + mat3_vfms(L->dbl+jk*bb,L->dbl+ik*bb,U->dbl+ij*bb); + + //update marker + next=P->offsets[++idx][0]; + } + + + for(int k=L->rowptr[i];krowptr[i+1];k++) + { + //scale row i of U + mat3_lmul(scale,U->dbl+k*bb); + } + + } +} + + +inline void mat3_vecmul(const double *A, double *x) +{ + const int b=3; + double z[3]; + for(int k=0;k<3;k++) z[k]=0; + for(int c=0;cL; + bsr_matrix *D = P->D; + bsr_matrix *U = P->U; + + int b=L->b; + int bb=b*b; + + __m256d mm256_zero_pd =_mm256_setzero_pd(); + + // Lower triangular solve assuming ones on diagonal + for(int i=0;incols;i++) + { + __m256d vA[3], vx[3]; + + double *xi = x+b*i; + __m256d vxi = _mm256_loadu_pd(xi); + + vx[0] = _mm256_permute4x64_pd(vxi,0b00000000); //0b01010101 + vx[1] = _mm256_permute4x64_pd(vxi,0b01010101); //0b01010101 + vx[2] = _mm256_permute4x64_pd(vxi,0b10101010); //0b01010101 + for(int k=L->rowptr[i];krowptr[i+1];k++) + { + const float *A = L->flt+k*bb; + int j=U->colidx[k]; // should be L, but does not matter die to structural symmetry? 
+ vA[0] = _mm256_cvtps_pd(_mm_loadu_ps(A+0))*vx[0]; + vA[1] = _mm256_cvtps_pd(_mm_loadu_ps(A+3))*vx[1]; + vA[2] = _mm256_cvtps_pd(_mm_loadu_ps(A+6))*vx[2]; + + double *xj = x+b*j; + __m256d vxj = _mm256_loadu_pd(xj); + __m256d vz = (vxj - vA[0]) - (vA[1] + vA[2]); + + //vz =_mm256_blend_pd(vxj,vz,0x7); // 4th element unchanged + //_mm256_storeu_pd(xj,vz); + double z[4]; + _mm256_store_pd(z,vz); + for(int n=0;n<3;n++) xj[n]=z[n]; + } + + // Muliply by (inverse) diagonal block + const float *A = D->flt+i*bb; + vA[0] = _mm256_cvtps_pd(_mm_loadu_ps(A+0))*vx[0]; //0b01010101 + vA[1] = _mm256_cvtps_pd(_mm_loadu_ps(A+3))*vx[1]; //0b01010101 + vA[2] = _mm256_cvtps_pd(_mm_loadu_ps(A+6))*vx[2]; //0b01010101 + __m256d vz = vA[0] + vA[1] + vA[2]; + + //vz =_mm256_blend_pd(vxi,vz,0x7); // 4th element unchanged + //_mm256_storeu_pd(xi,vz); + double z[4]; + _mm256_store_pd(z,vz); + for(int k=0;k<3;k++) xi[k]=z[k]; + } + + // Upper triangular solve assuming nonzeros stored in original order + for(int i=U->ncols;i>0;i--) + { + __m256d vA[3]; + for(int k=0;k<3;k++) vA[k]=mm256_zero_pd; + for(int k=U->rowptr[i]-1;k>U->rowptr[i-1]-1;k--) + { + const float *A = U->flt+k*bb; + int j=U->colidx[k]; + __m256d vxj = _mm256_loadu_pd(x+b*j); + vA[0] += _mm256_cvtps_pd(_mm_loadu_ps(A+0))*_mm256_permute4x64_pd(vxj,0b00000000); //0b01010101 + vA[1] += _mm256_cvtps_pd(_mm_loadu_ps(A+3))*_mm256_permute4x64_pd(vxj,0b01010101); //0b01010101 + vA[2] += _mm256_cvtps_pd(_mm_loadu_ps(A+6))*_mm256_permute4x64_pd(vxj,0b10101010); //0b01010101 + } + double *xi = x+b*(i-1); + __m256d vxi = _mm256_loadu_pd(xi); + __m256d vz = (vxi - vA[0]) - (vA[1] + vA[2]); + vz =_mm256_blend_pd(vxi,vz,0x7); // 4th element unchanged + _mm256_storeu_pd(xi,vz); + + //double z[4]; + //_mm256_store_pd(z,vz); + //for(int k=0;k<3;k++) xi[k]=z[k]; + } +} + +void prec_dapply3c(prec_t *restrict P, double *x) +{ + bsr_matrix *L = P->L; + bsr_matrix *D = P->D; + bsr_matrix *U = P->U; + + int b=L->b; + int bb=b*b; + + __m256d mm256_zero_pd =_mm256_setzero_pd(); + + // Lower triangular solve assuming ones on diagonal + for(int i=0;incols;i++) + { + __m256d vA[3], vx[3]; + + double *xi = x+b*i; + __m256d vxi = _mm256_loadu_pd(xi); + + vx[0] = _mm256_permute4x64_pd(vxi,0b00000000); //0b01010101 + vx[1] = _mm256_permute4x64_pd(vxi,0b01010101); //0b01010101 + vx[2] = _mm256_permute4x64_pd(vxi,0b10101010); //0b01010101 + for(int k=L->rowptr[i];krowptr[i+1];k++) + { + const double *A = L->dbl+k*bb; + int j=U->colidx[k]; + vA[0] = _mm256_loadu_pd(A+0)*vx[0]; + vA[1] = _mm256_loadu_pd(A+3)*vx[1]; + vA[2] = _mm256_loadu_pd(A+6)*vx[2]; + + double *xj = x+b*j; + __m256d vxj = _mm256_loadu_pd(xj); + __m256d vz = (vxj - vA[0]) - (vA[1] + vA[2]); + + //vz =_mm256_blend_pd(vxj,vz,0x7); // 4th element unchanged + //_mm256_storeu_pd(xj,vz); + double z[4]; + _mm256_store_pd(z,vz); + for(int kk=0;kk<3;kk++) xj[kk]=z[kk]; + } + + // Muliply by (inverse) diagonal block + const double *A = D->dbl+i*bb; + vA[0] = _mm256_loadu_pd(A+0)*vx[0]; //0b01010101 + vA[1] = _mm256_loadu_pd(A+3)*vx[1]; //0b01010101 + vA[2] = _mm256_loadu_pd(A+6)*vx[2]; //0b01010101 + __m256d vz = vA[0] + vA[1] + vA[2]; + + //vz =_mm256_blend_pd(vxi,vz,0x7); // 4th element unchanged + //_mm256_storeu_pd(xi,vz); + double z[4]; + _mm256_store_pd(z,vz); + for(int k=0;k<3;k++) xi[k]=z[k]; + } + + // Upper triangular solve assuming nonzeros stored in original order + for(int i=U->ncols;i>0;i--) + { + __m256d vA[3]; + for(int k=0;k<3;k++) vA[k]=mm256_zero_pd; + for(int k=U->rowptr[i]-1;k>U->rowptr[i-1]-1;k--) + { + 
const double *A = U->dbl+k*bb; + int j=U->colidx[k]; + __m256d vxj = _mm256_loadu_pd(x+b*j); + vA[0] += _mm256_loadu_pd(A+0)*_mm256_permute4x64_pd(vxj,0b00000000); //0b01010101 + vA[1] += _mm256_loadu_pd(A+3)*_mm256_permute4x64_pd(vxj,0b01010101); //0b01010101 + vA[2] += _mm256_loadu_pd(A+6)*_mm256_permute4x64_pd(vxj,0b10101010); //0b01010101 + } + double *xi = x+b*(i-1); + __m256d vxi = _mm256_loadu_pd(xi); + __m256d vz = (vxi - vA[0]) - (vA[1] + vA[2]); + vz =_mm256_blend_pd(vxi,vz,0x7); // 4th element unchanged + _mm256_storeu_pd(xi,vz); + + //double z[4]; + //_mm256_store_pd(z,vz); + //for(int k=0;k<3;k++) xi[k]=z[k]; + } +} + +void prec_downcast(prec_t *P) +{ + bsr_downcast(P->L); + bsr_downcast(P->D); + bsr_downcast(P->U); +} + +void prec_info(prec_t *P) +{ + bsr_info(P->L); + bsr_info(P->D); + bsr_info(P->U); +} diff --git a/opm/simulators/linalg/mixed/prec.h b/opm/simulators/linalg/mixed/prec.h new file mode 100644 index 00000000000..b442c7f7af1 --- /dev/null +++ b/opm/simulators/linalg/mixed/prec.h @@ -0,0 +1,105 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "bsr.h" + +// preconditioner struct +typedef +struct prec_t +{ + bsr_matrix *L; + bsr_matrix *D; + bsr_matrix *U; + int noffsets; + int(*offsets)[3]; +} +prec_t; + +/** + * @brief Create empty preconditioner object. + * + * @return Pointer to preconditioner object. + */ +prec_t *prec_alloc(); + +/** + * @brief Delete preconditioner object. + * + * @param P Pointer to preconditioner object. + */ +void prec_free(prec_t *P); + +/** + * @brief Initialize preconditioner object. + * + * @param P Pointer to preconditioner object. + * @param A Pointer to bsr matrix. + */ +void prec_init(prec_t *P, bsr_matrix const *A); + +/** + * @brief Identify off-diagonal ILU0 targets. + * + * @param M Pointer to bsr matrix. + * @param offsets Pointer to offsets identifying off-diagonal targets. + * + * @return Number of off-diagonal targets. + */ +int prec_analyze(bsr_matrix *M, int (*offsets)[3]); + +/** + * @brief DILU factorization. + * + * @param P Pointer to preconditioner object. + * @param A Pointer to bsr matrix. + */ +void prec_dilu_factorize(prec_t *P, bsr_matrix *A); + +/** + * @brief ILU0 factorization. + * + * @param P Pointer to preconditioner object. + * @param A Pointer to bsr matrix. + */ +void prec_ilu0_factorize(prec_t *P, bsr_matrix *A); + +/** + * @brief Preconditioner application in mixed-precision. + * + * @note Algorithm consists of lower and upper triangular solves + * + * @param P Pointer to preconditioner object. + * @param x Pointer to input/output vector + */ +void prec_mapply3c(prec_t *P, double *x); + +/** + * @brief Preconditioner application in double-precision. + * + * @note Algorithm consists of lower and upper triangular solves + * + * @param P Pointer to preconditioner object. + * @param x Pointer to input/output vector + */ +void prec_dapply3c(prec_t *P, double *x); + +/** + * @brief Make single-precision copy of double-precision values. + * + * @param P Pointer to preconditioner object. + */ +void prec_downcast(prec_t *P); + +/** + * @brief Display preconditioner statistics. + * + * @param P Pointer to preconditioner object. 
+ */ +void prec_info(prec_t *P); + +#ifdef __cplusplus +} +#endif diff --git a/opm/simulators/linalg/mixed/vec.h b/opm/simulators/linalg/mixed/vec.h new file mode 100644 index 00000000000..0c5a4bdad37 --- /dev/null +++ b/opm/simulators/linalg/mixed/vec.h @@ -0,0 +1,21 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + + +void vec_copy(double *y, double const * x, int n) +{ + for(int i=0;i +#include + +#include "bsr.h" +#include "bslv.h" + +namespace Dune +{ +template +class MixedSolver : public InverseOperator +{ + public: + + MixedSolver(const M &A, double tol, int maxiter, bool use_dilu) + { + // verify that well contributions are added to the matrix + if (!Opm::Parameters::Get()) { + OPM_THROW(std::logic_error, "Well operators are currently not supported for mixed precision. " + "Use --matrix-add-well-contributions=true to add well contributions to the matrix instead.");} + + int nrows = A.N(); + int nnz = A.nonzeroes(); + int b = A[0][0].N(); + + // verify that block size is 3x3 + if (b!=3) {OPM_THROW(std::logic_error, "Block sizes other than 3x3 are not supported by mixed precision.");} + + // create jacobian matrix object and allocate various arrays + jacobian_ = bsr_alloc(); + bsr_init(jacobian_,nrows,nnz,b); + + // initialize sparsity pattern + int *rows = jacobian_->rowptr; + int *cols = jacobian_->colidx; + + int irow = 0; + int icol = 0; + rows[0] = 0; + for(auto row=A.begin(); row!=A.end(); row++) + { + for(unsigned int i=0; igetsize(); i++) + { + cols[icol++] = row->getindexptr()[i]; + } + rows[irow+1] = rows[irow]+row->getsize(); + irow++; + } + + // allocate and intialize solver memory + mem_ = bslv_alloc(); + bslv_init(mem_, tol, maxiter, jacobian_, use_dilu); + + //pointer to nonzero blocks + data_ = &A[0][0][0][0]; + } + + ~MixedSolver() + { + bsr_free(jacobian_); + bslv_free(mem_); + + } + + virtual void apply (X& x, X& b, InverseOperatorResult& res) override + { + // transpose each dense block to make them column-major + double B[9]; + for(int k=0;knnz;k++) + { + for(int i=0;i<3;i++) for(int j=0;j<3;j++) B[3*j+i] = data_[9*k + 3*i + j]; + for(int i=0;i<9;i++) jacobian_->dbl[9*k + i] = B[i]; + } + + // downcast to allow mixed precision + bsr_downcast(jacobian_); + + // solve linear system + int count = bslv_pbicgstab3m(mem_, jacobian_, &b[0][0], &x[0][0]); + //int count = bslv_pbicgstab3d(mem_, jacobian_, &b[0][0], &x[0][0]); + + // return convergence information + res.converged = (mem_->e[count] < mem_->tol); + res.reduction = mem_->e[count]; + res.iterations = count; + } + + virtual void apply (X& x, X& b, double reduction, InverseOperatorResult& res) override + { + x=0; + b=0; + res.reduction = reduction; + OPM_THROW(std::invalid_argument, "MixedSolver::apply(...) 
not implemented yet."); + + } + + virtual Dune::SolverCategory::Category category() const override { return Dune::SolverCategory::sequential; }; + + private: + bsr_matrix *jacobian_; + bslv_memory *mem_; + double const *data_; +}; + +} + diff --git a/opm/simulators/linalg/setupPropertyTree.cpp b/opm/simulators/linalg/setupPropertyTree.cpp index d104cc39a27..3697258dbcb 100644 --- a/opm/simulators/linalg/setupPropertyTree.cpp +++ b/opm/simulators/linalg/setupPropertyTree.cpp @@ -241,6 +241,16 @@ setupPropertyTree(FlowLinearSolverParameters p, // Note: copying the parameters return setupILU(conf, p); } + // mixed-precision ILU0 + if (conf == "mixed-ilu0") { + return setupMixedILU(conf, p); + } + + // mixed-precision DILU + if (conf == "mixed-dilu") { + return setupMixedDILU(conf, p); + } + if (conf == "dilu") { return setupDILU(conf, p); } @@ -394,6 +404,33 @@ setupILU([[maybe_unused]] const std::string& conf, const FlowLinearSolverParamet return prm; } +PropertyTree +setupMixedILU([[maybe_unused]] const std::string& conf, const FlowLinearSolverParameters& p) +{ + using namespace std::string_literals; + PropertyTree prm; + prm.put("tol", p.linear_solver_reduction_); + prm.put("maxiter", p.linear_solver_maxiter_); + prm.put("verbosity", p.linear_solver_verbosity_); + prm.put("solver", "mixed-bicgstab"s); + prm.put("preconditioner.type", "mixed-ilu0"s); + return prm; +} + +PropertyTree +setupMixedDILU([[maybe_unused]] const std::string& conf, const FlowLinearSolverParameters& p) +{ + using namespace std::string_literals; + PropertyTree prm; + prm.put("tol", p.linear_solver_reduction_); + prm.put("maxiter", p.linear_solver_maxiter_); + prm.put("verbosity", p.linear_solver_verbosity_); + prm.put("solver", "mixed-bicgstab"s); + prm.put("preconditioner.type", "mixed-dilu"s); + return prm; +} + + PropertyTree setupDILU([[maybe_unused]] const std::string& conf, const FlowLinearSolverParameters& p) { diff --git a/opm/simulators/linalg/setupPropertyTree.hpp b/opm/simulators/linalg/setupPropertyTree.hpp index 9c80185a8d0..2cb1cc20f22 100644 --- a/opm/simulators/linalg/setupPropertyTree.hpp +++ b/opm/simulators/linalg/setupPropertyTree.hpp @@ -37,7 +37,9 @@ PropertyTree setupCPRW(const std::string& conf, const FlowLinearSolverParameters PropertyTree setupCPR(const std::string& conf, const FlowLinearSolverParameters& p); PropertyTree setupAMG(const std::string& conf, const FlowLinearSolverParameters& p); PropertyTree setupILU(const std::string& conf, const FlowLinearSolverParameters& p); +PropertyTree setupMixedILU(const std::string& conf, const FlowLinearSolverParameters& p); PropertyTree setupDILU(const std::string& conf, const FlowLinearSolverParameters& p); +PropertyTree setupMixedDILU(const std::string& conf, const FlowLinearSolverParameters& p); PropertyTree setupUMFPack(const std::string& conf, const FlowLinearSolverParameters& p); } // namespace Opm
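For reviewers who want to exercise the new C API outside of Flow, here is a minimal standalone sketch built only from the declarations in `bsr.h` and `bslv.h` above. The tiny 2x2 block test system, the tolerance, and the 64-byte-aligned, padded allocations are illustrative assumptions rather than part of this PR; the file would have to be compiled and linked against `bsr.c`, `prec.c` and `bslv.c` with the AVX2 flags and C standard added in `CMakeLists.txt`.

``` c
/* Illustrative sketch only: solve a small 2x2 block system with 3x3 blocks
 * using the mixed-precision ILU0-preconditioned bicgstab from this PR.
 * Matrix values, tolerance and padding are made up for demonstration. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

#include "opm/simulators/linalg/mixed/bsr.h"
#include "opm/simulators/linalg/mixed/bslv.h"

int main(void)
{
    const int nrows = 2, nnz = 4, b = 3, n = nrows * b;

    // block-sparse matrix [[2I, -I], [-I, 2I]] in BSR form
    bsr_matrix *A = bsr_alloc();
    bsr_init(A, nrows, nnz, b);
    const int rowptr[] = {0, 2, 4};
    const int colidx[] = {0, 1, 0, 1};
    memcpy(A->rowptr, rowptr, sizeof(rowptr));
    memcpy(A->colidx, colidx, sizeof(colidx));
    for (int k = 0; k < b * b * nnz; k++) A->dbl[k] = 0.0;
    for (int row = 0; row < nrows; row++)
        for (int k = A->rowptr[row]; k < A->rowptr[row + 1]; k++) {
            double v = (A->colidx[k] == row) ? 2.0 : -1.0;  // 2I on, -I off the block diagonal
            for (int i = 0; i < b; i++) A->dbl[k * b * b + i * b + i] = v;
        }
    bsr_downcast(A);  // create the single-precision copy used by the mixed kernels

    // rhs and solution, 64-byte aligned and padded to a multiple of 8 doubles
    double *rhs = aligned_alloc(64, 8 * sizeof(double));
    double *x   = aligned_alloc(64, 8 * sizeof(double));
    for (int i = 0; i < 8; i++) { rhs[i] = (i < n) ? 1.0 : 0.0; x[i] = 0.0; }

    bslv_memory *mem = bslv_alloc();
    bslv_init(mem, 1e-3, 100, A, false);           // false selects ILU0, true selects DILU
    int iters = bslv_pbicgstab3m(mem, A, rhs, x);  // mixed-precision solve
    bslv_info(mem, iters);
    for (int i = 0; i < n; i++) printf("x[%d] = %g\n", i, x[i]);

    bslv_free(mem);
    bsr_free(A);
    free(rhs);
    free(x);
    return 0;
}
```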