From e485e288810bd49776bcd6d41d285c5f695cb4a9 Mon Sep 17 00:00:00 2001
From: hmb <humberto.munozb@gmail.com>
Date: Mon, 4 Nov 2019 01:59:35 -0800
Subject: [PATCH 01/12] Generalized f64 to BlasScalar generic

---
 Cargo.toml |   6 +
 src/lib.rs | 353 ++++++++++++++++++++++++++++-------------------------
 2 files changed, 190 insertions(+), 169 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 635f79c..d587d5a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,13 +12,19 @@ categories = ["science"]
 keywords = ["condition", "linalg", "matrix", "ndarray", "norm"]
 
 [dependencies]
+alga = "0.9.2"
 cblas = "0.2"
 lapacke = "0.2"
 ndarray = "0.12"
+num-traits = "0.2.8"
 ordered-float = "1.0"
 rand = "0.6"
 rand_xoshiro = "0.1"
 
+blas-traits = {git="https://github.com/hmunozb/blas-traits"}
+
 [dev-dependencies]
 ndarray-rand = "0.9"
 openblas-src = "0.7"
+lapacke-sys = "0.1.4"
+
diff --git a/src/lib.rs b/src/lib.rs
index 8d587dd..2674538 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,7 +1,8 @@
 //! This crate implements the matrix 1-norm estimator by [Higham and Tisseur].
 //!
 //! [Higham and Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf
-
+use alga::general::{ComplexField, SupersetOf};
+use blas_traits::BlasScalar;
 use ndarray::{
     prelude::*,
     ArrayBase,
@@ -12,6 +13,7 @@ use ndarray::{
     Ix2,
     s,
 };
+use num_traits::{Float, Zero};
 use ordered_float::NotNan;
 use rand::{
     Rng,
@@ -22,17 +24,18 @@ use rand_xoshiro::Xoshiro256StarStar;
 use std::collections::BTreeSet;
 use std::cmp;
 use std::slice;
+use std::any::TypeId;
 
-pub struct Normest1 {
+pub struct Normest1<T: BlasScalar> where {
     n: usize,
     t: usize,
     rng: Xoshiro256StarStar,
-    x_matrix: Array2<f64>,
-    y_matrix: Array2<f64>,
-    z_matrix: Array2<f64>,
-    w_vector: Array1<f64>,
-    sign_matrix: Array2<f64>,
-    sign_matrix_old: Array2<f64>,
+    x_matrix: Array2<T>,
+    y_matrix: Array2<T>,
+    z_matrix: Array2<T>,
+    w_vector: Array1<T>,
+    sign_matrix: Array2<T>,
+    sign_matrix_old: Array2<T>,
     column_is_parallel: Vec<bool>,
     indices: Vec<usize>,
     indices_history: BTreeSet<usize>,
@@ -56,16 +59,16 @@ pub struct Normest1 {
 ///
 /// It is at the designation of the user to check what is more efficient: to pass in one definite
 /// matrix or choose the alternative route described here.
-trait LinearOperator {
+trait LinearOperator<T: BlasScalar> {
     fn multiply_matrix<S>(&self, b: &mut ArrayBase<S, Ix2>, c: &mut ArrayBase<S, Ix2>, transpose: bool)
-        where S: DataMut<Elem=f64>;
+        where S: DataMut<Elem=T>;
 }
 
-impl<S1> LinearOperator for ArrayBase<S1, Ix2>
-    where S1: Data<Elem=f64>,
+impl<S1, T: BlasScalar> LinearOperator<T> for ArrayBase<S1, Ix2>
+    where S1: Data<Elem=T>,
 {
     fn multiply_matrix<S2>(&self, b: &mut ArrayBase<S2, Ix2>, c: &mut ArrayBase<S2, Ix2>, transpose: bool)
-        where S2: DataMut<Elem=f64>
+        where S2: DataMut<Elem=T>
     {
         let (n_rows, n_cols) = self.dim();
         assert_eq!(n_rows, n_cols, "Number of rows and columns does not match: `self` has to be a square matrix");
@@ -91,37 +94,22 @@ impl<S1> LinearOperator for ArrayBase<S1, Ix2>
         let layout = a_layout;
 
         let a_transpose = if transpose {
-            cblas::Transpose::Ordinary
+            cblas::Transpose::Conjugate     // Simple transpose in real case
         } else {
             cblas::Transpose::None
         };
-
-        unsafe {
-            cblas::dgemm(
-                layout,
-                a_transpose,
-                cblas::Transpose::None,
-                n as i32,
-                t as i32,
-                n as i32,
-                1.0,
-                a_slice,
-                n as i32,
-                b_slice,
-                t as i32,
-                0.0,
-                c_slice,
-                t as i32,
-            )
-        }
+        T::gemm(layout, a_transpose, cblas::Transpose::None,
+                 n as i32, t as i32, n as i32,
+                 T::from_subset(&1.0), a_slice, n as i32, b_slice, t as i32,
+                 T::from_subset(&0.0), c_slice, t as i32,);
     }
 }
 
-impl<S1> LinearOperator for [&ArrayBase<S1, Ix2>]
-    where S1: Data<Elem=f64>
+impl<S1, T: BlasScalar> LinearOperator<T> for [&ArrayBase<S1, Ix2>]
+    where S1: Data<Elem=T>
 {
     fn multiply_matrix<S2>(&self, b: &mut ArrayBase<S2, Ix2>, c: &mut ArrayBase<S2, Ix2>, transpose: bool)
-        where S2: DataMut<Elem=f64>
+        where S2: DataMut<Elem=T>
     {
         if self.len() > 0 {
             let mut reversed;
@@ -151,11 +139,11 @@ impl<S1> LinearOperator for [&ArrayBase<S1, Ix2>]
     }
 }
 
-impl<S1> LinearOperator for (&ArrayBase<S1, Ix2>, usize)
-    where S1: Data<Elem=f64>
+impl<S1, T: BlasScalar> LinearOperator<T> for (&ArrayBase<S1, Ix2>, usize)
+    where S1: Data<Elem=T>
 {
     fn multiply_matrix<S2>(&self, b: &mut ArrayBase<S2, Ix2>, c: &mut ArrayBase< S2, Ix2>, transpose: bool)
-        where S2: DataMut<Elem=f64>
+        where S2: DataMut<Elem=T>
     {
         let a = self.0;
         let m = self.1;
@@ -169,18 +157,18 @@ impl<S1> LinearOperator for (&ArrayBase<S1, Ix2>, usize)
     }
 }
 
-impl Normest1 {
+impl<T: BlasScalar> Normest1<T> {
     pub fn new(n: usize, t: usize) -> Self {
         assert!(t <= n, "Cannot have more iteration columns t than columns in the matrix.");
         let rng = Xoshiro256StarStar::from_rng(&mut thread_rng()).expect("Rng initialization failed.");
-        let x_matrix = unsafe { Array2::<f64>::uninitialized((n, t)) };
-        let y_matrix = unsafe { Array2::<f64>::uninitialized((n, t)) };
-        let z_matrix = unsafe { Array2::<f64>::uninitialized((n, t)) };
+        let x_matrix = unsafe { Array2::uninitialized((n, t)) };
+        let y_matrix = unsafe { Array2::uninitialized((n, t)) };
+        let z_matrix = unsafe { Array2::uninitialized((n, t)) };
 
         let w_vector = unsafe { Array1::uninitialized(n) };
 
-        let sign_matrix = unsafe { Array2::<f64>::uninitialized((n, t)) };
-        let sign_matrix_old = unsafe { Array2::<f64>::uninitialized((n, t)) };
+        let sign_matrix = unsafe { Array2::uninitialized((n, t)) };
+        let sign_matrix_old = unsafe { Array2::uninitialized((n, t)) };
 
         let column_is_parallel = vec![false; t];
 
@@ -206,8 +194,8 @@ impl Normest1 {
         }
     }
 
-    fn calculate<L>(&mut self, a_linear_operator: &L, itmax: usize) -> f64
-        where L: LinearOperator + ?Sized
+    fn calculate<L>(&mut self, a_linear_operator: &L, itmax: usize) -> T::RealField
+        where L: LinearOperator<T> + ?Sized
     {
         assert!(itmax > 1, "normest1 is undefined for iterations itmax < 2");
 
@@ -218,7 +206,7 @@ impl Normest1 {
         let n = self.n;
         let t = self.t;
 
-        let sample = [-1., 1.0];
+        let sample = [-T::one(), T::one()];
 
         // “We now explain our choice of starting matrix. We take the first column of X to be the
         // vector of 1s, which is the starting vector used in Algorithm 2.1. This has the advantage
@@ -232,8 +220,8 @@ impl Normest1 {
         // lessens the importance of counterexamples (see the comments in the next section).”
         {
             let rng_mut = &mut self.rng;
-            self.x_matrix.mapv_inplace(|_| sample[rng_mut.gen_range(0, sample.len())]);
-            self.x_matrix.column_mut(0).fill(1.);
+            self.x_matrix.mapv_inplace(|_| sample[rng_mut.gen_range(0, sample.len())] );
+            self.x_matrix.column_mut(0).fill(T::one());
         }
 
         // Resample the x_matrix to make sure no columns are parallel
@@ -245,9 +233,9 @@ impl Normest1 {
         }
 
         // Set all columns to unit vectors
-        self.x_matrix.mapv_inplace(|x| x / n as f64);
+        self.x_matrix.mapv_inplace(|x| (x / T::from_usize(n).unwrap()) );
 
-        let mut estimate = 0.0;
+        let mut estimate = T::RealField::zero();
         let mut best_index = 0;
 
         'optimization_loop: for k in 0..itmax {
@@ -294,14 +282,17 @@ impl Normest1 {
                 // > or to a column of Sold by replacing columns of S by rand{-1,+1}
                 //
                 // NOTE: We are reusing `y_matrix` here as a temporary value.
-                resample_parallel_columns(
-                    &mut self.sign_matrix,
-                    &self.sign_matrix_old,
-                    &mut self.y_matrix,
-                    &mut self.column_is_parallel,
-                    &mut self.rng,
-                    &sample,
-                );
+                // Note: Parallel column test can be skipped in complex case
+                if TypeId::of::<T>() == TypeId::of::<f32>() || TypeId::of::<T>() == TypeId::of::<f64>(){
+                    resample_parallel_columns(
+                        &mut self.sign_matrix,
+                        &self.sign_matrix_old,
+                        &mut self.y_matrix,
+                        &mut self.column_is_parallel,
+                        &mut self.rng,
+                        &sample,
+                    );
+                }
             }
 
             // > est_old = est, Sold = S
@@ -315,19 +306,20 @@ impl Normest1 {
             self.sign_matrix_old.assign(&self.sign_matrix);
 
             // Z = A^T S
+            //
             a_linear_operator.multiply_matrix(&mut self.sign_matrix, &mut self.z_matrix, true);
 
             // hᵢ= ‖Z(i,:)‖_∞
-            let mut max_h = 0.0;
+            let mut max_h = T::RealField::zero();
             for (row, h_element) in self.z_matrix.genrows().into_iter().zip(self.h.iter_mut()) {
-                let h = vector_maxnorm(&row);
+                let h : T::RealField = vector_maxnorm(&row);
                 max_h = if h > max_h { h } else { max_h };
                 // Convert f64 to NotNan for using sort_unstable_by below
-                *h_element = h.into();
+                *h_element = h.to_subset().unwrap().into();
             }
 
             // TODO: This test for equality needs an approximate equality test instead.
-            if k > 0 && max_h == self.h[best_index].into() {
+            if k > 0 && max_h == T::RealField::from_subset(&self.h[best_index].into()) {
                 break 'optimization_loop
             }
 
@@ -339,7 +331,7 @@ impl Normest1 {
                 self.indices.sort_unstable_by(|i, j| h_ref[*j].cmp(&h_ref[*i]));
             }
 
-            self.x_matrix.fill(0.0);
+            self.x_matrix.fill(T::zero());
             if t > 1 {
                 // > Replace ind(1:t) by the first t indices in ind(1:n) that are not in ind_hist.
                 //
@@ -370,11 +362,11 @@ impl Normest1 {
                 for i in (&mut index_iterator).take(t) {
                     if !self.indices_history.contains(i) {
                         all_first_t_in_history = false;
-                        self.x_matrix[(*i, current_column_fresh)] = 1.0;
+                        self.x_matrix[(*i, current_column_fresh)] = T::one();
                         current_column_fresh += 1;
                         self.indices_history.insert(*i);
                     } else if current_column_historical < t {
-                        self.x_matrix[(*i, current_column_historical)] = 1.0;
+                        self.x_matrix[(*i, current_column_historical)] = T::one();
                         current_column_historical += 1;
                     }
                 }
@@ -390,11 +382,11 @@ impl Normest1 {
                         break 'fill_x;
                     }
                     if !self.indices_history.contains(i) {
-                        self.x_matrix[(*i, current_column_fresh)] = 1.0;
+                        self.x_matrix[(*i, current_column_fresh)] = T::one();
                         current_column_fresh += 1;
                         self.indices_history.insert(*i);
                     } else if current_column_historical < t {
-                        self.x_matrix[(*i, current_column_historical)] = 1.0;
+                        self.x_matrix[(*i, current_column_historical)] = T::one();
                         current_column_historical += 1;
                     }
                 }
@@ -405,22 +397,22 @@ impl Normest1 {
     }
 
     /// Estimate the 1-norm of matrix `a` using up to `itmax` iterations.
-    pub fn normest1<S>(&mut self, a: &ArrayBase<S, Ix2>, itmax: usize) -> f64
-        where S: Data<Elem=f64>,
+    pub fn normest1<S>(&mut self, a: &ArrayBase<S, Ix2>, itmax: usize) -> T::RealField
+        where S: Data<Elem=T>,
     {
         self.calculate(a, itmax)
     }
 
     /// Estimate the 1-norm of a marix `a` to the power `m` up to `itmax` iterations.
-    pub fn normest1_pow<S>(&mut self, a: &ArrayBase<S, Ix2>, m: usize, itmax: usize) -> f64
-        where S: Data<Elem=f64>,
+    pub fn normest1_pow<S>(&mut self, a: &ArrayBase<S, Ix2>, m: usize, itmax: usize) -> T::RealField
+        where S: Data<Elem=T>,
     {
         self.calculate(&(a, m), itmax)
     }
 
     /// Estimate the 1-norm of a product of matrices `a1 a2 ... an` up to `itmax` iterations.
-    pub fn normest1_prod<S>(&mut self, aprod: &[&ArrayBase<S, Ix2>], itmax: usize) -> f64
-        where S: Data<Elem=f64>,
+    pub fn normest1_prod<S>(&mut self, aprod: &[&ArrayBase<S, Ix2>], itmax: usize) -> T::RealField
+        where S: Data<Elem=T>,
     {
         self.calculate(aprod, itmax)
     }
@@ -436,12 +428,12 @@ impl Normest1 {
 ///
 /// [Higham, Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf
 /// [`Normest1`]: struct.Normest1.html
-pub fn normest1(a_matrix: &Array2<f64>, t: usize, itmax: usize) -> f64
+pub fn normest1<T: BlasScalar>(a_matrix: &Array2<T>, t: usize, itmax: usize) -> T::RealField
 {
     // Assume the matrix is square and take the columns as n. If it's not square, the assertion in
     // normest.calculate will fail.
     let n = a_matrix.dim().1;
-    let mut normest1 = Normest1::new(n, t);
+    let mut normest1 : Normest1<T> = Normest1::new(n, t);
     normest1.normest1(a_matrix, itmax)
 }
 
@@ -454,7 +446,7 @@ pub fn normest1(a_matrix: &Array2<f64>, t: usize, itmax: usize) -> f64
 /// 1-norm on matrices of the same size, construct a [`Normest1`] first, and call its methods.
 ///
 /// [Higham, Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf
-pub fn normest1_pow(a_matrix: &Array2<f64>, m: usize, t: usize, itmax: usize) -> f64
+pub fn normest1_pow<T: BlasScalar>(a_matrix: &Array2<T>, m: usize, t: usize, itmax: usize) -> T::RealField
 {
     // Assume the matrix is square and take the columns as n. If it's not square, the assertion in
     // normest.calculate will fail.
@@ -473,7 +465,7 @@ pub fn normest1_pow(a_matrix: &Array2<f64>, m: usize, t: usize, itmax: usize) ->
 /// 1-norm on matrices of the same size, construct a [`Normest1`] first, and call its methods.
 ///
 /// [Higham, Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf
-pub fn normest1_prod(a_matrices: &[&Array2<f64>], t: usize, itmax: usize) -> f64
+pub fn normest1_prod<T: BlasScalar>(a_matrices: &[&Array2<T>], t: usize, itmax: usize) -> T::RealField
 {
     assert!(a_matrices.len() > 0);
     let n = a_matrices[0].dim().1;
@@ -485,9 +477,9 @@ pub fn normest1_prod(a_matrices: &[&Array2<f64>], t: usize, itmax: usize) -> f64
 ///
 /// Panics if matrices `a` and `b` have different shape and strides, or if either underlying array is
 /// non-contiguous. This is to make sure that the iteration order over the matrices is the same.
-fn assign_signum_of_array<S1, S2, D>(a: &ArrayBase<S1, D>, b: &mut ArrayBase<S2, D>)
-    where S1: Data<Elem=f64>,
-          S2: DataMut<Elem=f64>,
+fn assign_signum_of_array<S1, S2, D, T: BlasScalar>(a: &ArrayBase<S1, D>, b: &mut ArrayBase<S2, D>)
+    where S1: Data<Elem=T>,
+          S2: DataMut<Elem=T>,
           D: Dimension
 {
     assert_eq!(a.strides(), b.strides());
@@ -498,15 +490,15 @@ fn assign_signum_of_array<S1, S2, D>(a: &ArrayBase<S1, D>, b: &mut ArrayBase<S2,
     signum_of_slice(a_slice, b_slice);
 }
 
-fn signum_of_slice(source: &[f64], destination: &mut [f64]) {
+fn signum_of_slice<T: BlasScalar>(source: &[T], destination: &mut [T]) {
     for (s, d) in source.iter().zip(destination) {
         *d = s.signum();
     }
 }
 
 /// Calculate the onenorm of a vector (an `ArrayBase` with dimension `Ix1`).
-fn vector_onenorm<S>(a: &ArrayBase<S, Ix1>) -> f64
-    where S: Data<Elem=f64>,
+fn vector_onenorm<S, T: BlasScalar>(a: &ArrayBase<S, Ix1>) -> T::RealField
+    where S: Data<Elem=T>,
 {
     let stride = a.strides()[0];
     assert!(stride >= 0);
@@ -517,15 +509,15 @@ fn vector_onenorm<S>(a: &ArrayBase<S, Ix1>) -> f64
         let total_len = n_elements * stride;
         unsafe { slice::from_raw_parts(a, total_len) }
     };
-
-    unsafe {
-        cblas::dasum(n_elements as i32, a_slice, stride as i32)
-    }
+    T::asum(n_elements as i32, a_slice, stride as i32)
+//    unsafe {
+//        cblas::dasum(n_elements as i32, a_slice, stride as i32)
+//    }
 }
 
 /// Calculate the maximum norm of a vector (an `ArrayBase` with dimension `Ix1`).
-fn vector_maxnorm<S>(a: &ArrayBase<S, Ix1>) -> f64
-    where S: Data<Elem=f64>
+fn vector_maxnorm<S, T: BlasScalar>(a: &ArrayBase<S, Ix1>) -> T::RealField
+    where S: Data<Elem=T>
 {
     let stride = a.strides()[0];
     assert!(stride >= 0);
@@ -536,15 +528,15 @@ fn vector_maxnorm<S>(a: &ArrayBase<S, Ix1>) -> f64
         let total_len = n_elements * stride;
         unsafe { slice::from_raw_parts(a, total_len) }
     };
-
-    let idx = unsafe {
-        cblas::idamax(
-            n_elements as i32,
-            a_slice,
-            stride as i32,
-        ) as usize
-    };
-    f64::abs(a[idx])
+    let idx = T::amax(n_elements as i32, a_slice, stride as i32) as usize;
+//    let idx = unsafe {
+//        cblas::idamax(
+//            n_elements as i32,
+//            a_slice,
+//            stride as i32,
+//        ) as usize
+//    };
+    T::abs(a[idx])
 }
 
 // /// Calculate the onenorm of a matrix (an `ArrayBase` with dimension `Ix2`).
@@ -579,10 +571,11 @@ fn vector_maxnorm<S>(a: &ArrayBase<S, Ix1>) -> f64
 
 /// Returns the one-norm of a matrix `a` together with the index of that column for
 /// which the norm is maximal.
-fn matrix_onenorm_with_index<S>(a: &ArrayBase<S, Ix2>) -> (usize, f64)
-    where S: Data<Elem=f64>,
+fn matrix_onenorm_with_index<S, T: BlasScalar>(a: &ArrayBase<S, Ix2>) -> (usize, T::RealField)
+    where S: Data<Elem=T>,
 {
-    let mut max_norm = 0.0;
+    //todo:
+    let mut max_norm : T::RealField = <T::RealField as Zero>::zero();
     let mut max_norm_index = 0;
     for (i, column) in a.gencolumns().into_iter().enumerate() {
         let norm = vector_onenorm(&column);
@@ -606,13 +599,13 @@ fn matrix_onenorm_with_index<S>(a: &ArrayBase<S, Ix2>) -> (usize, f64)
 ///
 /// Panics if arrays `a` and `c` don't have the same dimensions, or if the length of the slice
 /// `column_is_parallel` is not equal to the number of columns in `a`.
-fn find_parallel_columns_in<S1, S2> (
+fn find_parallel_columns_in<S1, S2, T: BlasScalar> (
     a: &ArrayBase<S1, Ix2>,
     c: &mut ArrayBase<S2, Ix2>,
     column_is_parallel: &mut [bool]
 )
-    where S1: Data<Elem=f64>,
-          S2: DataMut<Elem=f64>
+    where S1: Data<Elem=T>,
+          S2: DataMut<Elem=T>
 {
     let a_dim = a.dim();
     let c_dim = c.dim();
@@ -653,21 +646,32 @@ fn find_parallel_columns_in<S1, S2> (
             cblas::Layout::ColumnMajor => (n_cols, n_rows),
             cblas::Layout::RowMajor => (n_rows, n_cols),
         };
-        unsafe {
-            cblas::dsyrk(
-                layout,
+        T::syrk(layout,
                 cblas::Part::Upper,
-                cblas::Transpose::Ordinary,
+                cblas::Transpose::Conjugate,
                 n_cols as i32,
                 k as i32,
-                1.0,
+                T::from_subset(&1.0),
                 a_slice,
                 lda as i32,
-                0.0,
+                T::zero(),
                 c_slice,
-                n_cols as i32,
-            );
-        }
+                n_cols as i32,);
+//        unsafe {
+//            cblas::dsyrk(
+//                layout,
+//                cblas::Part::Upper,
+//                cblas::Transpose::Ordinary,
+//                n_cols as i32,
+//                k as i32,
+//                1.0,
+//                a_slice,
+//                lda as i32,
+//                0.0,
+//                c_slice,
+//                n_cols as i32,
+//            );
+//       }
     }
 
     // c is upper triangular and contains all pair-wise vector products:
@@ -685,7 +689,7 @@ fn find_parallel_columns_in<S1, S2> (
         if column_is_parallel[i] || i >= n_cols - 1 { continue 'rows; }
         for (j, element) in row.slice(s![i+1..]).iter().enumerate() {
             // Check if the vectors are parallel or anti-parallel
-            if f64::abs(*element) == n_rows as f64 {
+            if T::abs(*element).to_subset().unwrap() == n_rows as f64 {
                 column_is_parallel[i+j+1] = true;
             }
         }
@@ -707,15 +711,15 @@ fn find_parallel_columns_in<S1, S2> (
 ///
 /// Panics if arrays `a`, `b`, and `c` don't have the same dimensions, or if the length of the slice
 /// `column_is_parallel` is not equal to the number of columns in `a`.
-fn find_parallel_columns_between<S1, S2, S3> (
+fn find_parallel_columns_between<S1, S2, S3, T: BlasScalar> (
     a: &ArrayBase<S1, Ix2>,
     b: &ArrayBase<S2, Ix2>,
     c: &mut ArrayBase<S3, Ix2>,
     column_is_parallel: &mut [bool],
 )
-    where S1: Data<Elem=f64>,
-          S2: Data<Elem=f64>,
-          S3: DataMut<Elem=f64>
+    where S1: Data<Elem=T>,
+          S2: Data<Elem=T>,
+          S3: DataMut<Elem=T>
 {
     let a_dim = a.dim();
     let b_dim = b.dim();
@@ -738,24 +742,28 @@ fn find_parallel_columns_between<S1, S2, S3> (
 
         let layout = a_layout;
 
-        unsafe {
-            cblas::dgemm(
-                layout,
-                cblas::Transpose::Ordinary,
-                cblas::Transpose::None,
-                n_cols as i32,
-                n_cols as i32,
-                n_rows as i32,
-                1.0,
-                a_slice,
-                n_cols as i32,
-                b_slice,
-                n_cols as i32,
-                0.0,
-                c_slice,
-                n_cols as i32,
-            );
-        }
+        T::gemm(layout, cblas::Transpose::Conjugate, cblas::Transpose::None,
+                n_cols as i32, n_cols as i32, n_rows as i32,
+                T::one(), a_slice, n_cols as i32, b_slice, n_cols as i32,
+                T::zero(), c_slice, n_cols as i32);
+//        unsafe {
+//            cblas::dgemm(
+//                layout,
+//                cblas::Transpose::Ordinary,
+//                cblas::Transpose::None,
+//                n_cols as i32,
+//                n_cols as i32,
+//                n_rows as i32,
+//                1.0,
+//                a_slice,
+//                n_cols as i32,
+//                b_slice,
+//                n_cols as i32,
+//                0.0,
+//                c_slice,
+//                n_cols as i32,
+//            );
+//        }
     }
 
     // We are iterating over the rows because it's more memory efficient (for row-major array).  In
@@ -767,7 +775,7 @@ fn find_parallel_columns_between<S1, S2, S3> (
         // Skip if the column is already found to be parallel the last column.
         if column_is_parallel[i] { continue 'rows; }
         for element in row {
-            if f64::abs(*element) == n_rows as f64 {
+            if T::abs(*element).to_subset().unwrap() == n_rows as f64 {
                 column_is_parallel[i] = true;
                 continue 'rows;
             }
@@ -780,13 +788,13 @@ fn find_parallel_columns_between<S1, S2, S3> (
 ///
 /// Assumes that we have parallelity only if all entries of two columns `a` and `b` are either +1
 /// or -1.
-fn are_all_columns_parallel_between<S1, S2> (
+fn are_all_columns_parallel_between<S1, S2, T: BlasScalar> (
     a: &ArrayBase<S1, Ix2>,
     b: &ArrayBase<S1, Ix2>,
     c: &mut ArrayBase<S2, Ix2>,
 ) -> bool
-    where S1: Data<Elem=f64>,
-          S2: DataMut<Elem=f64>
+    where S1: Data<Elem=T>,
+          S2: DataMut<Elem=T>
 {
     let a_dim = a.dim();
     let b_dim = b.dim();
@@ -806,25 +814,28 @@ fn are_all_columns_parallel_between<S1, S2> (
         assert_eq!(a_layout, c_layout);
 
         let layout = a_layout;
-
-        unsafe {
-            cblas::dgemm(
-                layout,
-                cblas::Transpose::Ordinary,
-                cblas::Transpose::None,
-                n_cols as i32,
-                n_cols as i32,
-                n_rows as i32,
-                1.0,
-                a_slice,
-                n_cols as i32,
-                b_slice,
-                n_cols as i32,
-                0.0,
-                c_slice,
-                n_rows as i32,
-            );
-        }
+        T::gemm(layout, cblas::Transpose::Conjugate, cblas::Transpose::None,
+                n_cols as i32, n_cols as i32, n_rows as i32,
+                T::one(), a_slice, n_cols as i32, b_slice, n_cols as i32,
+                T::zero(), c_slice, n_rows as i32,);
+//        unsafe {
+//            cblas::dgemm(
+//                layout,
+//                cblas::Transpose::Ordinary,
+//                cblas::Transpose::None,
+//                n_cols as i32,
+//                n_cols as i32,
+//                n_rows as i32,
+//                1.0,
+//                a_slice,
+//                n_cols as i32,
+//                b_slice,
+//                n_cols as i32,
+//                0.0,
+//                c_slice,
+//                n_rows as i32,
+//            );
+//        }
     }
 
     // We are iterating over the rows because it's more memory efficient (for row-major array).  In
@@ -834,7 +845,7 @@ fn are_all_columns_parallel_between<S1, S2> (
     'rows: for row in c.genrows() {
         for element in row {
             // If a parallel column was found, cut to the next one.
-            if f64::abs(*element) == n_rows as f64 { continue 'rows; }
+            if T::abs(*element).to_subset().unwrap() == n_rows as f64 { continue 'rows; }
         }
         // This return statement should only be reached if not a single column parallel to the
         // current one was found.
@@ -845,17 +856,17 @@ fn are_all_columns_parallel_between<S1, S2> (
 
 /// Find parallel columns in matrix `a` and columns in `a` that are parallel to any columns in
 /// matrix `b`, and replace those with random vectors. Returns `true` if resampling has taken place.
-fn resample_parallel_columns<S1, S2, S3, R>(
+fn resample_parallel_columns<S1, S2, S3, R, T: BlasScalar>(
     a: &mut ArrayBase<S1, Ix2>,
     b: &ArrayBase<S2, Ix2>,
     c: &mut ArrayBase<S3, Ix2>,
     column_is_parallel: &mut [bool],
     rng: &mut R,
-    sample: &[f64],
+    sample: &[T],
 ) -> bool
-    where S1: DataMut<Elem=f64>,
-          S2: Data<Elem=f64>,
-          S3: DataMut<Elem=f64>,
+    where S1: DataMut<Elem=T>,
+          S2: Data<Elem=T>,
+          S3: DataMut<Elem=T>,
           R: Rng
 {
     column_is_parallel.iter_mut().for_each(|x| {*x = false;});
@@ -874,8 +885,12 @@ fn resample_parallel_columns<S1, S2, S3, R>(
 /// Resamples column `i` of matrix `a` with elements drawn from `sample` using `rng`.
 ///
 /// Panics if `i` exceeds the number of columns in `a`.
-fn resample_column<R, S>(a: &mut ArrayBase<S, Ix2>, i: usize, rng: &mut R, sample: &[f64])
-    where S: DataMut<Elem=f64>,
+fn resample_column<R, S, T: BlasScalar>(
+    a: &mut ArrayBase<S, Ix2>,
+    i: usize, rng:
+    &mut R, sample: &[T]
+)
+    where S: DataMut<Elem=T>,
           R: Rng
 {
     assert!(i < a.dim().1, "Trying to resample column with index exceeding matrix dimensions");

From 8d64d23bacf7669890af719dc9a23785242ddd62 Mon Sep 17 00:00:00 2001
From: hmb <humberto.munozb@gmail.com>
Date: Mon, 4 Nov 2019 14:13:18 -0800
Subject: [PATCH 02/12] Clarified transpose for syrk

---
 .gitignore |  1 +
 src/lib.rs | 18 ++----------------
 2 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/.gitignore b/.gitignore
index 6936990..e988cd0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 /target
 **/*.rs.bk
 Cargo.lock
+/.cargo/
diff --git a/src/lib.rs b/src/lib.rs
index 2674538..ec3cf8e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -648,7 +648,8 @@ fn find_parallel_columns_in<S1, S2, T: BlasScalar> (
         };
         T::syrk(layout,
                 cblas::Part::Upper,
-                cblas::Transpose::Conjugate,
+            //All entries are assumed to be real, so herk w/ conj trans is not necessary
+                cblas::Transpose::Ordinary,
                 n_cols as i32,
                 k as i32,
                 T::from_subset(&1.0),
@@ -657,21 +658,6 @@ fn find_parallel_columns_in<S1, S2, T: BlasScalar> (
                 T::zero(),
                 c_slice,
                 n_cols as i32,);
-//        unsafe {
-//            cblas::dsyrk(
-//                layout,
-//                cblas::Part::Upper,
-//                cblas::Transpose::Ordinary,
-//                n_cols as i32,
-//                k as i32,
-//                1.0,
-//                a_slice,
-//                lda as i32,
-//                0.0,
-//                c_slice,
-//                n_cols as i32,
-//            );
-//       }
     }
 
     // c is upper triangular and contains all pair-wise vector products:

From 2c64dc7f5af491015e1da1b7c1df945b94960c35 Mon Sep 17 00:00:00 2001
From: hmb <humberto.munozb@gmail.com>
Date: Tue, 5 Nov 2019 03:23:48 -0800
Subject: [PATCH 03/12] kept parallel check for complex scalars

---
 src/lib.rs | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index ec3cf8e..d4f6abe 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,7 +13,7 @@ use ndarray::{
     Ix2,
     s,
 };
-use num_traits::{Float, Zero};
+use num_traits::{Float, Zero, One};
 use ordered_float::NotNan;
 use rand::{
     Rng,
@@ -282,17 +282,18 @@ impl<T: BlasScalar> Normest1<T> {
                 // > or to a column of Sold by replacing columns of S by rand{-1,+1}
                 //
                 // NOTE: We are reusing `y_matrix` here as a temporary value.
-                // Note: Parallel column test can be skipped in complex case
-                if TypeId::of::<T>() == TypeId::of::<f32>() || TypeId::of::<T>() == TypeId::of::<f64>(){
-                    resample_parallel_columns(
-                        &mut self.sign_matrix,
-                        &self.sign_matrix_old,
-                        &mut self.y_matrix,
-                        &mut self.column_is_parallel,
-                        &mut self.rng,
-                        &sample,
-                    );
-                }
+                // Note: could the parallel column test be skipped in complex case?
+                //    may cause a few ulps difference
+                //if TypeId::of::<T>() == TypeId::of::<f32>() || TypeId::of::<T>() == TypeId::of::<f64>(){
+                resample_parallel_columns(
+                    &mut self.sign_matrix,
+                    &self.sign_matrix_old,
+                    &mut self.y_matrix,
+                    &mut self.column_is_parallel,
+                    &mut self.rng,
+                    &sample,
+                );
+                //}
             }
 
             // > est_old = est, Sold = S
@@ -646,16 +647,15 @@ fn find_parallel_columns_in<S1, S2, T: BlasScalar> (
             cblas::Layout::ColumnMajor => (n_cols, n_rows),
             cblas::Layout::RowMajor => (n_rows, n_cols),
         };
-        T::syrk(layout,
+        T::herk(layout,
                 cblas::Part::Upper,
-            //All entries are assumed to be real, so herk w/ conj trans is not necessary
-                cblas::Transpose::Ordinary,
+                cblas::Transpose::Conjugate,
                 n_cols as i32,
                 k as i32,
-                T::from_subset(&1.0),
+                T::RealField::one(),
                 a_slice,
                 lda as i32,
-                T::zero(),
+                T::RealField::zero(),
                 c_slice,
                 n_cols as i32,);
     }

From 268bd483bb0dbec3e2ceb2cbb378b81f6b3bac39 Mon Sep 17 00:00:00 2001
From: hmb <humberto.munozb@gmail.com>
Date: Sat, 9 Nov 2019 19:20:37 -0800
Subject: [PATCH 04/12] Update ndarray to 0.13

---
 Cargo.toml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index d587d5a..46292bf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,7 +15,7 @@ keywords = ["condition", "linalg", "matrix", "ndarray", "norm"]
 alga = "0.9.2"
 cblas = "0.2"
 lapacke = "0.2"
-ndarray = "0.12"
+ndarray = "0.13"
 num-traits = "0.2.8"
 ordered-float = "1.0"
 rand = "0.6"
@@ -25,6 +25,8 @@ blas-traits = {git="https://github.com/hmunozb/blas-traits"}
 
 [dev-dependencies]
 ndarray-rand = "0.9"
-openblas-src = "0.7"
+#openblas-src = "0.7"
+openblas-src = {version="0.7.0", default-features=false, features=["system"]}
+#cblas-sys = "0.1.4"
 lapacke-sys = "0.1.4"
 

From 61ca7d73836f437fafcf263fb620acacf6c114fb Mon Sep 17 00:00:00 2001
From: hmb <humberto.munozb@gmail.com>
Date: Sun, 29 Mar 2020 02:26:28 -0700
Subject: [PATCH 05/12] Updated crate versions

---
 Cargo.toml | 19 ++++++++-----------
 src/lib.rs | 54 ++++++------------------------------------------------
 2 files changed, 14 insertions(+), 59 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 46292bf..deb242e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "condest"
-version = "0.2.1"
+version = "0.3.0"
 authors = ["Richard Janis Goldschmidt <janis.beckert@gmail.com>"]
 edition = "2018"
 license = "MIT OR Apache-2.0"
@@ -12,21 +12,18 @@ categories = ["science"]
 keywords = ["condition", "linalg", "matrix", "ndarray", "norm"]
 
 [dependencies]
-alga = "0.9.2"
+alga = "0.9"
 cblas = "0.2"
 lapacke = "0.2"
+lapack-traits = "0.1.0"
 ndarray = "0.13"
 num-traits = "0.2.8"
 ordered-float = "1.0"
-rand = "0.6"
-rand_xoshiro = "0.1"
-
-blas-traits = {git="https://github.com/hmunozb/blas-traits"}
+rand = "0.7"
+rand_xoshiro = "0.4"
 
 [dev-dependencies]
-ndarray-rand = "0.9"
-#openblas-src = "0.7"
-openblas-src = {version="0.7.0", default-features=false, features=["system"]}
-#cblas-sys = "0.1.4"
-lapacke-sys = "0.1.4"
+ndarray-rand = "0.11"
+#openblas-src = "0.9"
+openblas-src = {version="0.9", default-features=false, features=["system"]}
 
diff --git a/src/lib.rs b/src/lib.rs
index d4f6abe..3aa6c44 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,8 +1,9 @@
 //! This crate implements the matrix 1-norm estimator by [Higham and Tisseur].
 //!
 //! [Higham and Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf
-use alga::general::{ComplexField, SupersetOf};
-use blas_traits::BlasScalar;
+use alga::general::SupersetOf;
+use lapack_traits::BlasScalar;
+
 use ndarray::{
     prelude::*,
     ArrayBase,
@@ -13,7 +14,7 @@ use ndarray::{
     Ix2,
     s,
 };
-use num_traits::{Float, Zero, One};
+use num_traits::{Zero, One};
 use ordered_float::NotNan;
 use rand::{
     Rng,
@@ -24,7 +25,6 @@ use rand_xoshiro::Xoshiro256StarStar;
 use std::collections::BTreeSet;
 use std::cmp;
 use std::slice;
-use std::any::TypeId;
 
 pub struct Normest1<T: BlasScalar> where {
     n: usize,
@@ -530,13 +530,7 @@ fn vector_maxnorm<S, T: BlasScalar>(a: &ArrayBase<S, Ix1>) -> T::RealField
         unsafe { slice::from_raw_parts(a, total_len) }
     };
     let idx = T::amax(n_elements as i32, a_slice, stride as i32) as usize;
-//    let idx = unsafe {
-//        cblas::idamax(
-//            n_elements as i32,
-//            a_slice,
-//            stride as i32,
-//        ) as usize
-//    };
+
     T::abs(a[idx])
 }
 
@@ -732,24 +726,6 @@ fn find_parallel_columns_between<S1, S2, S3, T: BlasScalar> (
                 n_cols as i32, n_cols as i32, n_rows as i32,
                 T::one(), a_slice, n_cols as i32, b_slice, n_cols as i32,
                 T::zero(), c_slice, n_cols as i32);
-//        unsafe {
-//            cblas::dgemm(
-//                layout,
-//                cblas::Transpose::Ordinary,
-//                cblas::Transpose::None,
-//                n_cols as i32,
-//                n_cols as i32,
-//                n_rows as i32,
-//                1.0,
-//                a_slice,
-//                n_cols as i32,
-//                b_slice,
-//                n_cols as i32,
-//                0.0,
-//                c_slice,
-//                n_cols as i32,
-//            );
-//        }
     }
 
     // We are iterating over the rows because it's more memory efficient (for row-major array).  In
@@ -804,24 +780,6 @@ fn are_all_columns_parallel_between<S1, S2, T: BlasScalar> (
                 n_cols as i32, n_cols as i32, n_rows as i32,
                 T::one(), a_slice, n_cols as i32, b_slice, n_cols as i32,
                 T::zero(), c_slice, n_rows as i32,);
-//        unsafe {
-//            cblas::dgemm(
-//                layout,
-//                cblas::Transpose::Ordinary,
-//                cblas::Transpose::None,
-//                n_cols as i32,
-//                n_cols as i32,
-//                n_rows as i32,
-//                1.0,
-//                a_slice,
-//                n_cols as i32,
-//                b_slice,
-//                n_cols as i32,
-//                0.0,
-//                c_slice,
-//                n_rows as i32,
-//            );
-//        }
     }
 
     // We are iterating over the rows because it's more memory efficient (for row-major array).  In
@@ -1037,7 +995,7 @@ mod tests {
                 *u = *c / *e;
         });
 
-        let underestimation_mean = underestimation_ratio.mean_axis(Axis(0)).into_scalar();
+        let underestimation_mean = underestimation_ratio.mean_axis(Axis(0)).unwrap().into_scalar();
         assert!(0.99 < underestimation_mean);
         assert!(underestimation_mean < 1.0);
     }

From 69ea9be7825eb8dce74c65f9553b3df9048ea0b5 Mon Sep 17 00:00:00 2001
From: hmb <humberto.munozb@gmail.com>
Date: Sun, 29 Mar 2020 02:32:53 -0700
Subject: [PATCH 06/12] Fixed test deprecation warnings

---
 Cargo.toml |  1 +
 src/lib.rs | 12 ++++++------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index deb242e..e874aef 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,6 +20,7 @@ ndarray = "0.13"
 num-traits = "0.2.8"
 ordered-float = "1.0"
 rand = "0.7"
+rand_distr = "0.2"
 rand_xoshiro = "0.4"
 
 [dev-dependencies]
diff --git a/src/lib.rs b/src/lib.rs
index 3aa6c44..a04c07c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -893,7 +893,7 @@ mod tests {
     use rand::{
         SeedableRng,
     };
-    use rand::distributions::StandardNormal;
+    use rand_distr::StandardNormal;
     use rand_xoshiro::Xoshiro256Plus;
 
     #[test]
@@ -905,7 +905,7 @@ mod tests {
         let mut rng = Xoshiro256Plus::seed_from_u64(1234);
         let distribution = StandardNormal;
 
-        let mut a_matrix = Array::random_using((n, n), distribution, &mut rng);
+        let mut a_matrix = Array2::<f64>::random_using((n, n), distribution, &mut rng);
         a_matrix.mapv_inplace(|x| 1.0/x);
 
         let unity = Array::eye(n);
@@ -927,7 +927,7 @@ mod tests {
         let mut rng = Xoshiro256Plus::seed_from_u64(1234);
         let distribution = StandardNormal;
 
-        let mut a_matrix = Array::random_using((n, n), distribution, &mut rng);
+        let mut a_matrix = Array2::<f64>::random_using((n, n), distribution, &mut rng);
         a_matrix.mapv_inplace(|x| 1.0/x);
 
         let estimate_matrixpow = crate::normest1_pow(&a_matrix, 2, t, itmax);
@@ -962,7 +962,7 @@ mod tests {
         let distribution = StandardNormal;
 
         for _ in 0..nsamples {
-            let mut a_matrix = Array::random_using((n, n), distribution, &mut rng);
+            let mut a_matrix = Array2::<f64>::random_using((n, n), distribution, &mut rng);
             a_matrix.mapv_inplace(|x| 1.0/x);
             let estimate = crate::normest1(&a_matrix, t, itmax);
             calculated.push(estimate);
@@ -984,8 +984,8 @@ mod tests {
             });
         }
 
-        let calculated = Array1::from_vec(calculated);
-        let expected = Array1::from_vec(expected);
+        let calculated = Array1::from(calculated);
+        let expected = Array1::from(expected);
 
         let mut underestimation_ratio = unsafe { Array1::<f64>::uninitialized(nsamples) };
         Zip::from(&calculated)

From d9ebe691484c197a77bcec30da13c4e278b72ff1 Mon Sep 17 00:00:00 2001
From: hmb <humberto.munozb@gmail.com>
Date: Tue, 14 Apr 2020 01:43:02 -0700
Subject: [PATCH 07/12] Lapack-traits methods wrapped in unsafe.

---
 Cargo.toml |  2 +-
 src/lib.rs | 58 +++++++++++++++++++++++++++++++-----------------------
 2 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index e874aef..008139c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,7 +15,7 @@ keywords = ["condition", "linalg", "matrix", "ndarray", "norm"]
 alga = "0.9"
 cblas = "0.2"
 lapacke = "0.2"
-lapack-traits = "0.1.0"
+lapack-traits = "0.2.0"
 ndarray = "0.13"
 num-traits = "0.2.8"
 ordered-float = "1.0"
diff --git a/src/lib.rs b/src/lib.rs
index a04c07c..0e04c7d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -98,10 +98,12 @@ impl<S1, T: BlasScalar> LinearOperator<T> for ArrayBase<S1, Ix2>
         } else {
             cblas::Transpose::None
         };
-        T::gemm(layout, a_transpose, cblas::Transpose::None,
-                 n as i32, t as i32, n as i32,
-                 T::from_subset(&1.0), a_slice, n as i32, b_slice, t as i32,
-                 T::from_subset(&0.0), c_slice, t as i32,);
+        unsafe{
+            T::gemm(layout, a_transpose, cblas::Transpose::None,
+                     n as i32, t as i32, n as i32,
+                     T::from_subset(&1.0), a_slice, n as i32, b_slice, t as i32,
+                     T::from_subset(&0.0), c_slice, t as i32,);
+        }
     }
 }
 
@@ -510,10 +512,10 @@ fn vector_onenorm<S, T: BlasScalar>(a: &ArrayBase<S, Ix1>) -> T::RealField
         let total_len = n_elements * stride;
         unsafe { slice::from_raw_parts(a, total_len) }
     };
-    T::asum(n_elements as i32, a_slice, stride as i32)
-//    unsafe {
-//        cblas::dasum(n_elements as i32, a_slice, stride as i32)
-//    }
+
+    unsafe{
+        T::asum(n_elements as i32, a_slice, stride as i32)
+    }
 }
 
 /// Calculate the maximum norm of a vector (an `ArrayBase` with dimension `Ix1`).
@@ -529,7 +531,7 @@ fn vector_maxnorm<S, T: BlasScalar>(a: &ArrayBase<S, Ix1>) -> T::RealField
         let total_len = n_elements * stride;
         unsafe { slice::from_raw_parts(a, total_len) }
     };
-    let idx = T::amax(n_elements as i32, a_slice, stride as i32) as usize;
+    let idx = unsafe{ T::amax(n_elements as i32, a_slice, stride as i32) } as usize;
 
     T::abs(a[idx])
 }
@@ -641,17 +643,19 @@ fn find_parallel_columns_in<S1, S2, T: BlasScalar> (
             cblas::Layout::ColumnMajor => (n_cols, n_rows),
             cblas::Layout::RowMajor => (n_rows, n_cols),
         };
-        T::herk(layout,
-                cblas::Part::Upper,
-                cblas::Transpose::Conjugate,
-                n_cols as i32,
-                k as i32,
-                T::RealField::one(),
-                a_slice,
-                lda as i32,
-                T::RealField::zero(),
-                c_slice,
-                n_cols as i32,);
+        unsafe{
+            T::herk(layout,
+                    cblas::Part::Upper,
+                    cblas::Transpose::Conjugate,
+                    n_cols as i32,
+                    k as i32,
+                    T::RealField::one(),
+                    a_slice,
+                    lda as i32,
+                    T::RealField::zero(),
+                    c_slice,
+                    n_cols as i32,);
+        }
     }
 
     // c is upper triangular and contains all pair-wise vector products:
@@ -722,10 +726,12 @@ fn find_parallel_columns_between<S1, S2, S3, T: BlasScalar> (
 
         let layout = a_layout;
 
-        T::gemm(layout, cblas::Transpose::Conjugate, cblas::Transpose::None,
+        unsafe{
+            T::gemm(layout, cblas::Transpose::Conjugate, cblas::Transpose::None,
                 n_cols as i32, n_cols as i32, n_rows as i32,
                 T::one(), a_slice, n_cols as i32, b_slice, n_cols as i32,
                 T::zero(), c_slice, n_cols as i32);
+        }
     }
 
     // We are iterating over the rows because it's more memory efficient (for row-major array).  In
@@ -776,10 +782,12 @@ fn are_all_columns_parallel_between<S1, S2, T: BlasScalar> (
         assert_eq!(a_layout, c_layout);
 
         let layout = a_layout;
-        T::gemm(layout, cblas::Transpose::Conjugate, cblas::Transpose::None,
-                n_cols as i32, n_cols as i32, n_rows as i32,
-                T::one(), a_slice, n_cols as i32, b_slice, n_cols as i32,
-                T::zero(), c_slice, n_rows as i32,);
+        unsafe{
+            T::gemm(layout, cblas::Transpose::Conjugate, cblas::Transpose::None,
+                    n_cols as i32, n_cols as i32, n_rows as i32,
+                    T::one(), a_slice, n_cols as i32, b_slice, n_cols as i32,
+                    T::zero(), c_slice, n_rows as i32,);
+        }
     }
 
     // We are iterating over the rows because it's more memory efficient (for row-major array).  In

From a4a5a3cf7c9991c6c44fd1b559ad14fcec8e0e3e Mon Sep 17 00:00:00 2001
From: hmb <humberto.munozb@gmail.com>
Date: Mon, 27 Apr 2020 23:43:43 -0700
Subject: [PATCH 08/12] Use SupersetOf from lapack-traits

---
 Cargo.toml | 5 ++---
 src/lib.rs | 3 +--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 008139c..1dbd543 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,12 +12,11 @@ categories = ["science"]
 keywords = ["condition", "linalg", "matrix", "ndarray", "norm"]
 
 [dependencies]
-alga = "0.9"
 cblas = "0.2"
 lapacke = "0.2"
-lapack-traits = "0.2.0"
+lapack-traits = "0.2.2"
 ndarray = "0.13"
-num-traits = "0.2.8"
+num-traits = "0.2"
 ordered-float = "1.0"
 rand = "0.7"
 rand_distr = "0.2"
diff --git a/src/lib.rs b/src/lib.rs
index 0e04c7d..610a720 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,8 +1,7 @@
 //! This crate implements the matrix 1-norm estimator by [Higham and Tisseur].
 //!
 //! [Higham and Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf
-use alga::general::SupersetOf;
-use lapack_traits::BlasScalar;
+use lapack_traits::{BlasScalar, SupersetOf};
 
 use ndarray::{
     prelude::*,

From 9d1dd7fdfa57e8c3254ca8a83d37a8c06aeae0a1 Mon Sep 17 00:00:00 2001
From: hmb <humberto.munozb@gmail.com>
Date: Tue, 22 Jun 2021 21:58:50 -0700
Subject: [PATCH 09/12] Update lapack_traits and other dependencies

---
 Cargo.toml | 22 +++++++++++++---------
 src/lib.rs | 45 +++++++++++++++++++++++----------------------
 2 files changed, 36 insertions(+), 31 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 1dbd543..63a7983 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,18 +12,22 @@ categories = ["science"]
 keywords = ["condition", "linalg", "matrix", "ndarray", "norm"]
 
 [dependencies]
-cblas = "0.2"
-lapacke = "0.2"
-lapack-traits = "0.2.2"
-ndarray = "0.13"
+cblas = "0.4"
+lapacke = "0.5"
+lapack-traits = {version="0.3", features=["simba"]}
+ndarray = "0.15"
 num-traits = "0.2"
 ordered-float = "1.0"
-rand = "0.7"
-rand_distr = "0.2"
-rand_xoshiro = "0.4"
+rand = "0.8"
+rand_distr = "0.4"
+rand_xoshiro = "0.6"
+simba="0.5"
+
+#[patch.crates-io]
+#lapack-traits = {git="https://github.com/hmunozb/lapack-traits.git"}
 
 [dev-dependencies]
-ndarray-rand = "0.11"
+ndarray-rand = "0.14"
 #openblas-src = "0.9"
-openblas-src = {version="0.9", default-features=false, features=["system"]}
+openblas-src = {version="0.10", default-features=false, features=["system"]}
 
diff --git a/src/lib.rs b/src/lib.rs
index 610a720..df169ed 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,7 +1,8 @@
 //! This crate implements the matrix 1-norm estimator by [Higham and Tisseur].
 //!
 //! [Higham and Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf
-use lapack_traits::{BlasScalar, SupersetOf};
+use lapack_traits::LComplexField;
+use simba::scalar::SupersetOf;
 
 use ndarray::{
     prelude::*,
@@ -25,7 +26,7 @@ use std::collections::BTreeSet;
 use std::cmp;
 use std::slice;
 
-pub struct Normest1<T: BlasScalar> where {
+pub struct Normest1<T: LComplexField> where {
     n: usize,
     t: usize,
     rng: Xoshiro256StarStar,
@@ -58,12 +59,12 @@ pub struct Normest1<T: BlasScalar> where {
 ///
 /// It is at the designation of the user to check what is more efficient: to pass in one definite
 /// matrix or choose the alternative route described here.
-trait LinearOperator<T: BlasScalar> {
+trait LinearOperator<T: LComplexField> {
     fn multiply_matrix<S>(&self, b: &mut ArrayBase<S, Ix2>, c: &mut ArrayBase<S, Ix2>, transpose: bool)
         where S: DataMut<Elem=T>;
 }
 
-impl<S1, T: BlasScalar> LinearOperator<T> for ArrayBase<S1, Ix2>
+impl<S1, T: LComplexField> LinearOperator<T> for ArrayBase<S1, Ix2>
     where S1: Data<Elem=T>,
 {
     fn multiply_matrix<S2>(&self, b: &mut ArrayBase<S2, Ix2>, c: &mut ArrayBase<S2, Ix2>, transpose: bool)
@@ -106,7 +107,7 @@ impl<S1, T: BlasScalar> LinearOperator<T> for ArrayBase<S1, Ix2>
     }
 }
 
-impl<S1, T: BlasScalar> LinearOperator<T> for [&ArrayBase<S1, Ix2>]
+impl<S1, T: LComplexField> LinearOperator<T> for [&ArrayBase<S1, Ix2>]
     where S1: Data<Elem=T>
 {
     fn multiply_matrix<S2>(&self, b: &mut ArrayBase<S2, Ix2>, c: &mut ArrayBase<S2, Ix2>, transpose: bool)
@@ -140,7 +141,7 @@ impl<S1, T: BlasScalar> LinearOperator<T> for [&ArrayBase<S1, Ix2>]
     }
 }
 
-impl<S1, T: BlasScalar> LinearOperator<T> for (&ArrayBase<S1, Ix2>, usize)
+impl<S1, T: LComplexField> LinearOperator<T> for (&ArrayBase<S1, Ix2>, usize)
     where S1: Data<Elem=T>
 {
     fn multiply_matrix<S2>(&self, b: &mut ArrayBase<S2, Ix2>, c: &mut ArrayBase< S2, Ix2>, transpose: bool)
@@ -158,7 +159,7 @@ impl<S1, T: BlasScalar> LinearOperator<T> for (&ArrayBase<S1, Ix2>, usize)
     }
 }
 
-impl<T: BlasScalar> Normest1<T> {
+impl<T: LComplexField> Normest1<T> {
     pub fn new(n: usize, t: usize) -> Self {
         assert!(t <= n, "Cannot have more iteration columns t than columns in the matrix.");
         let rng = Xoshiro256StarStar::from_rng(&mut thread_rng()).expect("Rng initialization failed.");
@@ -221,7 +222,7 @@ impl<T: BlasScalar> Normest1<T> {
         // lessens the importance of counterexamples (see the comments in the next section).”
         {
             let rng_mut = &mut self.rng;
-            self.x_matrix.mapv_inplace(|_| sample[rng_mut.gen_range(0, sample.len())] );
+            self.x_matrix.mapv_inplace(|_| sample[rng_mut.gen_range(0..sample.len())] );
             self.x_matrix.column_mut(0).fill(T::one());
         }
 
@@ -430,7 +431,7 @@ impl<T: BlasScalar> Normest1<T> {
 ///
 /// [Higham, Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf
 /// [`Normest1`]: struct.Normest1.html
-pub fn normest1<T: BlasScalar>(a_matrix: &Array2<T>, t: usize, itmax: usize) -> T::RealField
+pub fn normest1<T: LComplexField>(a_matrix: &Array2<T>, t: usize, itmax: usize) -> T::RealField
 {
     // Assume the matrix is square and take the columns as n. If it's not square, the assertion in
     // normest.calculate will fail.
@@ -448,7 +449,7 @@ pub fn normest1<T: BlasScalar>(a_matrix: &Array2<T>, t: usize, itmax: usize) ->
 /// 1-norm on matrices of the same size, construct a [`Normest1`] first, and call its methods.
 ///
 /// [Higham, Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf
-pub fn normest1_pow<T: BlasScalar>(a_matrix: &Array2<T>, m: usize, t: usize, itmax: usize) -> T::RealField
+pub fn normest1_pow<T: LComplexField>(a_matrix: &Array2<T>, m: usize, t: usize, itmax: usize) -> T::RealField
 {
     // Assume the matrix is square and take the columns as n. If it's not square, the assertion in
     // normest.calculate will fail.
@@ -467,7 +468,7 @@ pub fn normest1_pow<T: BlasScalar>(a_matrix: &Array2<T>, m: usize, t: usize, itm
 /// 1-norm on matrices of the same size, construct a [`Normest1`] first, and call its methods.
 ///
 /// [Higham, Tisseur]: http://eprints.ma.man.ac.uk/321/1/covered/MIMS_ep2006_145.pdf
-pub fn normest1_prod<T: BlasScalar>(a_matrices: &[&Array2<T>], t: usize, itmax: usize) -> T::RealField
+pub fn normest1_prod<T: LComplexField>(a_matrices: &[&Array2<T>], t: usize, itmax: usize) -> T::RealField
 {
     assert!(a_matrices.len() > 0);
     let n = a_matrices[0].dim().1;
@@ -479,7 +480,7 @@ pub fn normest1_prod<T: BlasScalar>(a_matrices: &[&Array2<T>], t: usize, itmax:
 ///
 /// Panics if matrices `a` and `b` have different shape and strides, or if either underlying array is
 /// non-contiguous. This is to make sure that the iteration order over the matrices is the same.
-fn assign_signum_of_array<S1, S2, D, T: BlasScalar>(a: &ArrayBase<S1, D>, b: &mut ArrayBase<S2, D>)
+fn assign_signum_of_array<S1, S2, D, T: LComplexField>(a: &ArrayBase<S1, D>, b: &mut ArrayBase<S2, D>)
     where S1: Data<Elem=T>,
           S2: DataMut<Elem=T>,
           D: Dimension
@@ -492,14 +493,14 @@ fn assign_signum_of_array<S1, S2, D, T: BlasScalar>(a: &ArrayBase<S1, D>, b: &mu
     signum_of_slice(a_slice, b_slice);
 }
 
-fn signum_of_slice<T: BlasScalar>(source: &[T], destination: &mut [T]) {
+fn signum_of_slice<T: LComplexField>(source: &[T], destination: &mut [T]) {
     for (s, d) in source.iter().zip(destination) {
         *d = s.signum();
     }
 }
 
 /// Calculate the onenorm of a vector (an `ArrayBase` with dimension `Ix1`).
-fn vector_onenorm<S, T: BlasScalar>(a: &ArrayBase<S, Ix1>) -> T::RealField
+fn vector_onenorm<S, T: LComplexField>(a: &ArrayBase<S, Ix1>) -> T::RealField
     where S: Data<Elem=T>,
 {
     let stride = a.strides()[0];
@@ -518,7 +519,7 @@ fn vector_onenorm<S, T: BlasScalar>(a: &ArrayBase<S, Ix1>) -> T::RealField
 }
 
 /// Calculate the maximum norm of a vector (an `ArrayBase` with dimension `Ix1`).
-fn vector_maxnorm<S, T: BlasScalar>(a: &ArrayBase<S, Ix1>) -> T::RealField
+fn vector_maxnorm<S, T: LComplexField>(a: &ArrayBase<S, Ix1>) -> T::RealField
     where S: Data<Elem=T>
 {
     let stride = a.strides()[0];
@@ -567,7 +568,7 @@ fn vector_maxnorm<S, T: BlasScalar>(a: &ArrayBase<S, Ix1>) -> T::RealField
 
 /// Returns the one-norm of a matrix `a` together with the index of that column for
 /// which the norm is maximal.
-fn matrix_onenorm_with_index<S, T: BlasScalar>(a: &ArrayBase<S, Ix2>) -> (usize, T::RealField)
+fn matrix_onenorm_with_index<S, T: LComplexField>(a: &ArrayBase<S, Ix2>) -> (usize, T::RealField)
     where S: Data<Elem=T>,
 {
     //todo:
@@ -595,7 +596,7 @@ fn matrix_onenorm_with_index<S, T: BlasScalar>(a: &ArrayBase<S, Ix2>) -> (usize,
 ///
 /// Panics if arrays `a` and `c` don't have the same dimensions, or if the length of the slice
 /// `column_is_parallel` is not equal to the number of columns in `a`.
-fn find_parallel_columns_in<S1, S2, T: BlasScalar> (
+fn find_parallel_columns_in<S1, S2, T: LComplexField> (
     a: &ArrayBase<S1, Ix2>,
     c: &mut ArrayBase<S2, Ix2>,
     column_is_parallel: &mut [bool]
@@ -694,7 +695,7 @@ fn find_parallel_columns_in<S1, S2, T: BlasScalar> (
 ///
 /// Panics if arrays `a`, `b`, and `c` don't have the same dimensions, or if the length of the slice
 /// `column_is_parallel` is not equal to the number of columns in `a`.
-fn find_parallel_columns_between<S1, S2, S3, T: BlasScalar> (
+fn find_parallel_columns_between<S1, S2, S3, T: LComplexField> (
     a: &ArrayBase<S1, Ix2>,
     b: &ArrayBase<S2, Ix2>,
     c: &mut ArrayBase<S3, Ix2>,
@@ -755,7 +756,7 @@ fn find_parallel_columns_between<S1, S2, S3, T: BlasScalar> (
 ///
 /// Assumes that we have parallelity only if all entries of two columns `a` and `b` are either +1
 /// or -1.
-fn are_all_columns_parallel_between<S1, S2, T: BlasScalar> (
+fn are_all_columns_parallel_between<S1, S2, T: LComplexField> (
     a: &ArrayBase<S1, Ix2>,
     b: &ArrayBase<S1, Ix2>,
     c: &mut ArrayBase<S2, Ix2>,
@@ -807,7 +808,7 @@ fn are_all_columns_parallel_between<S1, S2, T: BlasScalar> (
 
 /// Find parallel columns in matrix `a` and columns in `a` that are parallel to any columns in
 /// matrix `b`, and replace those with random vectors. Returns `true` if resampling has taken place.
-fn resample_parallel_columns<S1, S2, S3, R, T: BlasScalar>(
+fn resample_parallel_columns<S1, S2, S3, R, T: LComplexField>(
     a: &mut ArrayBase<S1, Ix2>,
     b: &ArrayBase<S2, Ix2>,
     c: &mut ArrayBase<S3, Ix2>,
@@ -836,7 +837,7 @@ fn resample_parallel_columns<S1, S2, S3, R, T: BlasScalar>(
 /// Resamples column `i` of matrix `a` with elements drawn from `sample` using `rng`.
 ///
 /// Panics if `i` exceeds the number of columns in `a`.
-fn resample_column<R, S, T: BlasScalar>(
+fn resample_column<R, S, T: LComplexField>(
     a: &mut ArrayBase<S, Ix2>,
     i: usize, rng:
     &mut R, sample: &[T]
@@ -846,7 +847,7 @@ fn resample_column<R, S, T: BlasScalar>(
 {
     assert!(i < a.dim().1, "Trying to resample column with index exceeding matrix dimensions");
     assert!(sample.len() > 0);
-    a.column_mut(i).mapv_inplace(|_| sample[rng.gen_range(0, sample.len())]);
+    a.column_mut(i).mapv_inplace(|_| sample[rng.gen_range(0..sample.len())]);
 }
 
 /// Returns slice and layout underlying an array `a`.

From 5a8cc1f32ec20aee1006b547bc108b7244eece35 Mon Sep 17 00:00:00 2001
From: hmb <humberto.munozb@gmail.com>
Date: Sun, 27 Jun 2021 16:20:57 -0700
Subject: [PATCH 10/12] remove extraneous where

---
 src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib.rs b/src/lib.rs
index df169ed..ee1ca5a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -26,7 +26,7 @@ use std::collections::BTreeSet;
 use std::cmp;
 use std::slice;
 
-pub struct Normest1<T: LComplexField> where {
+pub struct Normest1<T: LComplexField> {
     n: usize,
     t: usize,
     rng: Xoshiro256StarStar,

From 65b9215f95ba5a0e881e9f36e25cbcc9589ecf5e Mon Sep 17 00:00:00 2001
From: hmb <humberto.munozb@gmail.com>
Date: Sun, 27 Jun 2021 16:32:31 -0700
Subject: [PATCH 11/12] resolve deprecation warnings

---
 src/lib.rs | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index ee1ca5a..73a7f7b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -163,14 +163,15 @@ impl<T: LComplexField> Normest1<T> {
     pub fn new(n: usize, t: usize) -> Self {
         assert!(t <= n, "Cannot have more iteration columns t than columns in the matrix.");
         let rng = Xoshiro256StarStar::from_rng(&mut thread_rng()).expect("Rng initialization failed.");
-        let x_matrix = unsafe { Array2::uninitialized((n, t)) };
-        let y_matrix = unsafe { Array2::uninitialized((n, t)) };
-        let z_matrix = unsafe { Array2::uninitialized((n, t)) };
+        // safety: These arrays are always assigned to intermediate results
+        let x_matrix = unsafe { Array2::uninit((n, t)).assume_init() };
+        let y_matrix = unsafe { Array2::uninit((n, t)).assume_init() };
+        let z_matrix = unsafe { Array2::uninit((n, t)).assume_init() };
 
-        let w_vector = unsafe { Array1::uninitialized(n) };
+        let w_vector = unsafe { Array1::uninit(n).assume_init() };
 
-        let sign_matrix = unsafe { Array2::uninitialized((n, t)) };
-        let sign_matrix_old = unsafe { Array2::uninitialized((n, t)) };
+        let sign_matrix = unsafe { Array2::uninit((n, t)).assume_init() };
+        let sign_matrix_old = unsafe { Array2::uninit((n, t)).assume_init() };
 
         let column_is_parallel = vec![false; t];
 
@@ -314,7 +315,7 @@ impl<T: LComplexField> Normest1<T> {
 
             // hᵢ= ‖Z(i,:)‖_∞
             let mut max_h = T::RealField::zero();
-            for (row, h_element) in self.z_matrix.genrows().into_iter().zip(self.h.iter_mut()) {
+            for (row, h_element) in self.z_matrix.rows().into_iter().zip(self.h.iter_mut()) {
                 let h : T::RealField = vector_maxnorm(&row);
                 max_h = if h > max_h { h } else { max_h };
                 // Convert f64 to NotNan for using sort_unstable_by below
@@ -574,7 +575,7 @@ fn matrix_onenorm_with_index<S, T: LComplexField>(a: &ArrayBase<S, Ix2>) -> (usi
     //todo:
     let mut max_norm : T::RealField = <T::RealField as Zero>::zero();
     let mut max_norm_index = 0;
-    for (i, column) in a.gencolumns().into_iter().enumerate() {
+    for (i, column) in a.columns().into_iter().enumerate() {
         let norm = vector_onenorm(&column);
         if norm > max_norm {
             max_norm = norm;
@@ -667,7 +668,7 @@ fn find_parallel_columns_in<S1, S2, T: LComplexField> (
     // . . . . x
 
     // Don't check more rows than we have columns 
-    'rows: for (i, row) in c.genrows().into_iter().enumerate().take(n_cols) {
+    'rows: for (i, row) in c.rows().into_iter().enumerate().take(n_cols) {
         // Skip if the column is already found to be parallel or if we are checking
         // the last column
         if column_is_parallel[i] || i >= n_cols - 1 { continue 'rows; }
@@ -739,7 +740,7 @@ fn find_parallel_columns_between<S1, S2, S3, T: LComplexField> (
     // the outer loop) is parallel to any column of b (inner loop). By iterating via columns we would check if
     // any column of a is parallel to the, in that case, current column of b.
     // TODO:  Implement for column major arrays.
-    'rows: for (i, row) in c.genrows().into_iter().enumerate().take(n_cols) {
+    'rows: for (i, row) in c.rows().into_iter().enumerate().take(n_cols) {
         // Skip if the column is already found to be parallel the last column.
         if column_is_parallel[i] { continue 'rows; }
         for element in row {
@@ -794,7 +795,7 @@ fn are_all_columns_parallel_between<S1, S2, T: LComplexField> (
     // terms of logic there is no difference: we simply check if a specific column of a is parallel
     // to any column of b. By iterating via columns we would check if any column of a is parallel
     // to a specific column of b.
-    'rows: for row in c.genrows() {
+    'rows: for row in c.rows() {
         for element in row {
             // If a parallel column was found, cut to the next one.
             if T::abs(*element).to_subset().unwrap() == n_rows as f64 { continue 'rows; }
@@ -995,13 +996,14 @@ mod tests {
         let calculated = Array1::from(calculated);
         let expected = Array1::from(expected);
 
-        let mut underestimation_ratio = unsafe { Array1::<f64>::uninitialized(nsamples) };
+        let mut underestimation_ratio =   Array1::<f64>::uninit(nsamples);
         Zip::from(&calculated)
             .and(&expected)
             .and(&mut underestimation_ratio)
-            .apply(|c, e, u| {
-                *u = *c / *e;
+            .for_each(|c, e, u| {
+                unsafe { u.as_mut_ptr().write(*c / *e); }
         });
+        let underestimation_ratio = unsafe { underestimation_ratio.assume_init() };
 
         let underestimation_mean = underestimation_ratio.mean_axis(Axis(0)).unwrap().into_scalar();
         assert!(0.99 < underestimation_mean);

From d4a52d6712a6a40dc227aef1c78aee1772c4746f Mon Sep 17 00:00:00 2001
From: hmb <humberto.munozb@gmail.com>
Date: Mon, 13 Dec 2021 16:47:26 -0800
Subject: [PATCH 12/12] bump dependency versions

---
 Cargo.toml | 4 ++--
 src/lib.rs | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 63a7983..fbaf355 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,14 +14,14 @@ keywords = ["condition", "linalg", "matrix", "ndarray", "norm"]
 [dependencies]
 cblas = "0.4"
 lapacke = "0.5"
-lapack-traits = {version="0.3", features=["simba"]}
+lapack-traits = {version="0.4", features=["simba"]}
 ndarray = "0.15"
 num-traits = "0.2"
 ordered-float = "1.0"
 rand = "0.8"
 rand_distr = "0.4"
 rand_xoshiro = "0.6"
-simba="0.5"
+simba="0.6"
 
 #[patch.crates-io]
 #lapack-traits = {git="https://github.com/hmunozb/lapack-traits.git"}
diff --git a/src/lib.rs b/src/lib.rs
index 73a7f7b..9e7b458 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -236,7 +236,7 @@ impl<T: LComplexField> Normest1<T> {
         }
 
         // Set all columns to unit vectors
-        self.x_matrix.mapv_inplace(|x| (x / T::from_usize(n).unwrap()) );
+        self.x_matrix.mapv_inplace(|x| (x / T::from_subset(&(n as f64))) );
 
         let mut estimate = T::RealField::zero();
         let mut best_index = 0;
@@ -317,7 +317,7 @@ impl<T: LComplexField> Normest1<T> {
             let mut max_h = T::RealField::zero();
             for (row, h_element) in self.z_matrix.rows().into_iter().zip(self.h.iter_mut()) {
                 let h : T::RealField = vector_maxnorm(&row);
-                max_h = if h > max_h { h } else { max_h };
+                max_h = if h > max_h { h.clone() } else { max_h };
                 // Convert f64 to NotNan for using sort_unstable_by below
                 *h_element = h.to_subset().unwrap().into();
             }