[MRG+2] Add float32 support for Linear Discriminant Analysis (scikit-learn#13273)

thibsej · GaelVaroquaux · commit 415fd83dbf08 · 2019-02-27T10:10:01.000+01:00
* [skip ci] Empty commit to trigger PR

* Add dtype testing

* Fix: dtype testing

* Fix test_estimators[OneVsRestClassifier-check_estimators_dtypes]

* TST refactor using parametrize + Add failing test for int32

* Fix for int32

* Fix code according to review + Fix PEP8 violation

* Fix dtype for int32 and complex

* Fix pep8 violation

* Update whatsnew + test COSMIT
diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
@@ -93,6 +93,10 @@ Support for Python 3.4 and below has been officially dropped.
 :mod:`sklearn.discriminant_analysis`
 ....................................
 
+- |Enhancement| :class:`discriminant_analysis.LinearDiscriminantAnalysis` now
+  preserves ``float32`` and ``float64`` dtypes. :issue:`8769` and
+  :issue:`11000` by :user:`Thibault Sejourne <thibsej>`.
+
 - |Fix| A ``ChangedBehaviourWarning`` is now raised when
   :class:`discriminant_analysis.LinearDiscriminantAnalysis` is given as
   parameter ``n_components > min(n_features, n_classes - 1)``, and
diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py
@@ -427,7 +427,8 @@ def fit(self, X, y):
             Target values.
         """
         # FIXME: Future warning to be removed in 0.23
-        X, y = check_X_y(X, y, ensure_min_samples=2, estimator=self)
+        X, y = check_X_y(X, y, ensure_min_samples=2, estimator=self,
+                         dtype=[np.float64, np.float32])
         self.classes_ = unique_labels(y)
         n_samples, _ = X.shape
         n_classes = len(self.classes_)
@@ -485,9 +486,10 @@ def fit(self, X, y):
             raise ValueError("unknown solver {} (valid solvers are 'svd', "
                              "'lsqr', and 'eigen').".format(self.solver))
         if self.classes_.size == 2:  # treat binary case as a special case
-            self.coef_ = np.array(self.coef_[1, :] - self.coef_[0, :], ndmin=2)
+            self.coef_ = np.array(self.coef_[1, :] - self.coef_[0, :], ndmin=2,
+                                  dtype=X.dtype)
             self.intercept_ = np.array(self.intercept_[1] - self.intercept_[0],
-                                       ndmin=1)
+                                       ndmin=1, dtype=X.dtype)
         return self
 
     def transform(self, X):
diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py
@@ -7,6 +7,7 @@
 from sklearn.utils.testing import (assert_array_equal, assert_no_warnings,
                                    assert_warns_message)
 from sklearn.utils.testing import assert_array_almost_equal
+from sklearn.utils.testing import assert_allclose
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_raises
@@ -296,6 +297,31 @@ def test_lda_dimension_warning(n_classes, n_features):
         assert_warns_message(FutureWarning, future_msg, lda.fit, X, y)
 
 
+@pytest.mark.parametrize("data_type, expected_type", [
+    (np.float32, np.float32),
+    (np.float64, np.float64),
+    (np.int32, np.float64),
+    (np.int64, np.float64)
+])
+def test_lda_dtype_match(data_type, expected_type):
+    for (solver, shrinkage) in solver_shrinkage:
+        clf = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
+        clf.fit(X.astype(data_type), y.astype(data_type))
+        assert clf.coef_.dtype == expected_type
+
+
+def test_lda_numeric_consistency_float32_float64():
+    for (solver, shrinkage) in solver_shrinkage:
+        clf_32 = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
+        clf_32.fit(X.astype(np.float32), y.astype(np.float32))
+        clf_64 = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
+        clf_64.fit(X.astype(np.float64), y.astype(np.float64))
+
+        # Check value consistency between types
+        rtol = 1e-6
+        assert_allclose(clf_32.coef_, clf_64.coef_, rtol=rtol)
+
+
 def test_qda():
     # QDA classification.
     # This checks that QDA implements fit and predict and returns