44from ._types import BOOL
55from sklearn .base import RegressorMixin , BaseEstimator , TransformerMixin
66from sklearn .utils .validation import (assert_all_finite , check_is_fitted ,
7- check_X_y )
7+ check_X_y , check_array )
88import numpy as np
99from scipy import sparse
1010from ._version import get_versions
11+ try :
12+ from sklearn .utils .estimator_checks import check_complex_data
13+ except ImportError :
14+ check_complex_data = lambda x : x
15+
1116__version__ = get_versions ()['version' ]
1217
1318class Earth (BaseEstimator , RegressorMixin , TransformerMixin ):
@@ -256,11 +261,11 @@ class Earth(BaseEstimator, RegressorMixin, TransformerMixin):
        specified, then it is a dict where each key is a feature importance type
        name and its corresponding value is an array of shape m.
258263
259- `_version `: string
260- The version of py-earth in which the Earth object was originally
261- created . This information may be useful when dealing with
264+ `fit_version_ `: string
265+ The version of py-earth with which the Earth object was originally
266+ fitted . This information may be useful when dealing with
262267 serialized Earth objects.
263-
268+
264269
265270 References
266271 ----------
@@ -293,7 +298,7 @@ class Earth(BaseEstimator, RegressorMixin, TransformerMixin):
293298 'feature_importance_type' ,
294299 'verbose'
295300 ])
296-
301+
297302 def __init__ (self , max_terms = None , max_degree = None , allow_missing = False ,
298303 penalty = None , endspan_alpha = None , endspan = None ,
299304 minspan_alpha = None , minspan = None ,
@@ -323,7 +328,6 @@ def __init__(self, max_terms=None, max_degree=None, allow_missing=False,
323328 self .enable_pruning = enable_pruning
324329 self .feature_importance_type = feature_importance_type
325330 self .verbose = verbose
326- self ._version = __version__
327331
328332 def __eq__ (self , other ):
329333 if self .__class__ is not other .__class__ :
@@ -399,6 +403,7 @@ def _scrub_x(self, X, missing, **kwargs):
399403 raise TypeError ('A sparse matrix was passed, but dense data '
400404 'is required. Use X.toarray() to convert to '
401405 'dense.' )
406+ check_array (X , ensure_2d = True , force_all_finite = False )
402407 X = np .asarray (X , dtype = np .float64 , order = 'F' )
403408
404409 # Figure out missingness
@@ -466,14 +471,16 @@ def _scrub(self, X, y, sample_weight, output_weight, missing, **kwargs):
466471
467472 # Convert y to internally used data type
468473 y = np .asarray (y , dtype = np .float64 )
469- assert_all_finite (y )
470474
471475 if len (y .shape ) == 1 :
472476 y = y [:, np .newaxis ]
473477
474478 # Deal with sample_weight
475479 if sample_weight is None :
476- sample_weight = np .ones ((y .shape [0 ], 1 ), dtype = y .dtype )
480+ try :
481+ sample_weight = np .ones ((y .shape [0 ], 1 ), dtype = y .dtype )
482+ except :
483+ raise
477484 else :
478485 sample_weight = np .asarray (sample_weight , dtype = np .float64 )
479486 assert_all_finite (sample_weight )
@@ -501,10 +508,11 @@ def _scrub(self, X, y, sample_weight, output_weight, missing, **kwargs):
501508
502509 # Make sure everything is finite (except X, which is allowed to have
503510 # missing values)
504- assert_all_finite (missing )
505- assert_all_finite (y )
506- assert_all_finite (sample_weight )
507- assert_all_finite (output_weight )
511+ check_array (missing )
512+ check_array (y , ensure_2d = False )
513+ check_array (sample_weight , ensure_2d = False )
514+ if output_weight is not None :
515+ check_array (output_weight , ensure_2d = False )
508516
509517 # Make sure everything is consistent
510518 check_X_y (X , y , accept_sparse = False , multi_output = True ,
@@ -598,7 +606,7 @@ def fit(self, X, y=None,
598606 for k in feature_importance_type :
599607 if k not in FEAT_IMP_CRITERIA :
600608 msg = ("'{}' is not valid value for feature_importance, "
601- "allowed critera are : {}" .format (k , FEAT_IMP_CRITERIA ))
609+ "allowed criteria are : {}" .format (k , FEAT_IMP_CRITERIA ))
602610 raise ValueError (msg )
603611
604612 if len (feature_importance_type ) > 0 and self .enable_pruning is False :
@@ -622,6 +630,10 @@ def fit(self, X, y=None,
622630 self .basis_ = self .basis_ .smooth (X )
623631 self .linear_fit (X , y , sample_weight , output_weight , missing ,
624632 skip_scrub = True )
633+
634+ # Record the version used for fitting
635+ self .fit_version_ = __version__
636+
625637 return self
626638
627639# def forward_pass2(self, X, y=None,
@@ -1271,51 +1283,51 @@ def score(self, X, y=None, sample_weight=None, output_weight=None,
12711283# mse0 = np.sum(y_sqr * output_weight) / m
12721284 return 1 - (mse / mse0 )
12731285
1274- def score_samples (self , X , y = None , missing = None ):
1275- '''
1276-
1277- Calculate sample-wise fit scores.
1278-
1279- Parameters
1280- ----------
1281-
1282- X : array-like, shape = [m, n] where m is the number of samples
1283- and n is the number of features The training predictors.
1284- The X parameter can be a numpy array, a pandas DataFrame, a patsy
1285- DesignMatrix, or a tuple of patsy DesignMatrix objects as output
1286- by patsy.dmatrices.
1287-
1288- y : array-like, optional (default=None), shape = [m, p] where m is the
1289- number of samples, p the number of outputs.
1290- The y parameter can be a numpy array, a pandas DataFrame,
1291- a Patsy DesignMatrix, or can be left as None (default) if X was
1292- the output of a call to patsy.dmatrices (in which case, X contains
1293- the response).
1294-
1295- missing : array-like, shape = [m, n] where m is the number of samples
1296- and n is the number of features.
1297- The missing parameter can be a numpy array, a pandas DataFrame, or
1298- a patsy DesignMatrix. All entries will be interpreted as boolean
1299- values, with True indicating the corresponding entry in X should be
1300- interpreted as missing. If the missing argument not used but the X
1301- argument is a pandas DataFrame, missing will be inferred from X if
1302- allow_missing is True.
1303-
1304- Returns
1305- -------
1306-
1307- scores : array of shape=[m, p] of floats with maximum value of 1
1308- (it can be negative).
1309- The scores represent how good each output of each example is
1310- predicted, a perfect score would be 1
1311- (the score can be negative).
1312-
1313- '''
1314- X , y , sample_weight , output_weight , missing = self ._scrub (
1315- X , y , None , None , missing )
1316- y_hat = self .predict (X , missing = missing )
1317- residual = 1 - (y - y_hat ) ** 2 / y ** 2
1318- return residual
1286+ # def score_samples(self, X, y, missing=None):
1287+ # '''
1288+ #
1289+ # Calculate sample-wise fit scores.
1290+ #
1291+ # Parameters
1292+ # ----------
1293+ #
1294+ # X : array-like, shape = [m, n] where m is the number of samples
1295+ # and n is the number of features The training predictors.
1296+ # The X parameter can be a numpy array, a pandas DataFrame, a patsy
1297+ # DesignMatrix, or a tuple of patsy DesignMatrix objects as output
1298+ # by patsy.dmatrices.
1299+ #
1300+ # y : array-like, optional (default=None), shape = [m, p] where m is the
1301+ # number of samples, p the number of outputs.
1302+ # The y parameter can be a numpy array, a pandas DataFrame,
1303+ # a Patsy DesignMatrix, or can be left as None (default) if X was
1304+ # the output of a call to patsy.dmatrices (in which case, X contains
1305+ # the response).
1306+ #
1307+ # missing : array-like, shape = [m, n] where m is the number of samples
1308+ # and n is the number of features.
1309+ # The missing parameter can be a numpy array, a pandas DataFrame, or
1310+ # a patsy DesignMatrix. All entries will be interpreted as boolean
1311+ # values, with True indicating the corresponding entry in X should be
1312+ # interpreted as missing. If the missing argument not used but the X
1313+ # argument is a pandas DataFrame, missing will be inferred from X if
1314+ # allow_missing is True.
1315+ #
1316+ # Returns
1317+ # -------
1318+ #
1319+ # scores : array of shape=[m, p] of floats with maximum value of 1
1320+ # (it can be negative).
1321+ # The scores represent how good each output of each example is
1322+ # predicted, a perfect score would be 1
1323+ # (the score can be negative).
1324+ #
1325+ # '''
1326+ # X, y, sample_weight, output_weight, missing = self._scrub(
1327+ # X, y, None, None, missing)
1328+ # y_hat = self.predict(X, missing=missing)
1329+ # residual = 1 - (y - y_hat) ** 2 / y**2
1330+ # return residual
13191331
13201332 def transform (self , X , missing = None ):
13211333 '''
0 commit comments