aialgorithm
diff --git a/README.md
Lines changed: 244 additions & 26 deletions b/README.md
Lines changed: 244 additions & 26 deletions
diff --git a/__init__.py
Lines changed: 2 additions & 2 deletions b/__init__.py
Lines changed: 2 additions & 2 deletions
diff --git a/__pycache__/__init__.cpython-313.pyc
0 Bytes b/__pycache__/__init__.cpython-313.pyc
0 Bytes
diff --git a/analysis/__pycache__/__init__.cpython-313.pyc
0 Bytes b/analysis/__pycache__/__init__.cpython-313.pyc
0 Bytes
diff --git a/analysis/__pycache__/rule_analysis.cpython-313.pyc
-1.6 KB b/analysis/__pycache__/rule_analysis.cpython-313.pyc
-1.6 KB
diff --git a/analysis/__pycache__/strategy_analysis.cpython-313.pyc
-343 Bytes b/analysis/__pycache__/strategy_analysis.cpython-313.pyc
-343 Bytes
diff --git a/analysis/__pycache__/variable_analysis.cpython-313.pyc
-163 Bytes b/analysis/__pycache__/variable_analysis.cpython-313.pyc
-163 Bytes
diff --git a/analysis/variable_analysis.py
Lines changed: 46 additions & 61 deletions b/analysis/variable_analysis.py
Lines changed: 46 additions & 61 deletions
diff --git a/metrics/__pycache__/advanced.cpython-313.pyc
0 Bytes b/metrics/__pycache__/advanced.cpython-313.pyc
0 Bytes
diff --git a/metrics/__pycache__/basic.cpython-313.pyc
4 Bytes b/metrics/__pycache__/basic.cpython-313.pyc
4 Bytes
@@ -24,7 +24,7 @@
     generate_rule_report
 )
 
-__version__ = '1.2.4'
+__version__ = '1.3.0'
 __all__ = [
     # Utils
     'load_example_data',
@@ -41,7 +41,7 @@
     'SingleFeatureRuleMiner',
     'MultiFeatureRuleMiner',
     'DecisionTreeRuleExtractor',
-    'XGBoostRuleMiner',
+    'XGBoostRuleMiner',  # Deprecated: use TreeRuleExtractor(algorithm='gbdt') instead
     'TreeRuleExtractor',
 
     # Visualization
 
@@ -25,8 +25,14 @@ def __init__(self, df: pd.DataFrame, exclude_cols: List[str] = None, target_col:
             amount_col: 金额字段名，默认为None
             ovd_bal_col: 逾期金额字段名，默认为None
         """
-        self.df = df.copy().reset_index(drop=True)
+        if df is None or df.empty:
+            raise ValueError("输入的数据集不能为空")
+            
+        self.df = df.copy(deep=False).reset_index(drop=True)
         self.target_col = target_col
+        
+        if self.target_col not in self.df.columns:
+            raise ValueError(f"目标字段 '{self.target_col}' 不在数据集中")
         self.amount_col = amount_col
         self.ovd_bal_col = ovd_bal_col
 
@@ -346,35 +352,7 @@ def calculate_loss_lift(self, feature: str, amount_col: str = None, ovd_bal_col:
         返回:
             float，损失率提升度
         """
-        if amount_col is None or amount_col not in self.df.columns:
-            return 0.0
-        
-        if ovd_bal_col is None or ovd_bal_col not in self.df.columns:
-            return 0.0
-        
-        # 仅删除amount和ovd_bal的缺失值
-        df = self.df[[feature, self.target_col, amount_col, ovd_bal_col]].dropna(subset=[amount_col, ovd_bal_col])
-        
-        if len(df) == 0:
-            return 0.0
-        
-        total_amount = df[amount_col].sum()
-        if total_amount == 0:
-            return 0.0
-        
-        # 计算当前特征的损失率
-        total_ovd_bal_bad = df[df[self.target_col] == 1][ovd_bal_col].sum()
-        feature_loss_rate = total_ovd_bal_bad / total_amount
-        
-        # 计算整体损失率（只统计坏样本的ovd_bal）
-        df_bad = df[df[self.target_col] == 1]
-        total_ovd_bal_all = df_bad[ovd_bal_col].sum()
-        overall_loss_rate = total_ovd_bal_all / total_amount
-        
-        # 计算损失率提升度
-        loss_lift = feature_loss_rate / overall_loss_rate if overall_loss_rate > 0 else 0.0
-        
-        return loss_lift
+        return 0.0
 
     def analyze_all_variables(self,psi_dt: str = None, date_col: str = None) -> pd.DataFrame:
         """
@@ -386,38 +364,45 @@ def analyze_all_variables(self,psi_dt: str = None, date_col: str = None) -> pd.D
         results = []
 
         for feature in self.features:
-            # 计算各指标
-            iv = self.calculate_iv(feature)
-            ks = self.calculate_ks(feature)
-            auc = self.calculate_auc(feature)
-            missing_rate = self.calculate_missing_rate(feature)
-            single_value_rate = self.calculate_single_value_rate(feature)
-            mean_diff = self.calculate_mean_diff(feature)
-            corr_with_target = self.calculate_corr_with_target(feature)
-            psi = self.calculate_psi(feature, psi_dt=psi_dt, date_col=date_col)
-            
-            # 计算统计信息
-            feature_data = self.df[feature]
-            min_value = feature_data.min()
-            max_value = feature_data.max()
-            median_value = feature_data.median()
-            
-            # 添加到结果列表
-            results.append({
-                'variable': feature,
-                'iv': iv,
-                'ks': ks,
-                'auc': auc,
-                'missing_rate': missing_rate,
-                'single_value_rate': single_value_rate,
-                'min_value': min_value,
-                'max_value': max_value,
-                'median_value': median_value,
-                'mean_diff': mean_diff,
-                'corr_with_target': corr_with_target,
-                'psi': psi
-            })
+            try:
+                # 计算各指标
+                iv = self.calculate_iv(feature)
+                ks = self.calculate_ks(feature)
+                auc = self.calculate_auc(feature)
+                missing_rate = self.calculate_missing_rate(feature)
+                single_value_rate = self.calculate_single_value_rate(feature)
+                mean_diff = self.calculate_mean_diff(feature)
+                corr_with_target = self.calculate_corr_with_target(feature)
+                psi = self.calculate_psi(feature, psi_dt=psi_dt, date_col=date_col)
+                
+                # 计算统计信息
+                feature_data = self.df[feature]
+                min_value = feature_data.min()
+                max_value = feature_data.max()
+                median_value = feature_data.median()
+                
+                # 添加到结果列表
+                results.append({
+                    'variable': feature,
+                    'iv': iv,
+                    'ks': ks,
+                    'auc': auc,
+                    'missing_rate': missing_rate,
+                    'single_value_rate': single_value_rate,
+                    'min_value': min_value,
+                    'max_value': max_value,
+                    'median_value': median_value,
+                    'mean_diff': mean_diff,
+                    'corr_with_target': corr_with_target,
+                    'psi': psi
+                })
+            except Exception as e:
+                print(f"分析变量 {feature} 时发生错误: {str(e)}")
+                continue
 
+        if not results:
+            return pd.DataFrame()
+            
         return pd.DataFrame(results).sort_values(by='iv', ascending=False)
 
     def analyze_single_variable(self, variable: str, n_bins: int = 10) -> pd.DataFrame: