utils.py
import logging

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc


def setup_logging():
    """
    Set up the logging configuration for the project.

    This function configures the logging module with the following settings:
    - Log level: INFO
    - Log format: timestamp - log level - message
    - Date format: YYYY-MM-DD HH:MM:SS
    """
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')


def calculate_performance_metrics(y_true, y_pred_proba):
    """
    Calculate various performance metrics for the model.

    This function computes the following metrics:
    1. ROC AUC (Receiver Operating Characteristic Area Under the Curve)
    2. PR AUC (Precision-Recall Area Under the Curve)
    3. Best F1 Score and its corresponding threshold

    Args:
        y_true (np.array): True labels
        y_pred_proba (np.array): Predicted probabilities for the positive class

    Returns:
        dict: A dictionary containing the calculated metrics
    """
    # Calculate ROC AUC
    roc_auc = roc_auc_score(y_true, y_pred_proba)

    # Calculate Precision-Recall AUC
    precision, recall, _ = precision_recall_curve(y_true, y_pred_proba)
    pr_auc = auc(recall, precision)

    # Calculate F1 score at different thresholds
    f1_scores = []
    thresholds = np.arange(0.1, 1.0, 0.1)
    for threshold in thresholds:
        y_pred = (y_pred_proba >= threshold).astype(int)
        prec = np.sum((y_pred == 1) & (y_true == 1)) / (np.sum(y_pred == 1) + 1e-10)
        rec = np.sum((y_pred == 1) & (y_true == 1)) / (np.sum(y_true == 1) + 1e-10)
        f1 = 2 * (prec * rec) / (prec + rec + 1e-10)
        f1_scores.append(f1)

    best_threshold = thresholds[np.argmax(f1_scores)]
    best_f1 = max(f1_scores)

    return {
        "ROC AUC": roc_auc,
        "PR AUC": pr_auc,
        "Best F1 Score": best_f1,
        "Best Threshold": best_threshold
    }


def save_results(results, filename="results.csv"):
    """
    Save results to a CSV file.

    Args:
        results (list): A list of dictionaries containing the results
        filename (str, optional): The name of the file to save the results. Defaults to "results.csv".
    """
    df = pd.DataFrame(results)
    df.to_csv(filename, index=False)
    logging.info(f"Results saved to {filename}")


if __name__ == "__main__":
    # Test the functions
    setup_logging()
    logging.info("Testing utils functions")

    # Generate some dummy data
    np.random.seed(42)
    y_true = np.random.randint(0, 2, 1000)
    y_pred_proba = np.random.random(1000)

    metrics = calculate_performance_metrics(y_true, y_pred_proba)
    logging.info("Performance metrics:")
    for metric, value in metrics.items():
        logging.info(f"{metric}: {value}")

    save_results([metrics], "test_results.csv")