-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathengineHealthModel.py
More file actions
143 lines (114 loc) · 5.64 KB
/
engineHealthModel.py
File metadata and controls
143 lines (114 loc) · 5.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
class EngineHealthModel:
    """Analyze per-engine sensor data and derive a relative health score.

    Typical workflow::

        model = EngineHealthModel()
        df = model.load_data(path)
        model.analyze_correlations(df)
        X_pca, engine_ids = model.prepare_data(df)
        scores = model.calculate_health_scores(X_pca)
        model.train(X_pca, scores)
        model.predict_health(X_pca)

    Attributes:
        model: RandomForestRegressor used to predict health scores.
        scaler: StandardScaler applied to the per-engine statistics.
        pca: PCA keeping enough components to explain 95% of the variance.
        is_fitted: True once ``train`` has been called.
        correlation_matrix: Result of the last ``analyze_correlations`` call,
            or None.
        engine_health_scores: Result of the last ``calculate_health_scores``
            call, or None.
    """

    # Column-name prefixes that identify feature columns.
    # NOTE(review): an earlier inline note said "dont take op_settings", but
    # the code has always included them; behavior is kept - confirm intent.
    _FEATURE_PREFIXES = ("sensor_", "op_setting_")

    # Per-engine summary statistics, in the order they appear in each
    # feature's slice of the design matrix.
    _ENGINE_STATS = ("mean", "std", "max", "min")

    def __init__(self):
        self.model = RandomForestRegressor(n_estimators=100, random_state=42)
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=0.95)  # Keep 95% of variance
        self.is_fitted = False
        self.correlation_matrix = None
        self.engine_health_scores = None

    def _feature_columns(self, df):
        """Return the sensor / operational-setting column names of ``df``."""
        return [
            col
            for col in df.columns
            if isinstance(col, str) and col.startswith(self._FEATURE_PREFIXES)
        ]

    def _engine_statistics(self, df):
        """Build the (n_engines, 4 * n_features) statistics matrix and id list."""
        feature_cols = self._feature_columns(df)
        if not feature_cols:
            raise ValueError("No sensor_* or op_setting_* columns found in data")

        # One row per engine id (sorted); columns are grouped per feature as
        # mean, std, max, min.
        stats = df.groupby("id")[feature_cols].agg(list(self._ENGINE_STATS))
        if stats.empty:
            raise ValueError("No engine data available to prepare")

        X = stats.to_numpy(dtype=float, copy=True)

        # The sample standard deviation is NaN for an engine with a single
        # observation; treat that as "no observed spread" so the scaler and
        # PCA do not receive NaN.
        std_offset = self._ENGINE_STATS.index("std")
        std_block = X[:, std_offset::len(self._ENGINE_STATS)]
        std_block[np.isnan(std_block)] = 0.0

        return X, stats.index.tolist()

    def load_data(self, file_path):
        """Read a headerless, whitespace-delimited data file.

        The 26 columns are named ``id``, ``time``, ``op_setting_1..3`` and
        ``sensor_1..21``.
        NOTE(review): this layout presumably matches the NASA C-MAPSS
        turbofan files - confirm against the data source.

        Args:
            file_path: Path or file-like object accepted by ``pd.read_csv``.

        Returns:
            DataFrame with the columns described above.
        """
        columns = (
            ["id", "time"]
            + [f"op_setting_{i}" for i in range(1, 4)]
            + [f"sensor_{i}" for i in range(1, 22)]
        )
        # sep=r"\s+" is the documented equivalent of the deprecated
        # delim_whitespace=True.
        return pd.read_csv(file_path, sep=r"\s+", header=None, names=columns)

    def analyze_correlations(self, df):
        """Compute and store the correlation matrix of all feature columns.

        Args:
            df: DataFrame containing ``sensor_*`` / ``op_setting_*`` columns.

        Returns:
            The correlation matrix (also stored on ``correlation_matrix``).
        """
        self.correlation_matrix = df[self._feature_columns(df)].corr()
        return self.correlation_matrix

    def get_top_correlations(self, n=10):
        """Return the ``n`` feature pairs with the largest absolute correlation.

        Each unordered pair is reported once; pairs whose correlation is NaN
        (for example when one feature is constant) are skipped.

        Args:
            n: Maximum number of pairs to return.

        Returns:
            List of ``(feature_a, feature_b, correlation)`` tuples sorted by
            descending absolute correlation.

        Raises:
            ValueError: If ``analyze_correlations`` has not been run.
        """
        if self.correlation_matrix is None:
            raise ValueError("Correlation analysis must be performed first")

        names = list(self.correlation_matrix.columns)
        values = self.correlation_matrix.to_numpy()
        size = len(names)

        # Walk the strict upper triangle so each pair appears exactly once.
        pairs = [
            (names[i], names[j], values[i, j])
            for i in range(size)
            for j in range(i + 1, size)
            if not np.isnan(values[i, j])
        ]
        pairs.sort(key=lambda pair: abs(pair[2]), reverse=True)
        return pairs[:n]

    def prepare_data(self, df, fit=True):
        """Turn raw cycle-level data into PCA features, one row per engine.

        For every engine the mean, standard deviation, maximum and minimum of
        each feature column are computed, then standardized and projected
        with PCA.

        Args:
            df: DataFrame with an ``id`` column and feature columns.
            fit: When True (default, the original behavior) the scaler and
                PCA are re-fitted on ``df``. Pass False to project new data
                with the already fitted transforms, e.g. before
                ``predict_health``.

        Returns:
            Tuple ``(X_pca, engine_ids)`` where ``X_pca`` has one row per
            engine and ``engine_ids`` lists the ids in the same (sorted)
            order.

        Raises:
            ValueError: If ``df`` has no feature columns or no engines.
        """
        X, engine_ids = self._engine_statistics(df)

        if fit:
            X_scaled = self.scaler.fit_transform(X)
            X_pca = self.pca.fit_transform(X_scaled)
        else:
            X_scaled = self.scaler.transform(X)
            X_pca = self.pca.transform(X_scaled)

        return X_pca, engine_ids

    def calculate_health_scores(self, X_pca):
        """Map the first principal component onto a 0-100 health score.

        The negated first component is min-max normalized over the engines
        passed in, so scores are relative to that set of engines.
        NOTE(review): the sign of a principal component is arbitrary, so
        "higher score = healthier" is an assumption - confirm against known
        engine conditions.

        Args:
            X_pca: 2-D array with one row per engine, as returned by
                ``prepare_data``.

        Returns:
            1-D array of scores in [0, 100] (also stored on
            ``engine_health_scores``). If all engines share the same first
            component, every score is 50.0 because no ranking is possible.

        Raises:
            ValueError: If ``X_pca`` contains no rows.
        """
        raw = -np.asarray(X_pca, dtype=float)[:, 0]  # negate: higher = better
        if raw.size == 0:
            raise ValueError("Cannot calculate health scores for empty input")

        lowest = raw.min()
        span = raw.max() - lowest
        if span == 0:
            # Avoid 0/0 -> NaN when there is a single engine or no spread.
            health_scores = np.full(raw.shape, 50.0)
        else:
            health_scores = (raw - lowest) / span * 100

        self.engine_health_scores = health_scores
        return health_scores

    def train(self, X_pca, health_scores):
        """Fit the regressor to predict ``health_scores`` from ``X_pca``.

        Args:
            X_pca: Feature matrix from ``prepare_data``.
            health_scores: Target scores, one per row of ``X_pca``.
        """
        self.model.fit(X_pca, health_scores)
        self.is_fitted = True

    def predict_health(self, X_pca):
        """Predict health scores for engines.

        Args:
            X_pca: Feature matrix in the same PCA space used for training.

        Returns:
            Array of predicted health scores.

        Raises:
            ValueError: If ``train`` has not been called.
        """
        if not self.is_fitted:
            raise ValueError("Model must be trained before prediction")
        return self.model.predict(X_pca)

    def visualize_correlations(self):
        """Draw the stored correlation matrix as a heatmap.

        Returns:
            The matplotlib figure, for display in the GUI.

        Raises:
            ValueError: If ``analyze_correlations`` has not been run.
        """
        if self.correlation_matrix is None:
            raise ValueError("Correlation analysis must be performed first")

        # Explicit figure/axes handles avoid relying on pyplot's
        # "current figure" global state.
        fig, ax = plt.subplots(figsize=(12, 10))
        sns.heatmap(
            self.correlation_matrix,
            annot=False,
            cmap='coolwarm',
            linewidths=0.5,
            ax=ax,
        )
        ax.set_title('Feature Correlation Matrix')
        fig.tight_layout()
        return fig

    def visualize_health_scores(self, engine_ids):
        """Draw a bar chart of the stored health scores.

        Args:
            engine_ids: Engine ids in the same order as the stored scores
                (as returned by ``prepare_data``).

        Returns:
            The matplotlib figure, for display in the GUI.

        Raises:
            ValueError: If ``calculate_health_scores`` has not been run.
        """
        if self.engine_health_scores is None:
            raise ValueError("Health scores must be calculated first")

        fig, ax = plt.subplots(figsize=(12, 6))
        ax.bar(engine_ids, self.engine_health_scores)
        ax.set_xlabel('Engine ID')
        ax.set_ylabel('Health Score (0-100)')
        ax.set_title('Engine Health Scores')
        ax.tick_params(axis='x', labelrotation=90)
        fig.tight_layout()
        return fig