-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathloader.py
More file actions
143 lines (122 loc) · 4.23 KB
/
loader.py
File metadata and controls
143 lines (122 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import os
import sys
import joblib
# Add parent directory to path for imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from data.config import RANDOM_STATE
def load_data(file_path):
    """
    Load glucose level data from CSV file.

    Parameters:
    -----------
    file_path : str
        Path to the CSV file

    Returns:
    --------
    pandas.DataFrame or None
        Loaded data, or None if the file could not be read/parsed
    """
    try:
        data = pd.read_csv(file_path)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as e:
        # Catch only read/parse failures (FileNotFoundError is an OSError);
        # genuine programming errors propagate instead of being swallowed
        # by a blanket `except Exception`.
        print(f"Error loading data: {e}")
        return None
    print(f"Data loaded successfully with {data.shape[0]} rows and {data.shape[1]} columns")
    return data
def preprocess_data(df, test_size=0.2, random_state=42):
    """
    Preprocess the data for modeling.

    Parameters:
    -----------
    df : pandas.DataFrame
        The raw data to preprocess. Must contain the feature columns
        listed in `feature_names` below plus the 'GLUCOSE_LEVEL' target.
    test_size : float, optional
        Fraction of rows held out for the test split (default 0.2).
    random_state : int, optional
        Seed for the train/test split (default 42). Callers can pass the
        project-wide RANDOM_STATE from data.config for consistency; the
        default preserves the previous hard-coded behavior.

    Returns:
    --------
    tuple
        X_train, X_test, y_train, y_test, scaler, feature_names
    """
    # Rows missing WEIGHT/HEIGHT are dropped entirely; .copy() avoids
    # SettingWithCopy issues on the .loc assignments below.
    df = df.dropna(subset=['WEIGHT', 'HEIGHT']).copy()

    # Impute remaining numeric gaps with the column median,
    # and the categorical DIABETIC flag with its mode.
    for col in ('HEARTRATE', 'NIR_Reading', 'HR_IR', 'LAST_EATEN'):
        df.loc[:, col] = df[col].fillna(df[col].median())
    df.loc[:, 'DIABETIC'] = df['DIABETIC'].fillna(df['DIABETIC'].mode()[0])

    # Encode categorical features as 0/1 integers.
    df.loc[:, 'GENDER'] = df['GENDER'].map({'M': 1, 'F': 0})
    df.loc[:, 'DIABETIC'] = df['DIABETIC'].map({'Y': 1, 'N': 0})

    feature_names = [
        'AGE', 'GENDER', 'WEIGHT', 'SKIN_COLOR', 'NIR_Reading',
        'HEARTRATE', 'HEIGHT', 'LAST_EATEN', 'DIABETIC', 'HR_IR'
    ]
    target_col = 'GLUCOSE_LEVEL'
    X = df[feature_names]
    y = df[target_col]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Standardize features; the fitted scaler is returned so the exact
    # same transform can be applied at inference time.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test, scaler, feature_names
def save_processed_data(X_train, X_test, y_train, y_test, output_dir='datasets/processed'):
    """
    Persist the train/test split to disk as .npy files.

    Parameters:
    -----------
    X_train : numpy.ndarray
        Training features
    X_test : numpy.ndarray
        Test features
    y_train : numpy.ndarray
        Training target
    y_test : numpy.ndarray
        Test target
    output_dir : str
        Directory to save processed data (created if it does not exist)
    """
    os.makedirs(output_dir, exist_ok=True)
    # One .npy file per array, keyed by its on-disk filename.
    arrays = {
        'X_train.npy': X_train,
        'X_test.npy': X_test,
        'y_train.npy': y_train,
        'y_test.npy': y_test,
    }
    for filename, array in arrays.items():
        np.save(os.path.join(output_dir, filename), array)
    print(f"Processed data saved to {output_dir}")
def load_processed_data(input_dir='datasets/processed'):
    """
    Load a previously saved train/test split from disk.

    Parameters:
    -----------
    input_dir : str
        Directory containing the processed .npy files

    Returns:
    --------
    tuple
        X_train, X_test, y_train, y_test — or (None, None, None, None)
        if any of the files cannot be read.
    """
    names = ('X_train', 'X_test', 'y_train', 'y_test')
    try:
        arrays = tuple(
            np.load(os.path.join(input_dir, f"{name}.npy")) for name in names
        )
    except Exception as e:
        print(f"Error loading processed data: {e}")
        return None, None, None, None
    print(f"Processed data loaded from {input_dir}")
    return arrays
def load_model(model_path):
    """
    Load a trained model from disk.

    Parameters:
    -----------
    model_path : str
        Path to the serialized model file

    Returns:
    --------
    object
        The deserialized model object
    """
    # joblib handles sklearn estimators (and large numpy arrays) efficiently.
    model = joblib.load(model_path)
    return model