-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhackknight.py
More file actions
205 lines (157 loc) · 9.25 KB
/
hackknight.py
File metadata and controls
205 lines (157 loc) · 9.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# -*- coding: utf-8 -*-
"""hackknight.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1SgCWEXkDpCLmJq6UCuEpAW8jG_cXACqM
"""
!pip install torch torchvision torchaudio
import torch, torchvision
from torchvision import datasets, transforms, models #transforms is a module
from torch.utils.data import random_split, DataLoader
from torch import nn, optim
import os
import torch.nn as nn
import sys
import torch.optim as optim
from sklearn.metrics import accuracy_score, classification_report
import shutil
from pathlib import Path
import kagglehub #the dataset
# Download the Kaggle dataset (cached locally by kagglehub) and point
# `path` at the inner images/images directory that holds one folder per class.
old_path = kagglehub.dataset_download("alistairking/recyclable-and-household-waste-classification")
path = os.path.join(old_path, 'images', 'images')
print(f"Old path: {old_path}")
print(f"New path: {path}")
if os.path.exists(path):
    # Sanity check: list the class folders so the 30 classes can be eyeballed.
    contents = os.listdir(path)
    print(f"Number of folders in images/: {len(contents)}")
    print(f"folders: {contents}")
# Preprocessing / augmentation pipeline applied to every image on load.
transform = transforms.Compose([
    transforms.Resize((128, 128)),      # standardize input size: source images vary in size
    transforms.RandomHorizontalFlip(),  # augmentation: a bottle looks the same mirrored left-right
    transforms.RandomRotation(10),      # augmentation: small random rotations (up to 10 degrees)
    transforms.ToTensor(),              # PIL image -> float tensor; pixel values rescaled from 0-255 to 0-1
    # Channel-wise normalization with ImageNet statistics — matches what
    # the pretrained MobileNetV2 weights below were trained with.
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
# ImageFolder infers one class per subdirectory of `path`.
dataset = datasets.ImageFolder(path, transform=transform)
train = int(0.8 * len(dataset))  # 80% of the images for training
val = len(dataset) - train       # remaining 20% for validation
# NOTE(review): random_split is unseeded, so the split differs between runs.
train_data, val_data = random_split(dataset, [train, val])
# Batches of 32; shuffle training batches so the model never sees a fixed order.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32)  # order is irrelevant for validation
# MobileNetV2 pretrained on ImageNet: chosen over heavier models
# (ResNet50, EfficientNet) for speed; we fine-tune it on the Kaggle data.
model = models.mobilenet_v2(weights="IMAGENET1K_V1")
# Replace the final classifier layer: the pretrained head emits 1000
# ImageNet logits, but this dataset has 30 waste classes. in_features is
# the width of the feature vector feeding the head (unchanged).
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 30)
# Train on GPU when available; model and batches must live on the same
# device or forward passes raise runtime errors.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
"""
Model we are using has model.features which is the feature extractor and model.classifier which is the decision maker
model classifier has 2 layers
* dropout layer (prevents overfitting) means when a model learns training data too well and can't perform well for the unseen data
* Linear layer (makes final prediction)
we want to modify [1] because the original classifier has 1000 outputs classes for 1280 features but we want 30 outputs (30 classes) for the 1280 features
* nn.Linear is the input (images) and the output(results) connected layer
* nn means neural network pytorch module and common components are nn.Linear, nn.Convo2d, nn.ReLU, nn.Dropout, nn.CrossEntropyLoss
Linear is a fully connected layer of inputs and outputs.
- the .in_features tells how many inputs to expect, the 30 is the outputs (classes). This is the only layer we are changing and we use all the above math functions to build the model
"""
# Quick sanity report on the data pipeline before training starts.
print(f"Number of batches: {len(train_loader)}")
print(f"Number of training images: {len(train_data)}")
print(f"Dataset path: {path}")
print(f"Number of classes found: {len(dataset.classes)}")
print(f"Class names: {dataset.classes}")
from torch.amp import autocast, GradScaler

# Mixed-precision training: GradScaler rescales the loss so fp16
# gradients don't underflow on the 'cuda' (NVIDIA GPU) device.
scaler=GradScaler('cuda')
# Freeze the pretrained feature extractor: its filters (edges, shapes,
# backgrounds) transfer as-is, so only the new classifier head is trained.
for param in model.features.parameters():
    param.requires_grad = False
# Cross-entropy = softmax over the class scores + negative log-likelihood:
# a low probability on the true class yields a high loss, pushing larger
# weight updates.
criterion = nn.CrossEntropyLoss()
# Adam on the classifier's parameters only (everything else is frozen).
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)
# Compile model (PyTorch 2.0+)
# if hasattr(torch, 'compile'):
#     model = torch.compile(model)
for epoch in range(15):  # 15 full passes over the training set
    model.train()    # training mode: dropout active
    total_loss = 0   # accumulate the epoch's loss for reporting
    for images, labels in train_loader:  # batches of 32 images
        # Move the batch to the same device as the model.
        images, labels = images.to(device), labels.to(device)
        with autocast('cuda'):  # forward pass in mixed precision (fp16 where safe)
            # model(images) invokes model.forward(images); outputs are
            # the predicted class scores, labels the correct answers.
            outputs = model(images)
            loss = criterion(outputs, labels)
        # set_to_none=True drops (rather than zeroes) last batch's gradients.
        optimizer.zero_grad(set_to_none=True)
        scaler.scale(loss).backward()  # scale the loss up so fp16 gradients don't underflow
        scaler.step(optimizer)         # unscale gradients, then apply the optimizer update
        scaler.update()                # adapt the scale factor if over/underflow occurred
        total_loss += loss.item()      # .item() pulls the scalar loss off the GPU
    print(f"Epoch [{epoch+1}/15], Loss: {total_loss/len(train_loader):.4f}")
    sys.stdout.flush()  # show progress immediately — Colab buffers output until the cell finishes
"""**THE MATH FLOW OF THE MODEL**
STEP 1: preprocesssing
* we have images with RBG pixel values like image = [[255, 128, 64],...,]
* convert those into 0-1 by dividing /255 so its easier for neural networks to process
* normalize = (value-mean)/std, Model thinks: "bright images of plastic bottle = one type, dark images of plastic model = another type" that's not what the model needs to learn
* autocast using 16-bit numbers and gradscalar makes sure those numbers don't underflow
"""
model.eval()  # evaluation mode: dropout disabled (measured accuracy ~78% — per author's note)
y_true, y_pred = [], []
with torch.no_grad():  # no gradients needed for inference — saves memory
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        with autocast('cuda'):
            outputs = model(images)
        _, preds = torch.max(outputs, 1)  # argmax over class logits = predicted class
        y_true.append(labels.cpu())
        y_pred.append(preds.cpu())
# Concatenate the per-batch tensors into flat numpy arrays for sklearn.
y_true = torch.cat(y_true).numpy()
y_pred = torch.cat(y_pred).numpy()
accuracy = accuracy_score(y_true, y_pred)
print(f"\n✓ Validation Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=dataset.classes))
# NOTE(review): this pickles the entire model object; saving
# model.state_dict() is the more portable PyTorch convention — confirm
# downstream loaders before changing.
torch.save(model, 'waste_classifier_full.pth')
# CO2 factor per waste class (keys match the dataset's class names,
# lower-cased). Units presumably kg CO2 saved per kg recycled — TODO confirm.
co2_factors = dict(
    plastic_soda_bottles=2.5,
    aerosol_cans=6.5,
    steel_food_cans=2.9,
    disposable_plastic_cutlery=3.2,
    cardboard_boxes=0.8,
    glass_beverage_bottles=1.2,
    plastic_cup_lids=2.8,
    plastic_straws=2.0,
    plastic_shopping_bags=3.0,
    styrofoam_cups=6.0,
    cardboard_packaging=0.9,
    glass_food_jars=1.0,
    styrofoam_food_containers=6.5,
    eggshells=0.1,
    aluminum_food_cans=9.0,
    coffee_grounds=0.3,
    plastic_food_containers=2.4,
    food_waste=0.4,
    magazines=0.7,
    shoes=4.5,
    clothing=3.0,
    aluminum_soda_cans=8.8,
    plastic_detergent_bottles=2.6,
    newspaper=0.6,
    tea_bags=0.2,
    office_paper=0.8,
    plastic_water_bottles=2.5,
    paper_cups=1.0,
    glass_cosmetic_containers=1.1,
    plastic_trash_bags=2.8,
)
def calculate_co2(predicted_class, weight, factors=None):
    """Return the estimated CO2 saved by recycling `weight` of the given class.

    Args:
        predicted_class: class-name string; matched case-insensitively
            against the factor table's lower-case keys.
        weight: amount of material recycled (units assumed to match the
            factor table — presumably kg; TODO confirm).
        factors: optional mapping of class name -> CO2 factor; defaults
            to the module-level co2_factors table.

    Returns:
        weight * factor, or 0.0 for an unrecognized class. (The original
        code crashed with TypeError here: dict.get returned None for
        unknown classes and None * weight is invalid.)
    """
    table = co2_factors if factors is None else factors
    # Default to 0.0 so an unknown class means "no measurable savings"
    # instead of a TypeError from None * weight.
    factor = table.get(predicted_class.lower(), 0.0)
    co2_saved = weight * factor
    return co2_saved