-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathvisualize_dataset.py
More file actions
100 lines (86 loc) · 3.53 KB
/
visualize_dataset.py
File metadata and controls
100 lines (86 loc) · 3.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os
import random
import matplotlib.pyplot as plt
from skimage import io
# Dataset folders are resolved relative to the directory that holds this script,
# so the script works regardless of the current working directory.
script_dir = os.path.abspath(os.path.dirname(__file__))
train_path, test_path = (
    os.path.join(script_dir, split_name) for split_name in ("Train", "Test")
)
def load_images_from_folder(folder):
    """Load every readable image file found directly inside ``folder``.

    Sub-directories are ignored (the scan is not recursive). Files that
    ``io.imread`` fails to read with an ``OSError`` or ``ValueError`` are
    skipped with a warning instead of aborting the whole load.

    Args:
        folder: Path of the directory to scan.

    Returns:
        A list with one entry per successfully read file, in sorted
        filename order.
    """
    import warnings  # function-scope import keeps the file's import block untouched

    images = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(folder)):
        file_path = os.path.join(folder, filename)
        if not os.path.isfile(file_path):
            continue
        try:
            img = io.imread(file_path)
        except (OSError, ValueError) as err:
            # Stray non-image files should not prevent the rest from loading.
            warnings.warn(f"Skipping unreadable file {file_path!r}: {err}")
            continue
        if img is not None:
            images.append(img)
    return images
def plot_class_distribution(folders):
    """Show a bar chart with the number of image files per class.

    Every directory in ``folders`` is scanned for sub-directories; each
    sub-directory name is treated as a class. Counts for a class name that
    appears under several folders are added together.

    Args:
        folders: Iterable of dataset root directories to scan.
    """
    class_counts = {}
    for folder in folders:
        for class_name in os.listdir(folder):
            class_path = os.path.join(folder, class_name)
            if not os.path.isdir(class_path):
                continue
            # Count regular files only, so nested directories are not
            # reported as images (same filter as load_images_from_folder).
            num_images = sum(
                1
                for entry in os.listdir(class_path)
                if os.path.isfile(os.path.join(class_path, entry))
            )
            class_counts[class_name] = class_counts.get(class_name, 0) + num_images

    classes = list(class_counts)
    num_images_per_class = list(class_counts.values())

    plt.figure(figsize=(10, 5))
    bars = plt.bar(classes, num_images_per_class, color='skyblue')
    plt.xlabel('Classes')
    plt.ylabel('Number of Images')
    plt.title('Class Distribution')

    # Annotate each bar with its corresponding number of images
    for bar, num_images in zip(bars, num_images_per_class):
        plt.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height(),
            f'{num_images}',
            ha='center',
            va='bottom',
        )
    plt.show()
def plot_pixel_intensity_distribution(images, title):
    """Overlay the pixel-intensity histograms of ``images`` in one figure.

    Args:
        images: Iterable of image arrays; each must provide ``ravel()``.
        title: Title displayed above the plot.
    """
    plt.figure(figsize=(10, 5))
    has_data = False
    for index, img in enumerate(images, start=1):
        # A distinct label per image keeps the legend from repeating the
        # same entry once for every histogram.
        plt.hist(img.ravel(), bins=256, alpha=0.5, label=f'Image {index}')
        has_data = True
    plt.xlabel('Pixel Intensity')
    plt.ylabel('Frequency')
    plt.title(title)
    if has_data:
        # Drawing a legend with no labelled artists only produces a warning.
        plt.legend()
    plt.show()
def plot_sample_images(images, title):
    """Show up to 15 images, each next to its pixel-intensity histogram.

    The figure is a grid of 5 rows by 3 image/histogram pairs (6 subplot
    columns). When fewer than 15 images are supplied, only the available
    ones are drawn and the remaining cells stay empty.

    Args:
        images: Sequence of image arrays; each must provide ``ravel()``.
        title: Super-title displayed above the grid.
    """
    rows = 5
    cols = 3
    plt.figure(figsize=(15, 15))
    plt.suptitle(title)
    # Slice instead of indexing a fixed range so short inputs cannot raise
    # IndexError.
    for i, img in enumerate(images[:rows * cols]):
        # Odd cells hold the image, the following even cell its histogram.
        plt.subplot(rows, cols * 2, 2 * i + 1)
        plt.imshow(img, cmap='gray')  # Display grayscale images
        plt.axis('off')
        plt.subplot(rows, cols * 2, 2 * i + 2)
        plt.hist(img.ravel(), bins=256, color='skyblue', alpha=0.7)
        plt.xlabel('Pixel Intensity')
        plt.ylabel('Frequency')
    plt.show()
def visualize_dataset(train_path, test_path, samples_per_class=15):
    """Run all dataset visualisations for the training and test folders.

    Steps, in order:
      1. Draw a random sample of images from every class sub-directory of
         ``train_path``.
      2. Plot the class distribution over both ``train_path`` and
         ``test_path``.
      3. Plot the overlaid pixel-intensity histograms of each class sample.
      4. Plot each class sample as an image grid with per-image histograms.

    Args:
        train_path: Directory containing one sub-directory per class.
        test_path: Directory with the same layout, used only for the class
            distribution plot.
        samples_per_class: Maximum number of images sampled per class
            (default 15, the original fixed value).
    """
    # Load sample images from each class in training set
    sample_images_per_class = {}
    for class_name in os.listdir(train_path):
        class_path = os.path.join(train_path, class_name)
        if not os.path.isdir(class_path):
            continue
        class_images = load_images_from_folder(class_path)
        # random.sample raises ValueError if asked for more items than
        # exist, so clamp the request to what the class actually has.
        sample_size = min(samples_per_class, len(class_images))
        sample_images_per_class[class_name] = random.sample(class_images, sample_size)

    # Plot class distribution for both training and test sets
    plot_class_distribution([train_path, test_path])

    # Plot pixel intensity distribution for each class in training set
    for class_name, images in sample_images_per_class.items():
        plot_pixel_intensity_distribution(images, f'Pixel Intensity Distribution - {class_name}')

    # Plot sample images with pixel intensity histograms for each class in training set
    for class_name, images in sample_images_per_class.items():
        plot_sample_images(images, f'Sample Images - {class_name}')
# Only run the visualisation when executed as a script, so importing this
# module (e.g. to reuse the helpers) does not open plot windows.
if __name__ == "__main__":
    visualize_dataset(train_path, test_path)