-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathread_datasets.py
More file actions
137 lines (112 loc) · 4.54 KB
/
read_datasets.py
File metadata and controls
137 lines (112 loc) · 4.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import os
import tarfile
import random
import sys
from six.moves import urllib
import urllib.request
import pickle
import gzip
import zipfile
import numpy as np
# Remote location of the EMNIST archive (a zip file holding the gzip-compressed
# image/label files). Its basename ('gzip.zip') is also used as the local file
# name inside the ``data`` directory.
EMNIST_URL = 'http://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip'
def get_emnist_letters(fname='emnist-letters-from-src.pkl'):
    """Load the EMNIST "letters" train/test arrays.

    The EMNIST archive is downloaded first if it is not present yet. If a
    pickled cache called ``fname`` exists in the ``data`` directory next to
    this module, the arrays are taken from it. Otherwise the archive is
    extracted to ``data/emnist_extracted`` and the gzip-compressed
    ``idx``-format files are parsed directly.

    Args:
        fname: File name, relative to the ``data`` directory, of an optional
            pickled dict with the keys ``x_train``, ``y_train``, ``x_test``
            and ``y_test``.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``. When parsed from the
        archive, images are ``uint8`` arrays shaped
        ``(n_images, n_rows, n_cols, 1)`` and labels are flat ``uint8``
        arrays.

    Raises:
        ValueError: If the archive is still not a valid zip file after five
            extraction attempts (it is deleted and re-downloaded between
            attempts).
    """
    _maybe_download_emnist()
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    cache_path = os.path.join(data_dir, fname)

    if os.path.exists(cache_path):
        # NOTE: unpickling can execute arbitrary code; only point ``fname`` at
        # cache files that were produced locally.
        with open(cache_path, 'rb') as fo:
            obj = pickle.load(fo)
        return obj['x_train'], obj['y_train'], obj['x_test'], obj['y_test']

    gzip_path = os.path.join(data_dir, 'gzip.zip')
    dst_path = os.path.join(data_dir, 'emnist_extracted')
    os.makedirs(dst_path, exist_ok=True)

    fnames_dict = {
        'x_test': 'emnist-letters-test-images-idx3-ubyte.gz',
        'y_test': 'emnist-letters-test-labels-idx1-ubyte.gz',
        'x_train': 'emnist-letters-train-images-idx3-ubyte.gz',
        'y_train': 'emnist-letters-train-labels-idx1-ubyte.gz',
    }
    # The archive unpacks into a top-level 'gzip' folder.
    fullpaths = {k: os.path.join(dst_path, 'gzip', v)
                 for k, v in fnames_dict.items()}

    max_attempts = 5
    for attempt in range(max_attempts):
        try:
            # The context manager closes the archive even if extraction fails.
            with zipfile.ZipFile(gzip_path) as zip_ref:
                zip_ref.extractall(dst_path)
            break
        except zipfile.BadZipFile:
            if attempt == max_attempts - 1:
                err_msg = ("Can't download EMNIST dataset. Try "
                           "downloading EMNIST dataset manually and place the "
                           "gzip.zip file to the parallel-tempring/simulator/data "
                           "folder.")
                raise ValueError(err_msg)
            # Corrupt archive: discard it and fetch a fresh copy before retrying.
            os.remove(gzip_path)
            _maybe_download_emnist()

    def _read4bytes(bytestream):
        # Header fields are big-endian unsigned 32-bit integers.
        dtype = np.dtype(np.uint32).newbyteorder('>')
        return np.frombuffer(bytestream.read(4), dtype=dtype)[0]

    def ungzip_data(fname):
        with gzip.GzipFile(fname, 'r') as fo:
            _read4bytes(fo)  # magic number: consumed but not validated
            n_images = _read4bytes(fo)
            n_rows = _read4bytes(fo)
            n_cols = _read4bytes(fo)
            buf = fo.read()
        data = np.frombuffer(buf, dtype=np.uint8)
        return data.reshape(n_images, n_rows, n_cols, 1)

    def ungzip_labels(fname):
        with gzip.GzipFile(fname, 'r') as fo:
            _read4bytes(fo)  # magic number: consumed but not validated
            _read4bytes(fo)  # label count: consumed, the payload length is used as-is
            buf = fo.read()
        return np.frombuffer(buf, dtype=np.uint8)

    x_train = ungzip_data(fullpaths['x_train'])
    y_train = ungzip_labels(fullpaths['y_train'])
    x_test = ungzip_data(fullpaths['x_test'])
    y_test = ungzip_labels(fullpaths['y_test'])
    return x_train, y_train, x_test, y_test
def _maybe_download_emnist():
    """Download the EMNIST archive into the local ``data`` directory if absent.

    The archive is stored next to this module as ``data/<basename of
    EMNIST_URL>``. If that file already exists the function returns without
    touching the network. The download is written to a temporary ``.part``
    file and only renamed to the final name once it has completed, so an
    interrupted transfer never leaves a truncated file that would later be
    mistaken for a finished download.
    """
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    # exist_ok avoids a race between an existence check and the creation.
    os.makedirs(data_dir, exist_ok=True)

    filename = EMNIST_URL.split('/')[-1]
    filepath = os.path.join(data_dir, filename)
    if os.path.exists(filepath):
        return

    def _progress(count, block_size, total_size):
        # urlretrieve reports a non-positive total size when the server does
        # not announce one; skip the percentage in that case instead of
        # dividing by it.
        if total_size > 0:
            percent = float(count * block_size) / float(total_size) * 100.0
            # The last block is usually partial, so clamp the overshoot.
            percent = min(percent, 100.0)
            buff = '\r>> Downloading EMNIST %s %.1f%%' % (filename, percent)
        else:
            buff = '\r>> Downloading EMNIST %s' % filename
        sys.stdout.write(buff)
        sys.stdout.flush()

    partial_path = filepath + '.part'
    try:
        urllib.request.urlretrieve(EMNIST_URL, partial_path, _progress)
        os.replace(partial_path, filepath)
    finally:
        # After a successful rename this is a no-op; after a failure it
        # removes whatever was written so far.
        if os.path.exists(partial_path):
            os.remove(partial_path)

    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded EMNIST dataset', statinfo.st_size, 'bytes.')
# def read_cd_dataset():
# data = np.zeros((, 180, 180))
# labels = np.zeros((,))
# i = 0
# for d in ['data/PetImages/Cat', 'data/PetImages/Dog/']:
# for f in os.listdir(d):
# img = keras.preprocessing.image.load_img(
# f, target_size=(180,180)
# )
# img_array = keras.preprocessing.image.img_to_array(img)
# data[i] = img_array
# if 'Dog' in d:
# labels[i] = 1
# i += 1