Skip to content
This repository was archived by the owner on Aug 15, 2018. It is now read-only.

Commit 14a44ff

Browse files
sparkvilla authored and riccardomurri committed
optimized lcc for memory efficiency
1 parent a08830a commit 14a44ff

File tree

7 files changed: +60093 additions, -97 deletions (lines changed)

tmlib/workflow/dependencies.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -141,6 +141,10 @@ class CanonicalWorkflowDependencies(WorkflowDependencies):
141141
'imextract': {
142142
'metaconfig'
143143
},
144+
145+
'jterator':{
146+
147+
},
144148
'popcon' : {
145149
'jterator'
146150
}

tmlib/workflow/popcon/api.py

Lines changed: 19 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ def delete_previous_job_output(self):
9898
that were generated by a prior run of the same pipeline as well as all
9999
children instances for the processed experiment.
100100
'''
101-
pass
101+
pass
102102

103103
def run_job(self, batch, assume_clean_state):
104104
'''Runs the pipeline, i.e. executes modules sequentially. After
@@ -117,32 +117,34 @@ def run_job(self, batch, assume_clean_state):
117117
filter_by(well_id=well_id).all()
118118
wellY = sites[0][1]*len(set([i[3] for i in sites]))
119119
wellX = sites[0][2]*len(set([i[4]for i in sites]))
120-
121-
120+
121+
122122
extract_mapobject_type_id = session.query(tm.MapobjectType.id).\
123-
filter_by(name=batch['extract_object']).one()[0]
123+
filter_by(name=batch['extract_object']).one()[0]
124124
extract_seg_layer_id = session.query(tm.SegmentationLayer.id).\
125125
filter_by(mapobject_type_id=extract_mapobject_type_id).one()[0]
126126
extract_centroids = session.query(tm.MapobjectSegmentation.geom_centroid,tm.MapobjectSegmentation.mapobject_id,tm.MapobjectSegmentation.label,tm.MapobjectSegmentation.partition_key).\
127127
filter_by(segmentation_layer_id=extract_seg_layer_id).all()
128128
assign_mapobject_type_id = session.query(tm.MapobjectType.id).\
129-
filter_by(name=batch['assign_object']).one()[0]
129+
filter_by(name=batch['assign_object']).one()[0]
130130
assign_seg_layer_id = session.query(tm.SegmentationLayer.id).\
131131
filter_by(mapobject_type_id=assign_mapobject_type_id).one()[0]
132132
assign_centroids = session.query(tm.MapobjectSegmentation.geom_centroid,tm.MapobjectSegmentation.mapobject_id,tm.MapobjectSegmentation.label,tm.MapobjectSegmentation.partition_key).\
133133
filter_by(segmentation_layer_id=assign_seg_layer_id).all()
134-
134+
135135
logger.info('Calculating LCC for well_id %s', well_id)
136136
logger.info('Instantiating LCC for extract_object')
137137
lcc_extract = LocalCC(extract_centroids, wellY, wellX)
138-
logger.info('df lcc_extract: %s',lcc_extract.df.head())
138+
logger.info('df lcc_extract: %s',lcc_extract.df.head())
139139
logger.info(
140140
'wellX: %s, wellY: %s, diagonal: %s'
141141
, lcc_extract.wellX, lcc_extract.wellY, lcc_extract.well_diagonal)
142-
143-
real_lcc = lcc_extract.real_distances()
144-
random_lcc = lcc_extract.random_distances()
142+
143+
real_lcc = lcc_extract.gen_real_distances()
144+
random_lcc = lcc_extract.gen_random_distances()
145145
lcc = lcc_extract.get_lcc(real_lcc,random_lcc)
146+
147+
lcc_extract.df['lcc'] = lcc_extract.df['lcc'].round()
146148
logger.info('Instantiating LCC for assign_object')
147149
lcc_assign = LocalCC(assign_centroids, wellY, wellX)
148150

@@ -153,18 +155,18 @@ def run_job(self, batch, assume_clean_state):
153155
, batch['assign_object'], batch['extract_object'])
154156

155157
#logger.debug(
156-
# 'assign: %s extract: %s', lcc_assign.df['mapobject_id'], lcc_extract.df['mapobject_id'])
157-
158-
lcc_extract.df['mapobject_id'] = lcc_assign.df['mapobject_id']
158+
# 'assign: %s extract: %s', lcc_assign.df['mapobject_id'], lcc_extract.df['mapobject_id'])
159+
160+
lcc_extract.df['mapobject_id'] = lcc_assign.df['mapobject_id']
159161

160162
feature_name = 'LocalCellCrowding_{}'.format(batch['extract_object'])
161163
feature = session.get_or_create(
162-
tm.Feature, name=feature_name,
163-
mapobject_type_id=assign_mapobject_type_id,
164-
is_aggregate=False)
164+
tm.Feature, name=feature_name,
165+
mapobject_type_id=assign_mapobject_type_id,
166+
is_aggregate=False)
165167

166168

167-
for index, row in lcc_extract.df.iterrows():
169+
for index, row in lcc_extract.df.iterrows():
168170
feature_value = session.query(tm.FeatureValues).filter_by(mapobject_id= int(row['mapobject_id']) ).one()
169171
session.append_value(feature_value,str(feature.id),row['lcc'].astype(str))
170172
session.commit()
@@ -174,4 +176,3 @@ def run_job(self, batch, assume_clean_state):
174176

175177
def collect_job_output(self, batch):
176178
pass
177-

tmlib/workflow/popcon/lcc.py

Lines changed: 50 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -12,89 +12,91 @@ class LocalCC(object):
1212
@staticmethod
1313
def _get_yx(element):
1414
''' Helper. Takes in a WKBElement.
15-
Return a shapely.geometry.point.Point object
15+
Return a shapely.geometry.point.Point object
1616
'''
1717
return wkb.loads(bytes(element.data))
1818

1919
@staticmethod
20-
def _get_df(np_centroids):
21-
'''Helper. Takes numpy centroids array
22-
Return a sorted dataframe by site and label
20+
def _get_df(centroids):
21+
'''Helper. Takes a list of centroids coords.
22+
Return a sorted dataframe by site and label
2323
'''
24-
data = np.zeros( (len(np_centroids),5) )
24+
data = np.zeros( (len(centroids),5) )
2525
headers = ['y','x','mapobject_id','label','site']
26-
data[:] = np_centroids
26+
data[:] = centroids
2727
data_df = pd.DataFrame(data, columns=headers)
2828
df = data_df.sort_values(['site', 'label'])
2929
return df
3030

31+
32+
33+
3134
def __init__(self, centroids, wellY, wellX):
3235
'''
3336
Parameters
3437
----------
35-
centroids: list of tuples (centroid WKB element, mapobject_id, label,site);
38+
centroids: list of tuples (centroid WKB element, mapobject_id, label,site);
3639
i.e. query objects from MapobjectSegmentation table
3740
'''
38-
self.centroids = centroids
39-
40-
self.centroids_coordinates = [(abs(self._get_yx(element[0]).y),abs(self._get_yx(element[0]).x), int(element[1]), int(element[2]), int(element[3])) for element in self.centroids]
41-
41+
42+
self.centroids_coordinates = [
43+
( round (abs(self._get_yx(element[0]).y),1 ),\
44+
round (abs(self._get_yx(element[0]).x),1 ),\
45+
int(element[1]),\
46+
int(element[2]),\
47+
int(element[3]) ) for element in centroids]
48+
4249
self.df = self._get_df(self.centroids_coordinates)
4350
self.yx_coordinates = np.asarray((self.df['y'],self.df['x'])).transpose()
4451
self.wellY = wellY
4552
self.wellX = wellX
4653
self.well_diagonal = np.round(math.sqrt(self.wellX**2+self.wellY**2))
47-
48-
49-
50-
def real_distances(self):
54+
55+
56+
def gen_real_distances(self):
57+
'''
58+
Returns
59+
-------
60+
generator
61+
Sum of distances calculated from real positions
62+
'''
63+
64+
for yx_real in self.yx_coordinates:
65+
real_dist = distance.cdist(np.transpose(yx_real[:,np.newaxis]), self.yx_coordinates, 'euclidean')
66+
real_masked = np.ma.masked_where(real_dist==0,real_dist) # mask 0 values
67+
real_masked_divide = np.divide(self.well_diagonal, real_masked)
68+
yield np.sum(real_masked_divide.filled(fill_value=0))
69+
70+
71+
def gen_random_distances(self):
5172
'''
5273
Returns
5374
-------
54-
numpy array of distances from real positions
55-
'''
56-
real_dists = distance.cdist(self.yx_coordinates, self.yx_coordinates, 'euclidean')
57-
real_masked = np.ma.masked_where(real_dists==0,real_dists) # mask 0 values
58-
real_masked_divide = np.divide(self.well_diagonal, real_masked)
59-
return real_masked_divide.filled(fill_value=0)
60-
61-
def random_distances(self):
75+
generator
76+
Sum of distances calculated from random positions
6277
'''
63-
Returns
64-
-------
65-
numpy array of distances from random positions
66-
'''
67-
rand_dists= list()
68-
78+
6979
for yx_real in self.yx_coordinates:
7080
y_rand= np.random.uniform(0,self.wellY,len(self.yx_coordinates)-1)
71-
x_rand= np.random.uniform(0,self.wellX,len(self.yx_coordinates)-1)
81+
x_rand= np.random.uniform(0,self.wellX,len(self.yx_coordinates)-1)
7282
yx_coordinates_random = np.concatenate( (y_rand[:,np.newaxis],x_rand[:,np.newaxis]), axis=1)
73-
83+
7484
rand_dist = np.divide(self.well_diagonal, distance.cdist(np.transpose(yx_real[:,np.newaxis]), yx_coordinates_random, 'euclidean') )
75-
rand_dists.append(np.squeeze(rand_dist))
76-
return np.asarray(rand_dists)
77-
85+
yield np.sum(np.squeeze(rand_dist))
86+
7887

7988
def get_lcc(self,real_dists,random_dists):
8089
'''
8190
Parameters
8291
----------
83-
real_dists: numpy arrays of real distances
84-
random_dists: numpy array of random distances
92+
real_dists: generator of real distances
93+
random_dists: generator of random distances
8594
8695
Returns
8796
-------
88-
a numpy array i.e. a LCC value and mapobject_id for centroid
89-
'''
90-
sum_real = np.sum(real_dists, axis=1)
91-
sum_random = np.sum(random_dists, axis=1)
92-
lcc = sum_real-sum_random
93-
94-
self.df['lcc'] = pd.Series(lcc)
95-
return self.df
96-
# lcc[lcc[:,1].argsort()] sort array based on mapobject_id
97-
# lcc[lcc[:,0].argsort()] sort array based on lcc value
98-
97+
a pandas Dataframe i.e. y,x,mapobject_id,label,site,lcc
98+
'''
99+
lcc = [re_d-rn_d for re_d,rn_d in zip(real_dists,random_dists)]
99100

100-
101+
self.df['lcc'] = pd.Series(lcc)
102+
return self.df

0 commit comments

Comments
 (0)