Skip to content

Commit ddb9fd4

Browse files
authored
Last-half timer
1 parent 95c8057 commit ddb9fd4

File tree

1 file changed

+15
-7
lines changed

1 file changed

+15
-7
lines changed

src/modules/ObjectDetectionCoral/objectdetection_coral_multitpu.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,8 @@ def main():
231231

232232
thread_cnt = 16
233233
tot_infr_time = 0
234+
half_wall_start = 0
235+
half_infr_count = 0
234236
if args.count > 1:
235237
with concurrent.futures.ThreadPoolExecutor(max_workers=thread_cnt) as executor:
236238
start = time.perf_counter()
@@ -240,13 +242,20 @@ def main():
240242
for f in concurrent.futures.as_completed(fs):
241243
_, infr_time, _ = f.result()
242244
tot_infr_time += infr_time
245+
246+
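# Start a timer for the last ~half of the run for a more accurate benchmark. NOTE(review): `args.count-1 / 3` below parses as `args.count - (1 / 3)` and divides by 3, not 2 -- confirm the intended threshold.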
247+
if chunk_i > (args.count-1 / 3):
248+
half_infr_count += 1
249+
if half_wall_start == 0:
250+
half_wall_start = time.perf_counter()
243251

244252
# Uncomment for testing
245253
# import random
246254
# logging.info("Pause")
247-
# time.sleep(random.randint(0,INTERPRETER_LIFESPAN_SECONDS*2))
255+
# time.sleep(random.randint(0,INTERPRETER_LIFESPAN_SECONDS*3))
248256
else:
249257
start = time.perf_counter()
258+
half_wall_start = time.perf_counter()
250259

251260
# snapshot = tracemalloc.take_snapshot()
252261
# top_stats = snapshot.statistics('lineno')
@@ -256,18 +265,17 @@ def main():
256265
start_one = time.perf_counter()
257266
objs, infr_time, _ = _tpu_runner.process_image(options, copy.copy(image), args.threshold)
258267
tot_infr_time += infr_time
268+
half_infr_count += 1
259269
wall_time = time.perf_counter() - start
270+
half_wall_time = time.perf_counter() - half_wall_start
271+
260272
print('completed one run every %.2fms for %d runs; %.2fms wall time for a single run' %
261273
(wall_time * 1000 / args.count, args.count,
262274
(time.perf_counter() - start_one) * 1000))
263275

264-
# Optimizing the number of segments used for a model would result in the
265-
# lowest average time spent adjusted for number of TPUs used. At some point,
266-
# adding additional segments just removes from the pool of TPUs you can use
267-
# for parallelism.
268-
print('%.2fms avg time blocked across %d threads; %.2f avg TPU * ms / run' %
276+
print('%.2fms avg time blocked across %d threads; %.2fms ea for final %d inferences' %
269277
(tot_infr_time / args.count, thread_cnt,
270-
len(_tpu_runner.pipe.tpu_list) * wall_time * 1000 / args.count))
278+
half_wall_time * 1000 / half_infr_count, half_infr_count)
271279

272280
print('-------RESULTS--------')
273281
if not objs:

0 commit comments

Comments
 (0)