@@ -231,6 +231,8 @@ def main():
231231
232232 thread_cnt = 16
233233 tot_infr_time = 0
234+ half_wall_start = 0
235+ half_infr_count = 0
234236 if args .count > 1 :
235237 with concurrent .futures .ThreadPoolExecutor (max_workers = thread_cnt ) as executor :
236238 start = time .perf_counter ()
@@ -240,13 +242,20 @@ def main():
240242 for f in concurrent .futures .as_completed (fs ):
241243 _ , infr_time , _ = f .result ()
242244 tot_infr_time += infr_time
245+
246+ # Start a timer for the last ~half of the run for a more accurate benchmark
247+ if chunk_i > (args .count - 1 / 3 ):
248+ half_infr_count += 1
249+ if half_wall_start == 0 :
250+ half_wall_start = time .perf_counter ()
243251
244252 # Uncomment for testing
245253 # import random
246254 # logging.info("Pause")
247- # time.sleep(random.randint(0,INTERPRETER_LIFESPAN_SECONDS*2 ))
255+ # time.sleep(random.randint(0,INTERPRETER_LIFESPAN_SECONDS*3 ))
248256 else :
249257 start = time .perf_counter ()
258+ half_wall_start = time .perf_counter ()
250259
251260 # snapshot = tracemalloc.take_snapshot()
252261 # top_stats = snapshot.statistics('lineno')
@@ -256,18 +265,17 @@ def main():
256265 start_one = time .perf_counter ()
257266 objs , infr_time , _ = _tpu_runner .process_image (options , copy .copy (image ), args .threshold )
258267 tot_infr_time += infr_time
268+ half_infr_count += 1
259269 wall_time = time .perf_counter () - start
270+ half_wall_time = time .perf_counter () - half_wall_start
271+
260272 print ('completed one run every %.2fms for %d runs; %.2fms wall time for a single run' %
261273 (wall_time * 1000 / args .count , args .count ,
262274 (time .perf_counter () - start_one ) * 1000 ))
263275
264- # Optimizing the number of segments used for a model would result in the
265- # lowest average time spent adjusted for number of TPUs used. At some point,
266- # adding additional segments just removes from the pool of TPUs you can use
267- # for parallelism.
268- print ('%.2fms avg time blocked across %d threads; %.2f avg TPU * ms / run' %
276+ print ('%.2fms avg time blocked across %d threads; %.2fms ea for final %d inferences' %
269277 (tot_infr_time / args .count , thread_cnt ,
270- len ( _tpu_runner . pipe . tpu_list ) * wall_time * 1000 / args . count ) )
278+ half_wall_time * 1000 / half_infr_count , half_infr_count )
271279
272280 print ('-------RESULTS--------' )
273281 if not objs :
0 commit comments