From 6621b68e884a5861e1bf80ec331837700b6e8aba Mon Sep 17 00:00:00 2001 From: vringar Date: Thu, 19 Feb 2026 23:41:33 +0100 Subject: [PATCH 1/2] perf: reduce fixed sleeps and use blocking queue.get() for completion signaling --- openwpm/browser_manager.py | 3 ++- openwpm/commands/browser_commands.py | 3 ++- openwpm/task_manager.py | 24 ++++++++++++++++++++---- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/openwpm/browser_manager.py b/openwpm/browser_manager.py index cbad29fb6..942c38ae1 100644 --- a/openwpm/browser_manager.py +++ b/openwpm/browser_manager.py @@ -504,7 +504,8 @@ def execute_command_sequence( ) # Sleep after executing CommandSequence to provide extra time for # internal buffers to drain. Stopgap in support of #135 - time.sleep(2) + drain_sleep = 0.1 if self.manager_params.testing else 2 + time.sleep(drain_sleep) if context.closing: return diff --git a/openwpm/commands/browser_commands.py b/openwpm/commands/browser_commands.py index 45aee69e0..887cb273a 100644 --- a/openwpm/commands/browser_commands.py +++ b/openwpm/commands/browser_commands.py @@ -484,7 +484,8 @@ def execute( tab_restart_browser(webdriver) # This doesn't immediately stop data saving from the current # visit so we sleep briefly before unsetting the visit_id. - time.sleep(self.sleep) + sleep = 0.1 if manager_params.testing else self.sleep + time.sleep(sleep) msg = {"action": "Finalize", "visit_id": self.visit_id} extension_socket.send(msg) diff --git a/openwpm/task_manager.py b/openwpm/task_manager.py index ccf34ef9a..9eb5fcced 100644 --- a/openwpm/task_manager.py +++ b/openwpm/task_manager.py @@ -401,18 +401,34 @@ def _start_thread( return thread def _mark_command_sequences_complete(self) -> None: - """Polls the storage controller for saved records - and calls their callbacks + """Waits for completed visits from the storage controller + and invokes their callbacks. + + Uses blocking queue.get() with a 1s timeout instead of polling + with sleep(1), so callbacks fire as soon as data is available. """ while True: if self.closing and not self.unsaved_command_sequences: # we're shutting down and have no unprocessed callbacks break + # First drain any already-available completions visit_id_list = self.storage_controller_handle.get_new_completed_visits() + if not visit_id_list: - time.sleep(1) - continue + # Block waiting for the next completion, with timeout as fallback + try: + item = self.storage_controller_handle.completion_queue.get( + block=True, timeout=1 + ) + visit_id_list = [item] + # Drain any additional completions that arrived + visit_id_list.extend( + self.storage_controller_handle.get_new_completed_visits() + ) + except Exception: + # queue.Empty or other exceptions - just loop back + continue for visit_id, successful in visit_id_list: self.logger.debug("Invoking callback of visit_id %d", visit_id) From b896b916d361c720a9b6309a07f872d17f1e8673 Mon Sep 17 00:00:00 2001 From: vringar Date: Thu, 19 Feb 2026 23:41:52 +0100 Subject: [PATCH 2/2] perf: reduce completion queue polling interval in StorageController Reduce update_completion_queue sleep from 5s to 0.5s so finalized visits reach the completion queue faster. Keep as fallback for the blocking get() on the TaskManager side. --- openwpm/storage/storage_controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openwpm/storage/storage_controller.py b/openwpm/storage/storage_controller.py index aa50a2b55..947ca51b3 100644 --- a/openwpm/storage/storage_controller.py +++ b/openwpm/storage/storage_controller.py @@ -336,7 +336,7 @@ async def update_completion_queue(self) -> None: else: new_finalize_tasks.append((visit_id, token, success)) self.finalize_tasks = new_finalize_tasks - await asyncio.sleep(5) + await asyncio.sleep(0.5) async def _run(self) -> None: await self.structured_storage.init()