Skip to content

Commit eeb80b6

Browse files
author
tatoao pro
committed
debug for simple graph
1 parent 8c51f25 commit eeb80b6

5 files changed

Lines changed: 111 additions & 96 deletions

File tree

processor_pipeline/new_core/graph.py

Lines changed: 38 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,14 @@ def __init__(self, nodes: List[Node], edges: List[Edge], **kwargs):
7070
self.node_init_variables: Dict[str, Any] = {}
7171

7272

73-
async def dynamic_fan_in_pipes_task(self, node: Node):
73+
def dynamic_fan_in_pipes_task(self, node: Node):
7474
"""
7575
Fan in pipes for a node.
7676
"""
7777
precious_nodes = get_previous_nodes(self.nx_graph, node.processor_unique_name)
7878

7979
if len(precious_nodes) == 0:
80-
return
80+
return asyncio.sleep(0)
8181

8282
previous_input_pipes = [
8383
self.processor_pipes[previous_node.processor_unique_name].output_pipe
@@ -87,12 +87,12 @@ async def dynamic_fan_in_pipes_task(self, node: Node):
8787
self._fan_in_pipes(
8888
previous_input_pipes,
8989
self.processor_pipes[node.processor_unique_name].input_pipe),
90-
name=f"fan_in_pipes_task_{node.processor_unique_name}_to_{'='.join([previous_node.processor_unique_name for previous_node in precious_nodes])}"
90+
name=f"fan_in_pipes_task__from_{'='.join([previous_node.processor_unique_name for previous_node in precious_nodes])} to {node.processor_unique_name}"
9191
)
9292

93-
await task
93+
return task
9494

95-
async def dynamic_fan_out_pipes_task(self, node: Node):
95+
def dynamic_fan_out_pipes_task(self, node: Node):
9696
"""
9797
Fan out pipes for a node.
9898
"""
@@ -112,20 +112,11 @@ async def dynamic_fan_out_pipes_task(self, node: Node):
112112
),
113113
name=f"fan_out_pipes_task_{node.processor_unique_name}_to_{'='.join([next_node.processor_unique_name for next_node in next_nodes])}"
114114
)
115-
await task
115+
return task
116116
else:
117-
await asyncio.sleep(0)
117+
return asyncio.sleep(0)
118118

119119

120-
121-
async def add_processor_into_graph(self, node: Node):
122-
self.initialize_node(node)
123-
# merge input pipes
124-
self.background_tasks.append(self.dynamic_fan_in_pipes_task(node))
125-
# merge output pipes
126-
self.background_tasks.append(self.dynamic_fan_out_pipes_task(node))
127-
128-
129120
def initialize(self):
130121

131122
for node in self.nodes:
@@ -149,24 +140,33 @@ def initialize(self):
149140

150141

151142

152-
for node in self.nodes:
153-
# merge input pipes
154-
self.background_tasks.append(self.dynamic_fan_in_pipes_task(node))
155-
156-
# merge output pipes
157-
self.background_tasks.append(self.dynamic_fan_out_pipes_task(node))
158143

159144

160145
root_node = get_root_nodes(self.nx_graph)[0]
161146

162147
# Create graph's own input pipe (separate from root node's input pipe)
148+
149+
async def root_task():
150+
await self._fan_in_pipes(
151+
[self.input_pipe],
152+
self.processor_pipes[root_node.processor_unique_name].input_pipe
153+
)
154+
self.logger.warning("Graph pipe into root input pipe task completed")
155+
163156
graph_pipe_into_root_input_pipe_task = asyncio.create_task(
164-
self._fan_out_pipes(
165-
self.input_pipe,
166-
[self.processor_pipes[root_node.processor_unique_name].input_pipe]
167-
),
157+
root_task(),
168158
name="graph_pipe_into_root_input_pipe_task"
169159
)
160+
self.background_tasks.append(graph_pipe_into_root_input_pipe_task)
161+
162+
163+
for node in self.nodes:
164+
# merge input pipes
165+
self.background_tasks.append(self.dynamic_fan_in_pipes_task(node))
166+
167+
# # merge output pipes
168+
# self.background_tasks.append(self.dynamic_fan_out_pipes_task(node))
169+
170170

171171

172172
# create the task for all leaf nodes
@@ -180,8 +180,7 @@ def initialize(self):
180180
name="leaf_nodes_output_pipe_task"
181181
)
182182

183-
self.background_tasks.insert(0, graph_pipe_into_root_input_pipe_task)
184-
self.background_tasks.insert(0, leaf_nodes_output_pipe_task)
183+
self.background_tasks.append(leaf_nodes_output_pipe_task)
185184
self.logger.info(f"Graph initialized: {self.processor_id}, {self.background_tasks}")
186185

187186

@@ -217,14 +216,13 @@ async def _fan_in_pipes(self, input_pipes: List[PipeInterface], output_pipe: Pip
217216
"""Merge multiple input pipes into a single output pipe using asyncio.as_completed"""
218217
async def read_pipe_task(pipe):
219218
"""Read all data from a single pipe"""
220-
# import ipdb; ipdb.set_trace()
221219
async for message_id, data in pipe:
222220
self.logger.info(f"Fan in pipe: {data}: from {pipe._pipe_id} to {output_pipe._pipe_id}")
223221
if data is None:
224-
await output_pipe.put(data)
225222
break
226223
await output_pipe.put(data)
227224

225+
228226
try:
229227
# Create tasks for reading from each input pipe
230228
tasks = [asyncio.create_task(read_pipe_task(pipe), name=f"read_pipe_task_{pipe._pipe_id}") for pipe in input_pipes]
@@ -258,7 +256,9 @@ async def _fan_out_pipes(self, source_pipe: PipeInterface, output_pipes: List[Pi
258256
# import ipdb; ipdb.set_trace()
259257
for pipe in output_pipes:
260258
try:
259+
self.logger.info(f"Fan out pipe: None: from {source_pipe._pipe_id} to {pipe._pipe_id}")
261260
await pipe.put(None)
261+
262262
except Exception as e:
263263
self.logger.error(f"Error signaling end-of-stream to pipe {getattr(pipe, '_pipe_id', 'unknown')}: {e}")
264264

@@ -318,16 +318,21 @@ async def execute(self, data: Any, session_id: Optional[str] = None, *args, **kw
318318
tasks.append(task)
319319

320320
# Wait for all processors to complete
321-
await asyncio.gather(*self.background_tasks)
322321
await asyncio.gather(*tasks)
322+
323+
# Close input pipe to signal end-of-stream to background tasks
324+
await self.input_pipe.close()
325+
326+
self.logger.warning(f"Graph background tasks: {self.background_tasks}")
327+
await asyncio.gather(*self.background_tasks)
323328

324329

325330
except Exception as e:
326331
self.logger.error(f"Error executing graph: {e}")
327332
self.logger.error(traceback.format_exc())
328333
caught_exception = e
329334
finally:
330-
await self.input_pipe.close()
335+
# Input pipe is already closed above, only close output pipe here
331336
await self.output_pipe.close()
332337
if caught_exception:
333338
raise caught_exception
@@ -337,4 +342,4 @@ async def process(self, data: Any, *args, **kwargs) -> Any:
337342
"""
338343
Process data.
339344
"""
340-
pass
345+
pass
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from loguru import logger
2+
3+
# Configure loguru for better formatting and control
4+
def _ensure_extra_fields(record):
5+
# Always provide a default session_id for formatting
6+
record["extra"].setdefault("session_id", "-")
7+
# Optional suffix to distinguish different inputs within the same session
8+
record["extra"].setdefault("sid_suffix", "")
9+
# Default object_name for formatting
10+
record["extra"].setdefault("object_name", "UNKNOWN")
11+
return True
12+
13+
14+
logger.remove() # Remove default handler
15+
logger.add(
16+
"logs/processor_pipeline.log",
17+
rotation="10 MB",
18+
retention="7 days",
19+
level="INFO",
20+
# Include session_id in all log lines
21+
format="<green>{time:YYYY-MM-DD HH:mm:ss:SSS}</green> | sid={extra[session_id]}{extra[sid_suffix]} | <level>{level: <8}</level> | <cyan>{extra[object_name]}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
22+
backtrace=True,
23+
diagnose=True,
24+
filter=_ensure_extra_fields,
25+
)
26+
logger.add(
27+
lambda msg: print(msg, end=""),
28+
level="DEBUG",
29+
format="<green>{time:HH:mm:ss:SSS}</green> | sid={extra[session_id]}{extra[sid_suffix]} | <level>{level: <8}</level> | <cyan>{extra[object_name]}</cyan> - <level>{message}</level>",
30+
colorize=True,
31+
filter=_ensure_extra_fields,
32+
)

processor_pipeline/new_core/pipe.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import asyncio
22
import uuid
3-
from loguru import logger
3+
from .logger import logger
44
from .core_interfaces import PipeInterface, PipeMeta
55
from typing import Any, Optional, Dict, AsyncGenerator, List, Tuple, Set
66
from pydantic import BaseModel
@@ -37,10 +37,10 @@ def __init__(self, maxsize: int = -1, pipe_id: Optional[str] = None, logger=logg
3737
self._pipe_id = pipe_id
3838

3939
self.queue = asyncio.Queue(maxsize)
40-
self.logger = logger or logger.bind(name=self._pipe_id)
40+
self.logger = logger or logger.bind(object_name=self._pipe_id)
4141
# ensure logger has default session_id to avoid formatting issues when not bound
4242
try:
43-
self.logger = self.logger.bind(session_id="-")
43+
self.logger = self.logger.bind(session_id="-", object_name=self._pipe_id)
4444
except Exception:
4545
pass
4646

@@ -73,7 +73,7 @@ async def put(self, data: Any) -> None:
7373
return
7474

7575
message_id = self.generate_random_message_id(data)
76-
self.logger.debug(f"[{self._pipe_id}] [PUT] data: {data} with message_id: {message_id}")
76+
self.logger.debug(f"[PUT] data: {data} with message_id: {message_id}")
7777

7878
# Put data in main queue
7979
await self.queue.put((message_id, data))
@@ -88,9 +88,10 @@ async def get(self, timeout: Optional[float] = None) -> Any:
8888
self.statistics.historic_get_count += 1
8989
data = await self.queue.get()
9090
if data is None:
91+
self.logger.debug("[GET] data is None, Closing pipe")
9192
return None
9293
message_id, data = data
93-
self.logger.debug(f"[{self._pipe_id}] [GET] data: {data} with message_id: {message_id}")
94+
self.logger.debug(f"[GET] data: {data} with message_id: {message_id}")
9495
return (message_id, data)
9596

9697
async def __aiter__(self) -> AsyncGenerator[Any, None]:
@@ -123,7 +124,7 @@ async def register_observer(self, observer_id: Optional[str] = None) -> str:
123124
raise ValueError(f"Observer {observer_id} already exists")
124125
self._observers[observer_id] = asyncio.Queue()
125126

126-
self.logger.debug(f"[{self._pipe_id}] Registered observer: {observer_id}")
127+
self.logger.debug(f"[REGISTERED OBSERVER] {observer_id}")
127128
return observer_id
128129

129130
async def unregister_observer(self, observer_id: str) -> None:
@@ -133,7 +134,7 @@ async def unregister_observer(self, observer_id: str) -> None:
133134
# Close the observer queue
134135
self._observers[observer_id].put_nowait(None)
135136
del self._observers[observer_id]
136-
self.logger.debug(f"[{self._pipe_id}] Unregistered observer: {observer_id}")
137+
self.logger.debug(f"[UNREGISTERED OBSERVER] {observer_id}")
137138

138139
async def peek_aiter(self, observer_id: Optional[str] = None) -> AsyncGenerator[Any, None]:
139140
"""Create an async iterator for peeking without consuming from main queue"""
@@ -266,7 +267,7 @@ async def get(self, timeout: Optional[float] = None) -> Any:
266267
if data is None:
267268
return None
268269
message_id, data = data
269-
self.logger.debug(f"[{self._pipe_id}] [GET] data: {data} with message_id: {message_id}")
270+
self.logger.debug(f"[GET] data: {data} with message_id: {message_id}")
270271
return (message_id, data)
271272

272273

processor_pipeline/new_core/processor.py

Lines changed: 10 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -9,35 +9,7 @@
99
from .pipe import BufferPipe
1010
from pydantic import BaseModel, computed_field
1111
from threading import Thread
12-
from loguru import logger
13-
14-
# Configure loguru for better formatting and control
15-
def _ensure_session_id(record):
16-
# Always provide a default session_id for formatting
17-
record["extra"].setdefault("session_id", "-")
18-
# Optional suffix to distinguish different inputs within the same session
19-
record["extra"].setdefault("sid_suffix", "")
20-
return True
21-
22-
logger.remove() # Remove default handler
23-
logger.add(
24-
"logs/processor_pipeline.log",
25-
rotation="10 MB",
26-
retention="7 days",
27-
level="INFO",
28-
# Include session_id in all log lines
29-
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | sid={extra[session_id]}{extra[sid_suffix]} | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
30-
backtrace=True,
31-
diagnose=True,
32-
filter=_ensure_session_id,
33-
)
34-
logger.add(
35-
lambda msg: print(msg, end=""),
36-
level="DEBUG",
37-
format="<green>{time:HH:mm:ss}</green> | sid={extra[session_id]}{extra[sid_suffix]} | <level>{level: <8}</level> | <cyan>{name}</cyan> - <level>{message}</level>",
38-
colorize=True,
39-
filter=_ensure_session_id,
40-
)
12+
from .logger import logger
4113

4214

4315
class ProcessorStatistics(BaseModel):
@@ -112,6 +84,7 @@ def __init__(self,
11284
# Get pipe types from meta (defaults are now set in ProcessorMeta)
11385
self.input_pipe_type = meta["input_pipe_type"]
11486
self.output_pipe_type = meta["output_pipe_type"]
87+
self.background_tasks: List[asyncio.Task] = []
11588

11689

11790
self.semaphore = asyncio.Semaphore(max_concurrent)
@@ -122,10 +95,9 @@ def __init__(self,
12295
# statistics
12396
self._statistics = ProcessorStatistics()
12497

125-
self.background_tasks: List[asyncio.Task] = []
12698

12799
# Debug logging for initialization
128-
self.logger = logger
100+
self.logger = logger.bind(object_name=self.processor_id)
129101
self.logger.debug(f"Initialized processor {self.processor_id} with output_strategy={self.output_strategy.value}, max_concurrent={max_concurrent}")
130102
self.logger.debug(f"Processor meta: {meta}")
131103

@@ -138,17 +110,17 @@ def bind_session_id(self, session_id: str) -> None:
138110
self.session = {"session_id": session_id}
139111

140112
# Bind logger for this processor
141-
self.logger = self.logger.bind(session_id=session_id)
113+
self.logger = self.logger.bind(session_id=session_id, object_name=self.processor_id)
142114

143115
# Bind logger for pipes if available
144116
if getattr(self, "input_pipe", None) is not None and getattr(self.input_pipe, "logger", None) is not None:
145117
try:
146-
self.input_pipe.logger = self.input_pipe.logger.bind(session_id=session_id)
118+
self.input_pipe.logger = self.input_pipe.logger.bind(session_id=session_id, object_name=self.input_pipe._pipe_id)
147119
except Exception:
148120
pass
149121
if getattr(self, "output_pipe", None) is not None and getattr(self.output_pipe, "logger", None) is not None:
150122
try:
151-
self.output_pipe.logger = self.output_pipe.logger.bind(session_id=session_id)
123+
self.output_pipe.logger = self.output_pipe.logger.bind(session_id=session_id, object_name=self.output_pipe._pipe_id)
152124
except Exception:
153125
pass
154126

@@ -406,10 +378,11 @@ async def execute(self, data: Any = None, session_id: Optional[str] = None, *arg
406378
self.bind_session_id(session_id)
407379

408380
self.logger.info(f"Starting execute for {self.processor_id} with output strategy: {self.output_strategy.value}")
409-
381+
410382
# Determine intake method based on data type
411383
if data is AsyncGenerator or inspect.isasyncgenfunction(data) or hasattr(data, '__aiter__'):
412384
generator = data
385+
413386
async def intake_method():
414387
item_count = 0
415388
async for item in generator:
@@ -431,9 +404,7 @@ async def intake_method():
431404
intake_task = asyncio.create_task(asyncio.sleep(0))
432405
else:
433406
# a one time intake input
434-
intake_task1 = asyncio.create_task(self.input_pipe.put(data))
435-
intake_task2 = asyncio.create_task(self.input_pipe.put(None))
436-
intake_task = asyncio.gather(intake_task1, intake_task2)
407+
intake_task = asyncio.create_task(self.input_pipe.put(data))
437408

438409
# Create main processing task based on output strategy
439410
if self.output_strategy == OutputStrategy.ASAP:
@@ -446,7 +417,7 @@ async def intake_method():
446417
else:
447418
raise ValueError(f"Invalid output strategy: {self.output_strategy}")
448419

449-
_, result, *_ = await asyncio.gather(intake_task, main_task, *self.background_tasks)
420+
_, result = await asyncio.gather(intake_task, main_task)
450421
self.logger.info(f"Execute completed for {self.processor_id}")
451422
return result
452423

0 commit comments

Comments
 (0)