---
layout: default
title: "AG2 Tutorial - Chapter 7: Advanced Patterns"
nav_order: 7
has_children: false
parent: "AG2 Tutorial"
---

Chapter 7: Advanced Patterns & Optimization

Welcome to Chapter 7: Advanced Patterns & Optimization. In this part of AG2 Tutorial: Next-Generation Multi-Agent Framework, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.

Master advanced AG2 techniques: nested chats, caching, performance optimization, and production-ready patterns.

Overview

This chapter covers advanced patterns that enable sophisticated agent architectures, performance optimization, and production deployment strategies.

Nested Chat Patterns

Hierarchical Agent Structures

from ag2 import AssistantAgent, UserProxyAgent, GroupChat, GroupChatManager
import asyncio
import json
import time

class HierarchicalAgent(AssistantAgent):
    def __init__(self, sub_agents=None, **kwargs):
        super().__init__(**kwargs)
        self.sub_agents = sub_agents or {}  # maps sub-agent name -> agent instance
        self.active_conversations = {}
        self.conversation_history = []

    async def initiate_nested_chat(self, task, sub_agent_name, context=None):
        """Start a nested conversation with a sub-agent"""
        if sub_agent_name not in self.sub_agents:
            return {"error": f"Sub-agent '{sub_agent_name}' not available"}

        sub_agent = self.sub_agents[sub_agent_name]

        # Create nested chat context
        conversation_id = f"nested_{sub_agent_name}_{len(self.active_conversations)}"

        nested_context = {
            "parent_task": task,
            "context": context or {},
            "conversation_id": conversation_id,
            "start_time": time.time()
        }

        # Start nested conversation
        try:
            result = await sub_agent.initiate_chat_async(
                message=self._prepare_nested_message(task, context),
                max_turns=5
            )

            nested_context["result"] = result
            nested_context["status"] = "completed"
            nested_context["end_time"] = time.time()

        except Exception as e:
            nested_context["error"] = str(e)
            nested_context["status"] = "failed"
            nested_context["end_time"] = time.time()

        self.active_conversations[conversation_id] = nested_context
        self.conversation_history.append(nested_context)

        return nested_context

    def _prepare_nested_message(self, task, context):
        """Prepare message for nested conversation"""
        message = f"Sub-task: {task}\n"

        if context:
            message += f"Context: {json.dumps(context, indent=2)}\n"

        message += "\nPlease focus on this specific aspect and provide detailed analysis."
        return message

# Create hierarchical agent system (llm_config as configured in earlier chapters)
research_director = HierarchicalAgent(
    name="research_director",
    system_message="""You are a research director. Coordinate research across multiple domains.
    Delegate specialized research tasks to appropriate sub-agents.""",
    llm_config=llm_config
)

# Create specialized sub-agents
technical_researcher = AssistantAgent(
    name="technical_researcher",
    system_message="Focus on technical implementation details and code analysis.",
    llm_config=llm_config
)

business_analyst = AssistantAgent(
    name="business_analyst",
    system_message="Focus on business impact, market analysis, and strategic implications.",
    llm_config=llm_config
)

user_experience_researcher = AssistantAgent(
    name="ux_researcher",
    system_message="Focus on user behavior, experience design, and usability studies.",
    llm_config=llm_config
)

# Register sub-agents
research_director.sub_agents = {
    "technical": technical_researcher,
    "business": business_analyst,
    "ux": user_experience_researcher
}

# Example nested research
async def conduct_nested_research(topic):
    """Conduct comprehensive research using nested conversations"""

    # Initial coordination
    coordination_result = await research_director.initiate_nested_chat(
        f"Coordinate research on: {topic}",
        "business",
        {"priority": "high", "timeline": "2 weeks"}
    )

    # Parallel nested conversations
    tasks = [
        research_director.initiate_nested_chat(
            f"Technical analysis: {topic}",
            "technical",
            {"focus": "implementation"}
        ),
        research_director.initiate_nested_chat(
            f"UX research: {topic}",
            "ux",
            {"focus": "user_needs"}
        )
    ]

    # Execute parallel research
    technical_result, ux_result = await asyncio.gather(*tasks)

    # Synthesis
    synthesis_result = await research_director.initiate_nested_chat(
        f"Synthesize findings on: {topic}",
        "business",
        {
            "technical_findings": technical_result.get("result"),
            "ux_findings": ux_result.get("result")
        }
    )

    return {
        "coordination": coordination_result,
        "technical": technical_result,
        "ux": ux_result,
        "synthesis": synthesis_result
    }

# Run nested research
result = await conduct_nested_research("AI-powered code review tools")
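
The snippets in this chapter use top-level await, which works in notebooks and other async contexts. In a plain Python script you would wrap the entry point yourself; a minimal sketch, assuming the agents above are already constructed:

import asyncio

async def main():
    result = await conduct_nested_research("AI-powered code review tools")
    # The synthesis step carries the combined findings
    print(result["synthesis"].get("result"))

if __name__ == "__main__":
    asyncio.run(main())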

Recursive Agent Delegation

class RecursiveAgent(AssistantAgent):
    def __init__(self, max_depth=3, **kwargs):
        super().__init__(**kwargs)
        self.max_depth = max_depth
        self.delegation_tree = {}

    async def recursive_solve(self, problem, depth=0, context=None):
        """Recursively break down and solve problems"""
        if depth >= self.max_depth:
            # Base case: solve directly
            return await self._solve_base_case(problem, context)

        # Analyze problem complexity
        complexity = await self._assess_complexity(problem)

        if complexity == "simple":
            return await self._solve_base_case(problem, context)

        # Break down problem
        subproblems = await self._decompose_problem(problem, context)

        # Recursively solve subproblems
        solutions = []
        for i, subproblem in enumerate(subproblems):
            sub_context = {
                **(context or {}),
                "parent_problem": problem,
                "subproblem_index": i,
                "depth": depth + 1
            }

            solution = await self.recursive_solve(subproblem, depth + 1, sub_context)
            solutions.append(solution)

        # Synthesize solutions
        final_solution = await self._synthesize_solutions(problem, solutions, context)

        return final_solution

    async def _assess_complexity(self, problem):
        """Assess problem complexity"""
        prompt = f"Assess the complexity of this problem on a scale of simple/medium/complex: {problem}"

        response = await self.generate_reply_async([{"content": prompt, "role": "user"}])

        if "simple" in response.lower():
            return "simple"
        elif "complex" in response.lower():
            return "complex"
        else:
            return "medium"

    async def _decompose_problem(self, problem, context):
        """Break down problem into subproblems"""
        prompt = f"""Break down this problem into 2-4 smaller, manageable subproblems:

Problem: {problem}

Context: {json.dumps(context or {}, indent=2)}

Provide subproblems as a numbered list."""

        response = await self.generate_reply_async([{"content": prompt, "role": "user"}])

        # Parse subproblems from response
        lines = response.split('\n')
        subproblems = []

        for line in lines:
            line = line.strip()
            if line and (line[0].isdigit() or line.startswith('-')):
                # Remove numbering
                if line[0].isdigit():
                    line = line.split('.', 1)[-1].strip()
                elif line.startswith('-'):
                    line = line[1:].strip()

                if line:
                    subproblems.append(line)

        return subproblems[:4]  # Limit to 4 subproblems

    async def _solve_base_case(self, problem, context):
        """Solve a simple problem directly"""
        prompt = f"Solve this problem: {problem}"

        if context:
            prompt += f"\n\nContext: {json.dumps(context, indent=2)}"

        solution = await self.generate_reply_async([{"content": prompt, "role": "user"}])

        return {
            "problem": problem,
            "solution": solution,
            "type": "base_case",
            "context": context
        }

    async def _synthesize_solutions(self, original_problem, solutions, context):
        """Combine solutions from subproblems"""
        synthesis_prompt = f"""Synthesize solutions for the original problem:

Original Problem: {original_problem}

Subproblem Solutions:
{chr(10).join(f"{i+1}. {sol['solution']}" for i, sol in enumerate(solutions))}

Context: {json.dumps(context or {}, indent=2)}

Provide a comprehensive solution that addresses the original problem."""

        final_solution = await self.generate_reply_async([{"content": synthesis_prompt, "role": "user"}])

        return {
            "original_problem": original_problem,
            "subproblem_solutions": solutions,
            "synthesized_solution": final_solution,
            "type": "synthesized",
            "context": context
        }

# Create recursive problem solver
recursive_solver = RecursiveAgent(
    name="recursive_solver",
    max_depth=3,
    system_message="Break down complex problems recursively and solve them systematically.",
    llm_config=llm_config
)

# Solve complex problem recursively
complex_problem = "Design a complete e-commerce platform with user authentication, product catalog, shopping cart, payment processing, and order management"

solution = await recursive_solver.recursive_solve(complex_problem)

Caching and Performance Optimization

Response Caching

import hashlib
import json
import time

class CachingAssistantAgent(AssistantAgent):
    def __init__(self, cache_ttl=3600, **kwargs):  # 1 hour default TTL
        super().__init__(**kwargs)
        self.cache = {}
        self.cache_ttl = cache_ttl
        self.cache_stats = {
            "hits": 0,
            "misses": 0,
            "expired": 0
        }

    def _get_cache_key(self, messages):
        """Generate cache key from messages"""
        # Create deterministic string from messages
        message_str = json.dumps(messages, sort_keys=True)
        return hashlib.md5(message_str.encode()).hexdigest()

    def _is_cache_valid(self, cache_entry):
        """Check if cache entry is still valid"""
        return time.time() - cache_entry["timestamp"] < self.cache_ttl

    async def generate_reply_async(self, messages, **kwargs):
        """Override to add caching"""
        cache_key = self._get_cache_key(messages)

        # Check cache
        if cache_key in self.cache:
            cache_entry = self.cache[cache_key]

            if self._is_cache_valid(cache_entry):
                self.cache_stats["hits"] += 1
                return cache_entry["response"]
            else:
                self.cache_stats["expired"] += 1
                del self.cache[cache_key]

        self.cache_stats["misses"] += 1

        # Generate new response
        response = await super().generate_reply_async(messages, **kwargs)

        # Cache the response
        self.cache[cache_key] = {
            "response": response,
            "timestamp": time.time(),
            "messages_hash": cache_key
        }

        return response

    def get_cache_stats(self):
        """Get cache performance statistics"""
        total_requests = sum(self.cache_stats.values())
        hit_rate = self.cache_stats["hits"] / total_requests if total_requests > 0 else 0

        return {
            **self.cache_stats,
            "total_requests": total_requests,
            "hit_rate": hit_rate,
            "cache_size": len(self.cache)
        }

    def clear_expired_cache(self):
        """Remove expired cache entries"""
        current_time = time.time()
        expired_keys = [
            key for key, entry in self.cache.items()
            if current_time - entry["timestamp"] >= self.cache_ttl
        ]

        for key in expired_keys:
            del self.cache[key]

        return len(expired_keys)

# Create caching agent
caching_agent = CachingAssistantAgent(
    name="caching_agent",
    cache_ttl=1800,  # 30 minutes
    llm_config=llm_config
)

# Use caching agent
response1 = await caching_agent.generate_reply_async([
    {"content": "What is the capital of France?", "role": "user"}
])

response2 = await caching_agent.generate_reply_async([
    {"content": "What is the capital of France?", "role": "user"}
])  # This should be cached

print("Cache stats:", caching_agent.get_cache_stats())

Semantic Caching

import hashlib
import time

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

class SemanticCachingAgent(AssistantAgent):
    def __init__(self, similarity_threshold=0.85, **kwargs):
        super().__init__(**kwargs)
        self.cache = {}
        self.embeddings = {}
        self.similarity_threshold = similarity_threshold

    async def generate_embedding(self, text):
        """Generate embedding for text (simplified)"""
        # In practice, use a proper embedding model
        # This is a placeholder implementation
        import hashlib
        hash_obj = hashlib.md5(text.encode())
        # Convert hash to simple vector representation
        vector = np.array([int(hash_obj.hexdigest()[i:i+2], 16) for i in range(0, 32, 2)])
        return vector / np.linalg.norm(vector)  # Normalize

    def find_similar_query(self, query_embedding):
        """Find similar cached queries"""
        if not self.embeddings:
            return None

        similarities = {}
        for cache_key, embedding in self.embeddings.items():
            similarity = cosine_similarity(
                query_embedding.reshape(1, -1),
                embedding.reshape(1, -1)
            )[0][0]
            similarities[cache_key] = similarity

        # Find most similar
        best_match = max(similarities.items(), key=lambda x: x[1])

        if best_match[1] >= self.similarity_threshold:
            return best_match[0]

        return None

    async def generate_reply_async(self, messages, **kwargs):
        """Override with semantic caching"""
        query = messages[-1]["content"] if messages else ""

        # Generate embedding for current query
        query_embedding = await self.generate_embedding(query)

        # Find similar cached query
        similar_key = self.find_similar_query(query_embedding)

        if similar_key and similar_key in self.cache:
            cache_entry = self.cache[similar_key]
            if time.time() - cache_entry["timestamp"] < 3600:  # 1 hour TTL
                return f"[SEMANTIC CACHE HIT] {cache_entry['response']}"

        # Generate new response
        response = await super().generate_reply_async(messages, **kwargs)

        # Cache with embedding
        cache_key = hashlib.md5(query.encode()).hexdigest()
        self.cache[cache_key] = {
            "response": response,
            "timestamp": time.time(),
            "original_query": query
        }
        self.embeddings[cache_key] = query_embedding

        return response

# Create semantic caching agent
semantic_agent = SemanticCachingAgent(
    name="semantic_agent",
    similarity_threshold=0.9,
    llm_config=llm_config
)
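
The hash-based generate_embedding above only demonstrates the caching mechanics: semantically similar queries will not actually map to similar vectors. A sketch of swapping in a real embedding model, assuming the sentence-transformers package and the all-MiniLM-L6-v2 model are available:

from sentence_transformers import SentenceTransformer
import numpy as np

class RealEmbeddingSemanticAgent(SemanticCachingAgent):
    def __init__(self, embedding_model="all-MiniLM-L6-v2", **kwargs):
        super().__init__(**kwargs)
        self._embedder = SentenceTransformer(embedding_model)

    async def generate_embedding(self, text):
        # encode() is synchronous and returns a numpy array; acceptable for a sketch,
        # but offload it to a thread pool in production to avoid blocking the event loop
        vector = self._embedder.encode(text)
        return vector / np.linalg.norm(vector)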

Advanced Conversation Patterns

Conversational State Management

class StatefulAgent(AssistantAgent):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.conversation_state = {
            "current_topic": None,
            "user_preferences": {},
            "conversation_history": [],
            "context_variables": {},
            "emotional_state": "neutral"
        }
        self.state_transitions = {
            "greeting": ["question_asked", "topic_introduced"],
            "question_asked": ["answering", "clarifying"],
            "topic_introduced": ["deepening", "broadening"],
            "deepening": ["expert_analysis", "solution_provided"],
            "broadening": ["topic_introduced", "comparison_made"]
        }

    def update_state(self, message_type, metadata=None):
        """Update conversation state based on message type"""
        # current_topic starts as None, so fall back to "greeting" explicitly
        current_state = self.conversation_state.get("current_topic") or "greeting"

        if current_state in self.state_transitions:
            possible_transitions = self.state_transitions[current_state]

            # Determine most appropriate transition
            if message_type in possible_transitions:
                self.conversation_state["current_topic"] = message_type
            elif metadata and "sentiment" in metadata:
                if metadata["sentiment"] == "confused":
                    self.conversation_state["emotional_state"] = "clarifying"
                elif metadata["sentiment"] == "satisfied":
                    self.conversation_state["emotional_state"] = "positive"

        # Update conversation history
        self.conversation_state["conversation_history"].append({
            "timestamp": time.time(),
            "state": self.conversation_state["current_topic"],
            "emotional_state": self.conversation_state["emotional_state"],
            "message_type": message_type
        })

    def get_state_context(self):
        """Get current state for context"""
        return {
            "current_topic": self.conversation_state["current_topic"],
            "emotional_state": self.conversation_state["emotional_state"],
            "recent_history": self.conversation_state["conversation_history"][-3:],
            "preferences": self.conversation_state["user_preferences"]
        }

    async def generate_reply_async(self, messages, **kwargs):
        """Generate reply with state awareness"""
        # Analyze incoming message
        last_message = messages[-1]["content"] if messages else ""
        message_type = self._classify_message(last_message)

        # Update state
        self.update_state(message_type)

        # Get state context
        state_context = self.get_state_context()

        # Modify system message based on state
        enhanced_system_message = self._enhance_system_message(state_context)

        # Temporarily update system message
        original_system_message = self.system_message
        self.system_message = enhanced_system_message

        try:
            response = await super().generate_reply_async(messages, **kwargs)
        finally:
            # Restore original system message
            self.system_message = original_system_message

        return response

    def _classify_message(self, message):
        """Classify message type"""
        message = message.lower()

        if any(word in message for word in ["hello", "hi", "greetings"]):
            return "greeting"
        elif any(word in message for word in ["what", "how", "why", "when", "where"]):
            return "question_asked"
        elif any(word in message for word in ["explain", "details", "more"]):
            return "deepening"
        elif any(word in message for word in ["compare", "versus", "vs", "different"]):
            return "comparison_made"
        else:
            return "topic_introduced"

    def _enhance_system_message(self, state_context):
        """Enhance system message based on conversation state"""
        base_message = self.system_message

        enhancements = []

        if state_context["current_topic"] == "question_asked":
            enhancements.append("The user has asked a question. Provide clear, direct answers.")

        if state_context["emotional_state"] == "confused":
            enhancements.append("The user seems confused. Explain concepts clearly and offer to clarify.")

        if state_context["current_topic"] == "deepening":
            enhancements.append("The user wants more details. Provide comprehensive information.")

        if enhancements:
            base_message += "\n\nAdditional context:\n" + "\n".join(f"- {enh}" for enh in enhancements)

        return base_message

# Create stateful agent
stateful_agent = StatefulAgent(
    name="stateful_agent",
    system_message="You are a helpful AI assistant with conversation awareness.",
    llm_config=llm_config
)

Multi-Turn Conversation Planning

class ConversationPlanner(AssistantAgent):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.conversation_plan = []
        self.current_step = 0

    async def plan_conversation(self, goal, max_steps=5):
        """Create a conversation plan to achieve a goal"""
        planning_prompt = f"""Create a step-by-step conversation plan to achieve this goal: {goal}

Provide a numbered list of conversation steps, each with:
- Step description
- Expected user response type
- Success criteria
- Fallback actions

Limit to {max_steps} steps maximum."""

        plan_response = await self.generate_reply_async([{"content": planning_prompt, "role": "user"}])

        # Parse plan from response
        self.conversation_plan = self._parse_conversation_plan(plan_response)
        self.current_step = 0

        return self.conversation_plan

    def _parse_conversation_plan(self, plan_text):
        """Parse conversation plan from text"""
        lines = plan_text.split('\n')
        plan = []

        current_step = None
        for line in lines:
            line = line.strip()
            if line and line[0].isdigit():
                if current_step:
                    plan.append(current_step)

                step_num = line.split('.')[0]
                description = line.split('.', 1)[1].strip() if '.' in line else line

                current_step = {
                    "step": int(step_num),
                    "description": description,
                    "expected_response": "",
                    "success_criteria": "",
                    "fallback": ""
                }
            elif current_step and line.startswith('-'):
                # Parse sub-details
                if "expected" in line.lower():
                    current_step["expected_response"] = line[1:].strip()
                elif "success" in line.lower():
                    current_step["success_criteria"] = line[1:].strip()
                elif "fallback" in line.lower():
                    current_step["fallback"] = line[1:].strip()

        if current_step:
            plan.append(current_step)

        return plan

    async def execute_conversation_step(self, user_message):
        """Execute current conversation step"""
        if not self.conversation_plan or self.current_step >= len(self.conversation_plan):
            return {"status": "completed", "message": "Conversation plan completed"}

        current_plan_step = self.conversation_plan[self.current_step]

        # Evaluate user response against success criteria
        success = await self._evaluate_success_criteria(user_message, current_plan_step)

        if success:
            self.current_step += 1

            if self.current_step < len(self.conversation_plan):
                next_step = self.conversation_plan[self.current_step]
                return {
                    "status": "progress",
                    "current_step": self.current_step,
                    "next_action": next_step["description"]
                }
            else:
                return {"status": "completed", "message": "All conversation steps completed"}
        else:
            # Try fallback action
            fallback_result = await self._execute_fallback(current_plan_step)
            return {
                "status": "fallback",
                "fallback_action": fallback_result,
                "current_step": self.current_step
            }

    async def _evaluate_success_criteria(self, user_message, plan_step):
        """Evaluate if user response meets success criteria"""
        criteria = plan_step.get("success_criteria", "")

        if not criteria:
            return True  # No criteria means any response is success

        evaluation_prompt = f"""Evaluate if this user response meets the success criteria:

User Response: {user_message}
Success Criteria: {criteria}

Respond with only 'SUCCESS' or 'FAILURE'."""

        response = await self.generate_reply_async([{"content": evaluation_prompt, "role": "user"}])

        return "SUCCESS" in response.upper()

    async def _execute_fallback(self, plan_step):
        """Execute fallback action"""
        fallback = plan_step.get("fallback", "Ask for clarification")

        if "ask for clarification" in fallback.lower():
            return "Could you please provide more details or clarify your response?"
        elif "rephrase" in fallback.lower():
            return f"Let me rephrase the question: {plan_step['description']}"
        else:
            return fallback

# Create conversation planner
planner_agent = ConversationPlanner(
    name="conversation_planner",
    system_message="You plan and guide structured conversations to achieve specific goals.",
    llm_config=llm_config
)

# Plan and execute a structured conversation
goal = "Help user choose a programming language for web development"
plan = await planner_agent.plan_conversation(goal, max_steps=4)

# Execute conversation steps based on user responses
# (This would be integrated into the main conversation loop)
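
A minimal driver loop for the planner, assuming user input arrives from stdin; in a real application the responses would come from your chat frontend:

async def run_planned_conversation(planner, goal, max_steps=4):
    plan = await planner.plan_conversation(goal, max_steps=max_steps)
    print(f"Planned {len(plan)} steps")

    while True:
        user_message = input("User: ")
        outcome = await planner.execute_conversation_step(user_message)

        if outcome["status"] == "completed":
            print("Planner:", outcome["message"])
            break
        elif outcome["status"] == "fallback":
            print("Planner:", outcome["fallback_action"])
        else:  # "progress": move on to the next planned step
            print("Planner: next step:", outcome["next_action"])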

Production Deployment Patterns

Agent Orchestration Framework

class AgentOrchestrator:
    def __init__(self):
        self.agents = {}
        self.workflows = {}
        self.monitoring = {
            "active_conversations": 0,
            "completed_tasks": 0,
            "error_rate": 0.0
        }

    def register_agent(self, name, agent, capabilities=None):
        """Register an agent with the orchestrator"""
        self.agents[name] = {
            "agent": agent,
            "capabilities": capabilities or [],
            "status": "available",
            "performance_metrics": {
                "total_calls": 0,
                "success_rate": 1.0,
                "average_response_time": 0.0
            }
        }

    def create_workflow(self, name, steps):
        """Create a workflow definition"""
        self.workflows[name] = {
            "steps": steps,
            "created_at": time.time(),
            "execution_count": 0
        }

    async def execute_workflow(self, workflow_name, input_data):
        """Execute a predefined workflow"""
        if workflow_name not in self.workflows:
            raise ValueError(f"Workflow '{workflow_name}' not found")

        workflow = self.workflows[workflow_name]
        workflow["execution_count"] += 1

        results = {}
        context = {"input": input_data}

        for step in workflow["steps"]:
            step_name = step["name"]
            agent_name = step["agent"]
            task = step["task"]

            if agent_name not in self.agents:
                raise ValueError(f"Agent '{agent_name}' not found")

            agent_info = self.agents[agent_name]

            # Mark agent as busy
            agent_info["status"] = "busy"

            try:
                start_time = time.time()

                # Execute step
                result = await agent_info["agent"].generate_reply_async([
                    {"content": self._prepare_step_message(task, context), "role": "user"}
                ])

                end_time = time.time()

                # Update metrics
                agent_info["performance_metrics"]["total_calls"] += 1
                response_time = end_time - start_time
                agent_info["performance_metrics"]["average_response_time"] = (
                    (agent_info["performance_metrics"]["average_response_time"] *
                     (agent_info["performance_metrics"]["total_calls"] - 1)) +
                    response_time
                ) / agent_info["performance_metrics"]["total_calls"]

                results[step_name] = result
                context[step_name] = result

            finally:
                # Mark agent as available
                agent_info["status"] = "available"

        return results

    def _prepare_step_message(self, task, context):
        """Prepare message for workflow step"""
        message = f"Task: {task}\n"

        if context:
            message += f"Context:\n{json.dumps(context, indent=2)}\n"

        return message

    def get_system_status(self):
        """Get overall system status"""
        agent_status = {}
        for name, info in self.agents.items():
            agent_status[name] = {
                "status": info["status"],
                "capabilities": info["capabilities"],
                "performance": info["performance_metrics"]
            }

        return {
            "agents": agent_status,
            "workflows": list(self.workflows.keys()),
            "monitoring": self.monitoring
        }

# Create production orchestrator
orchestrator = AgentOrchestrator()

# Register agents built in earlier chapters (research_agent, coding_agent, review_agent)
orchestrator.register_agent(
    "researcher",
    research_agent,
    ["research", "analysis", "information_gathering"]
)

orchestrator.register_agent(
    "developer",
    coding_agent,
    ["coding", "implementation", "debugging"]
)

orchestrator.register_agent(
    "reviewer",
    review_agent,
    ["review", "validation", "quality_assurance"]
)

# Create workflow
orchestrator.create_workflow("feature_development", [
    {
        "name": "requirements_analysis",
        "agent": "researcher",
        "task": "Analyze requirements and create detailed specifications"
    },
    {
        "name": "implementation",
        "agent": "developer",
        "task": "Implement the feature based on specifications"
    },
    {
        "name": "code_review",
        "agent": "reviewer",
        "task": "Review code quality and provide feedback"
    }
])

# Execute workflow
results = await orchestrator.execute_workflow("feature_development", {
    "feature": "User authentication system",
    "requirements": ["Secure login", "Password reset", "Session management"]
})

Best Practices

Performance Optimization

  • Caching Strategy: Implement multi-level caching (response, semantic, contextual)
  • Resource Management: Monitor and limit resource usage per agent
  • Async Processing: Use async patterns for concurrent operations
  • Load Balancing: Distribute work across multiple agent instances

Reliability Patterns

  • Circuit Breakers: Implement failure detection and recovery
  • Retry Logic: Exponential backoff for transient failures (a combined circuit-breaker/retry sketch follows this list)
  • Fallback Strategies: Graceful degradation when services fail
  • Health Monitoring: Continuous monitoring of agent health and performance
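
As referenced above, a minimal circuit-breaker plus exponential-backoff sketch for wrapping agent calls; the thresholds, the CircuitBreaker class, and the call_with_retry helper are illustrative assumptions, not part of AG2:

import asyncio
import time

class CircuitBreaker:
    def __init__(self, failure_threshold=5, reset_timeout=60):
        self.failure_threshold = failure_threshold
        self.reset_timeout = reset_timeout
        self.failures = 0
        self.opened_at = None

    def allow(self):
        # Closed, or open long enough that we probe again (half-open)
        return self.opened_at is None or time.time() - self.opened_at >= self.reset_timeout

    def record(self, success):
        if success:
            self.failures, self.opened_at = 0, None
        else:
            self.failures += 1
            if self.failures >= self.failure_threshold:
                self.opened_at = time.time()

async def call_with_retry(breaker, make_call, max_attempts=3, base_delay=1.0):
    """Retry an async call with exponential backoff, guarded by the circuit breaker."""
    for attempt in range(max_attempts):
        if not breaker.allow():
            raise RuntimeError("Circuit open: call skipped")
        try:
            result = await make_call()
            breaker.record(success=True)
            return result
        except Exception:
            breaker.record(success=False)
            if attempt == max_attempts - 1:
                raise
            await asyncio.sleep(base_delay * (2 ** attempt))  # 1s, 2s, 4s, ...

# Usage: await call_with_retry(CircuitBreaker(), lambda: agent.generate_reply_async(messages))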

Scalability Considerations

  • Horizontal Scaling: Deploy multiple instances of similar agents
  • State Management: Proper state synchronization across instances
  • Queue Management: Handle request queuing and prioritization (see the worker-pool sketch after this list)
  • Resource Pooling: Share expensive resources (GPU, API connections)
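
As referenced above, a sketch of simple queue management with a bounded worker pool; the queue size, worker count, and submit helper are illustrative assumptions rather than AG2 APIs:

import asyncio

async def agent_worker(queue, agent):
    """Pull queued requests and answer them one at a time."""
    while True:
        messages, reply_future = await queue.get()
        try:
            reply_future.set_result(await agent.generate_reply_async(messages))
        except Exception as exc:
            reply_future.set_exception(exc)
        finally:
            queue.task_done()

async def submit(queue, messages):
    """Enqueue a request and wait for its reply."""
    reply_future = asyncio.get_running_loop().create_future()
    await queue.put((messages, reply_future))
    return await reply_future

# Inside a running event loop: a bounded queue gives backpressure, a small worker pool caps concurrency
request_queue = asyncio.Queue(maxsize=100)
workers = [asyncio.create_task(agent_worker(request_queue, caching_agent)) for _ in range(4)]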

Observability

  • Metrics Collection: Track performance, error rates, and usage patterns
  • Distributed Tracing: Follow requests across agent interactions
  • Logging Strategy: Structured logging with correlation IDs (a minimal sketch follows this list)
  • Alerting: Set up alerts for critical issues and performance degradation
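
As referenced above, a minimal structured-logging sketch using only the standard library; the JSON field names and the contextvar-based correlation ID are assumptions for illustration:

import json
import logging
import uuid
from contextvars import ContextVar

correlation_id: ContextVar[str] = ContextVar("correlation_id", default="-")

class JsonFormatter(logging.Formatter):
    def format(self, record):
        # One JSON object per log line, tagged with the current correlation ID
        return json.dumps({
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
            "correlation_id": correlation_id.get(),
        })

handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter())
logger = logging.getLogger("ag2.tutorial")
logger.addHandler(handler)
logger.setLevel(logging.INFO)

# Set a fresh correlation ID at the start of each conversation or workflow run
correlation_id.set(str(uuid.uuid4()))
logger.info("workflow step started")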

Summary

In this chapter, we've covered:

  • Nested Chat Patterns: Hierarchical agents and recursive delegation
  • Caching Strategies: Response caching and semantic similarity
  • Advanced Conversation Patterns: Stateful conversations and multi-turn planning
  • Production Deployment: Agent orchestration and workflow management
  • Performance Optimization: Monitoring, scaling, and reliability patterns
  • Best Practices: Production-ready agent system design and maintenance

Next, we'll explore production deployment - scaling, monitoring, and enterprise integration.


Ready for the next chapter? Chapter 8: Production Deployment

Generated for Awesome Code Docs

What Problem Does This Solve?

Most teams struggle here because the hard part is not writing more code, but drawing clear boundaries between coordination logic, cached state, and agent responses so behavior stays predictable as complexity grows.

In practical terms, this chapter helps you avoid three common failures:

  • coupling core logic too tightly to one implementation path
  • missing the handoff boundaries between setup, execution, and validation
  • shipping changes without clear rollback or observability strategy

After working through this chapter, you should be able to reason about Chapter 7: Advanced Patterns & Optimization as an operating subsystem inside AG2 Tutorial: Next-Generation Multi-Agent Framework, with explicit contracts for inputs, state transitions, and outputs.

Use the implementation notes around nested chats, caching, and orchestration as your checklist when adapting these patterns to your own repository.

How it Works Under the Hood

Under the hood, the patterns in this chapter usually follow a repeatable control path:

  1. Context bootstrap: initialize runtime config (llm_config, sub-agents, caches) and other prerequisites.
  2. Input normalization: shape incoming messages so downstream agents receive stable contracts.
  3. Core execution: run the main logic branch (a nested chat, a recursive delegation, or a workflow step) and propagate intermediate state through the conversation context.
  4. Policy and safety checks: enforce limits (max turns, max depth, cache TTLs), auth scopes, and failure boundaries.
  5. Output composition: return canonical result payloads for downstream consumers.
  6. Operational telemetry: emit the logs and metrics needed for debugging and performance tuning.

When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.

Source Walkthrough

Use the following upstream sources to verify implementation details while reading this chapter:

  • View Repo (github.com). Why it matters: the primary repository for the code this tutorial documents.
  • github.com/microsoft/autogen. Why it matters: the upstream AutoGen project from which AG2 originated.
  • Awesome Code Docs (github.com). Why it matters: the documentation collection this tutorial is generated for.

Suggested trace strategy:

  • search the upstream code for the agent classes used here (AssistantAgent and its async conversation helpers) to map concrete implementation paths
  • compare docs claims against actual runtime/config code before reusing patterns in production

Chapter Connections