Skip to content

Commit 6232716

Browse files
authored
Adjust the number of workers to avoid DB pool overflow (#22)
1 parent 13920d4 commit 6232716

7 files changed

Lines changed: 834 additions & 126 deletions

File tree

Dockerfile

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,18 @@ ENV CLERK_API_KEY=${ARG_CLERK_API_KEY}
1616
ENV CLERK_API_URL=${ARG_CLERK_API_URL:-https://api.clerk.dev/v1}
1717
ENV FORGE_DEBUG_LOGGING=${ARG_DEBUG_LOGGING}
1818

19+
# Database connection optimization environment variables
20+
# These settings optimize for PostgreSQL connection limits
21+
ENV DB_POOL_SIZE=3
22+
ENV DB_MAX_OVERFLOW=2
23+
ENV DB_POOL_TIMEOUT=30
24+
ENV DB_POOL_RECYCLE=1800
25+
ENV DB_POOL_PRE_PING=true
26+
27+
# Reduced worker count to manage database connections
28+
# With 5 workers: max 50 connections (5 × 3 × 2 engines = 30 pool + 5 × 2 × 2 engines = 20 overflow)
29+
ENV WORKERS=5
30+
1931
# Install system dependencies including PostgreSQL client and gosu for user privilege management
2032
RUN apt-get update && apt-get install -y \
2133
postgresql-client \
@@ -37,5 +49,5 @@ USER nobody
3749
# Expose port
3850
EXPOSE 8000
3951

40-
# Run the application (this command is passed to the entrypoint)
41-
CMD ["gunicorn", "app.main:app", "-k", "uvicorn.workers.UvicornWorker", "--workers", "10", "--bind", "0.0.0.0:8000"]
52+
# Use environment variable for workers count and optimize for database connections
53+
CMD ["sh", "-c", "gunicorn app.main:app -k uvicorn.workers.UvicornWorker --workers ${WORKERS:-5} --bind 0.0.0.0:8000 --timeout 120 --max-requests 1000 --max-requests-jitter 100"]

app/api/dependencies.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,11 @@ async def get_current_user(
109109
except JWTError as err:
110110
raise credentials_exception from err
111111

112-
result = await db.execute(select(User).filter(User.username == token_data.username))
112+
result = await db.execute(
113+
select(User)
114+
.options(selectinload(User.api_keys)) # Eager load Forge API keys
115+
.filter(User.username == token_data.username)
116+
)
113117
user = result.scalar_one_or_none()
114118
if user is None:
115119
raise credentials_exception
@@ -356,7 +360,11 @@ async def get_current_user_from_clerk(
356360
)
357361

358362
# Find user by clerk_user_id
359-
result = await db.execute(select(User).filter(User.clerk_user_id == clerk_user_id))
363+
result = await db.execute(
364+
select(User)
365+
.options(selectinload(User.api_keys)) # Eager load Forge API keys
366+
.filter(User.clerk_user_id == clerk_user_id)
367+
)
360368
user = result.scalar_one_or_none()
361369

362370
# User doesn't exist yet, create one

app/api/routes/health.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
"""
2+
Health check and monitoring endpoints for production deployments.
3+
"""
4+
5+
from fastapi import APIRouter, HTTPException
6+
from sqlalchemy import text
7+
8+
from app.core.database import get_connection_info, get_db_session
9+
from app.core.logger import get_logger
10+
11+
logger = get_logger(name="health")
12+
router = APIRouter()
13+
14+
15+
@router.get("/health")
16+
async def health_check():
17+
"""
18+
Basic health check endpoint.
19+
Returns 200 if the service is running.
20+
"""
21+
return {"status": "healthy", "service": "forge"}
22+
23+
24+
@router.get("/health/database")
25+
async def database_health_check():
26+
"""
27+
Database health check endpoint.
28+
Returns detailed information about database connectivity and pool status.
29+
"""
30+
try:
31+
# Test database connection
32+
async with get_db_session() as session:
33+
result = await session.execute(text("SELECT 1"))
34+
result.scalar()
35+
36+
# Get connection pool information
37+
pool_info = get_connection_info()
38+
39+
# Calculate connection usage
40+
sync_pool = pool_info['sync_engine']
41+
async_pool = pool_info['async_engine']
42+
43+
sync_usage = sync_pool['checked_out'] / (pool_info['pool_size'] + pool_info['max_overflow']) * 100
44+
async_usage = async_pool['checked_out'] / (pool_info['pool_size'] + pool_info['max_overflow']) * 100
45+
46+
return {
47+
"status": "healthy",
48+
"database": "connected",
49+
"connection_pools": {
50+
"sync": {
51+
"checked_out": sync_pool['checked_out'],
52+
"checked_in": sync_pool['checked_in'],
53+
"size": sync_pool['size'],
54+
"usage_percent": round(sync_usage, 1)
55+
},
56+
"async": {
57+
"checked_out": async_pool['checked_out'],
58+
"checked_in": async_pool['checked_in'],
59+
"size": async_pool['size'],
60+
"usage_percent": round(async_usage, 1)
61+
}
62+
},
63+
"configuration": {
64+
"pool_size": pool_info['pool_size'],
65+
"max_overflow": pool_info['max_overflow'],
66+
"pool_timeout": pool_info['pool_timeout'],
67+
"pool_recycle": pool_info['pool_recycle']
68+
}
69+
}
70+
71+
except Exception as e:
72+
logger.error(f"Database health check failed: {e}")
73+
raise HTTPException(
74+
status_code=503,
75+
detail={
76+
"status": "unhealthy",
77+
"database": "disconnected",
78+
"error": str(e)
79+
}
80+
)
81+
82+
83+
@router.get("/health/detailed")
84+
async def detailed_health_check():
85+
"""
86+
Detailed health check including all service components.
87+
"""
88+
try:
89+
# Test database
90+
async with get_db_session() as session:
91+
db_result = await session.execute(text("SELECT version()"))
92+
db_version = db_result.scalar()
93+
94+
pool_info = get_connection_info()
95+
96+
return {
97+
"status": "healthy",
98+
"timestamp": "2025-01-21T19:15:00Z", # This would be dynamic in real implementation
99+
"service": "forge",
100+
"version": "0.1.0",
101+
"database": {
102+
"status": "connected",
103+
"version": db_version,
104+
"pool_status": pool_info
105+
},
106+
"environment": {
107+
"workers": pool_info.get('workers', 'unknown'),
108+
"pool_size": pool_info['pool_size'],
109+
"max_overflow": pool_info['max_overflow']
110+
}
111+
}
112+
113+
except Exception as e:
114+
logger.error(f"Detailed health check failed: {e}")
115+
raise HTTPException(
116+
status_code=503,
117+
detail={
118+
"status": "unhealthy",
119+
"error": str(e),
120+
"timestamp": "2025-01-21T19:15:00Z"
121+
}
122+
)

app/core/database.py

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,14 @@
88

99
load_dotenv()
1010

11-
POOL_SIZE = 5
12-
MAX_OVERFLOW = 10
13-
MAX_TIMEOUT = 30
14-
POOL_RECYCLE = 1800
11+
# Production-optimized connection pool settings
12+
# With 5 Gunicorn workers, this allows max 30 pooled connections total (5 workers × 3 pool_size × 2 engines)
13+
# Plus 20 overflow connections (5 workers × 2 max_overflow × 2 engines) = 50 max connections
14+
POOL_SIZE = int(os.getenv("DB_POOL_SIZE", "3")) # Reduced from 5 to 3
15+
MAX_OVERFLOW = int(os.getenv("DB_MAX_OVERFLOW", "2")) # Reduced from 10 to 2
16+
MAX_TIMEOUT = int(os.getenv("DB_POOL_TIMEOUT", "30"))
17+
POOL_RECYCLE = int(os.getenv("DB_POOL_RECYCLE", "1800")) # 30 minutes
18+
POOL_PRE_PING = os.getenv("DB_POOL_PRE_PING", "true").lower() == "true"
1519

1620
SQLALCHEMY_DATABASE_URL = os.getenv("DATABASE_URL")
1721
if not SQLALCHEMY_DATABASE_URL:
@@ -24,6 +28,7 @@
2428
max_overflow=MAX_OVERFLOW,
2529
pool_timeout=MAX_TIMEOUT,
2630
pool_recycle=POOL_RECYCLE,
31+
pool_pre_ping=POOL_PRE_PING, # Enables connection health checks
2732
echo=False,
2833
)
2934
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
@@ -52,6 +57,7 @@ def get_db():
5257
max_overflow=MAX_OVERFLOW,
5358
pool_timeout=MAX_TIMEOUT,
5459
pool_recycle=POOL_RECYCLE,
60+
pool_pre_ping=POOL_PRE_PING, # Enables connection health checks
5561
echo=False,
5662
)
5763

@@ -84,4 +90,26 @@ async def get_db_session():
8490
await session.rollback()
8591
raise
8692
finally:
87-
await session.close()
93+
await session.close()
94+
95+
96+
def get_connection_info():
97+
"""Get current connection pool information for monitoring"""
98+
return {
99+
"pool_size": POOL_SIZE,
100+
"max_overflow": MAX_OVERFLOW,
101+
"pool_timeout": MAX_TIMEOUT,
102+
"pool_recycle": POOL_RECYCLE,
103+
"sync_engine": {
104+
"pool": engine.pool,
105+
"checked_out": engine.pool.checkedout(),
106+
"checked_in": engine.pool.checkedin(),
107+
"size": engine.pool.size(),
108+
},
109+
"async_engine": {
110+
"pool": async_engine.pool,
111+
"checked_out": async_engine.pool.checkedout(),
112+
"checked_in": async_engine.pool.checkedin(),
113+
"size": async_engine.pool.size(),
114+
}
115+
}

0 commit comments

Comments
 (0)