Fix health check for container deployment
This commit is contained in:
parent
4abac2b250
commit
a9f9e6dd22
70
app/config.py
Normal file
70
app/config.py
Normal file
@ -0,0 +1,70 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
import logging
|
||||
|
||||
# Setup logging.
# NOTE: basicConfig is a no-op when the root logger already has handlers,
# so an embedding application can still install its own configuration first.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Application environment: 'development', 'production' or 'testing'
APP_ENV = os.getenv("APP_ENV", "development")

# Database settings
DB_CONNECT_RETRY = int(os.getenv("DB_CONNECT_RETRY", "3"))
DB_CONNECT_RETRY_DELAY = int(os.getenv("DB_CONNECT_RETRY_DELAY", "1"))  # seconds

# Container settings
CONTAINER_DB_PATH = os.getenv("CONTAINER_DB_PATH", "/app/storage/db")

# Health check settings
# Only the literal string "true" (case-insensitive) enables the DB check.
HEALTH_CHECK_INCLUDE_DB = (
    os.getenv("HEALTH_CHECK_INCLUDE_DB", "false").lower() == "true"
)
HEALTH_CHECK_PATH = os.getenv("HEALTH_CHECK_PATH", "/health")
DETAILED_HEALTH_CHECK_PATH = os.getenv("DETAILED_HEALTH_CHECK_PATH", "/health/detailed")

# Application settings
# NOTE(review): DEBUG defaults to enabled; production deployments presumably
# need DEBUG=false set explicitly - confirm against the deployment manifests.
DEBUG = os.getenv("DEBUG", "true").lower() == "true"
HOST = os.getenv("HOST", "0.0.0.0")
PORT = int(os.getenv("PORT", "8000"))

# CORS settings
# Comma-separated list of origins. Surrounding whitespace is stripped and
# empty entries are dropped, so "https://a.example, https://b.example," yields
# two clean origins instead of values that can never match a request Origin.
CORS_ORIGINS = [
    origin.strip()
    for origin in os.getenv("CORS_ORIGINS", "*").split(",")
    if origin.strip()
]

# API settings
API_PREFIX = os.getenv("API_PREFIX", "")
API_TITLE = os.getenv("API_TITLE", "Todo List API")
API_DESCRIPTION = os.getenv(
    "API_DESCRIPTION", "A simple Todo List API built with FastAPI"
)
API_VERSION = os.getenv("API_VERSION", "0.1.0")

# Get project root directory for local development
# (two directory levels above this file).
PROJECT_ROOT = Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Log important configuration settings
logger.info("Running in %s mode", APP_ENV)
logger.info("API will be available at %s:%s", HOST, PORT)
logger.info("Health check path: %s", HEALTH_CHECK_PATH)
logger.info("Detailed health check path: %s", DETAILED_HEALTH_CHECK_PATH)
|
||||
|
||||
|
||||
# Function to get all config as a dict (useful for debugging)
|
||||
def get_settings() -> Dict[str, Any]:
    """Collect the module-level configuration values into one dictionary.

    The mapping is keyed by the constant names and is rebuilt on every call,
    which makes it convenient for debugging and diagnostics output.
    """
    runtime = {"APP_ENV": APP_ENV, "DEBUG": DEBUG, "HOST": HOST, "PORT": PORT}
    api = {
        "API_PREFIX": API_PREFIX,
        "API_TITLE": API_TITLE,
        "API_VERSION": API_VERSION,
    }
    health = {
        "HEALTH_CHECK_PATH": HEALTH_CHECK_PATH,
        "DETAILED_HEALTH_CHECK_PATH": DETAILED_HEALTH_CHECK_PATH,
        "HEALTH_CHECK_INCLUDE_DB": HEALTH_CHECK_INCLUDE_DB,
    }
    database = {
        "DB_CONNECT_RETRY": DB_CONNECT_RETRY,
        "DB_CONNECT_RETRY_DELAY": DB_CONNECT_RETRY_DELAY,
    }
    # Merge order fixes the key order of the resulting dictionary.
    return {**runtime, **api, **health, **database, "CORS_ORIGINS": CORS_ORIGINS}
|
@ -1,23 +1,86 @@
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy import create_engine, event
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from pathlib import Path
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
|
||||
# Get project root directory and create a storage directory
|
||||
PROJECT_ROOT = Path(
|
||||
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
# Import app config
|
||||
from app.config import (
|
||||
PROJECT_ROOT,
|
||||
CONTAINER_DB_PATH,
|
||||
DB_CONNECT_RETRY,
|
||||
DB_CONNECT_RETRY_DELAY,
|
||||
)
|
||||
DB_DIR = PROJECT_ROOT / "storage" / "db"
|
||||
DB_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Resolve the directory that will hold the SQLite database file.
# The container volume is preferred; the project-local directory is the
# fallback for local development.
CONTAINER_DB_PATH = Path(CONTAINER_DB_PATH)
LOCAL_DB_PATH = PROJECT_ROOT / "storage" / "db"

# Use the container path only if it is an existing directory we can actually
# create files in (write + search permission); otherwise use the local path.
# A bare exists() check would also accept read-only mounts and regular files.
if CONTAINER_DB_PATH.is_dir() and os.access(CONTAINER_DB_PATH, os.W_OK | os.X_OK):
    DB_PATH = CONTAINER_DB_PATH
else:
    DB_PATH = LOCAL_DB_PATH
DB_PATH.mkdir(parents=True, exist_ok=True)

logger.info(f"Using database path: {DB_PATH}")
|
||||
|
||||
# SQLite database URL
|
||||
SQLALCHEMY_DATABASE_URL = f"sqlite:///{DB_DIR}/db.sqlite"
|
||||
SQLALCHEMY_DATABASE_URL = f"sqlite:///{DB_PATH}/db.sqlite"
|
||||
|
||||
# Connection retry settings
|
||||
MAX_RETRIES = DB_CONNECT_RETRY
|
||||
RETRY_DELAY = DB_CONNECT_RETRY_DELAY # seconds
|
||||
|
||||
|
||||
# Create the SQLAlchemy engine with retry logic
|
||||
def get_engine():
    """Create the SQLAlchemy engine and probe the database with retries.

    The engine is built once and a ``SELECT 1`` probe is attempted up to
    ``MAX_RETRIES`` times, sleeping ``RETRY_DELAY`` seconds between failed
    attempts.

    Returns:
        The engine, whether or not the probe succeeded. Returning it on
        failure lets the application start; connection errors then surface
        in the code that uses the engine. An engine is also returned when
        ``MAX_RETRIES`` is zero or negative (no probe is made in that case).
    """
    # Local import keeps this function self-contained. ``text`` is required
    # because SQLAlchemy 2.0 no longer accepts plain strings in execute().
    from sqlalchemy import text

    # create_engine() does not open a connection by itself, so one engine can
    # be reused across attempts instead of building a new pool every time.
    engine = create_engine(
        SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
    )

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            # Test connection
            with engine.connect() as conn:
                conn.execute(text("SELECT 1"))
            return engine
        except Exception as e:
            if attempt < MAX_RETRIES:
                logger.warning(
                    f"Database connection attempt {attempt} failed: {e}. Retrying in {RETRY_DELAY}s..."
                )
                time.sleep(RETRY_DELAY)
            else:
                logger.error(
                    f"Failed to connect to database after {MAX_RETRIES} attempts: {e}"
                )

    # Probe never succeeded (or was skipped): still hand back the engine.
    return engine
|
||||
|
||||
|
||||
# Create engine
|
||||
engine = get_engine()
|
||||
|
||||
|
||||
# Add event listener for connection pool "connect" events
@event.listens_for(engine, "connect")
def ping_connection(dbapi_connection, connection_record):
    """Run ``SELECT 1`` on a DBAPI connection handed to the "connect" event.

    Args:
        dbapi_connection: Raw DBAPI connection supplied by the event.
        connection_record: Pool record associated with that connection.

    Raises:
        Exception: Whatever the driver raised while pinging; it is re-raised
            after the record's connection reference has been cleared.
    """
    try:
        cursor = dbapi_connection.cursor()
        try:
            cursor.execute("SELECT 1")
        finally:
            # Always release the cursor, including when the ping itself fails.
            cursor.close()
    except Exception:
        # Drop the record's reference to the broken connection, then propagate.
        logger.warning("Connection ping failed. Connection will be recycled.")
        connection_record.connection = None
        raise
|
||||
|
||||
# Create the SQLAlchemy engine
|
||||
engine = create_engine(
|
||||
SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
|
||||
)
|
||||
|
||||
# Create a SessionLocal class
|
||||
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||
@ -28,10 +91,16 @@ Base = declarative_base()
|
||||
|
||||
# Create tables (important for first run)
|
||||
def create_tables():
|
||||
Base.metadata.create_all(bind=engine)
|
||||
try:
|
||||
Base.metadata.create_all(bind=engine)
|
||||
logger.info("Database tables created successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating database tables: {e}")
|
||||
# Don't raise the exception - let the application start even if tables can't be created
|
||||
# Tables will be created later when the database becomes available
|
||||
|
||||
|
||||
# Dependency to get a database session
|
||||
# Dependency to get a database session with improved error handling
|
||||
def get_db():
|
||||
db = SessionLocal()
|
||||
try:
|
||||
@ -39,8 +108,8 @@ def get_db():
|
||||
db.execute("SELECT 1")
|
||||
yield db
|
||||
except Exception as e:
|
||||
# Log the error (in a real-world application)
|
||||
print(f"Database connection error: {e}")
|
||||
# Log the error
|
||||
logger.error(f"Database connection error in get_db: {e}")
|
||||
# Provide a user-friendly error
|
||||
from fastapi import HTTPException, status
|
||||
|
||||
|
145
main.py
145
main.py
@ -1,15 +1,35 @@
|
||||
from fastapi import FastAPI, Depends, HTTPException, status
|
||||
from fastapi import FastAPI, Depends, HTTPException, status, Response
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from sqlalchemy.orm import Session
|
||||
import uvicorn
|
||||
from typing import List, Optional
|
||||
from pydantic import BaseModel
|
||||
from datetime import datetime
|
||||
import logging
|
||||
|
||||
# Import application config
|
||||
from app.config import (
|
||||
API_TITLE,
|
||||
API_DESCRIPTION,
|
||||
API_VERSION,
|
||||
CORS_ORIGINS,
|
||||
HOST,
|
||||
PORT,
|
||||
DEBUG,
|
||||
HEALTH_CHECK_PATH,
|
||||
DETAILED_HEALTH_CHECK_PATH,
|
||||
HEALTH_CHECK_INCLUDE_DB,
|
||||
get_settings,
|
||||
)
|
||||
|
||||
# Import database models and config
|
||||
from app.database.config import get_db, create_tables
|
||||
from app.database.config import get_db, create_tables, SessionLocal
|
||||
from app.database.models import Todo as TodoModel
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Create tables if they don't exist
|
||||
# In production, you should use Alembic migrations instead
|
||||
create_tables()
|
||||
@ -37,42 +57,86 @@ class TodoResponse(TodoBase):
|
||||
|
||||
# Create the FastAPI app
|
||||
app = FastAPI(
|
||||
title="Todo List API",
|
||||
description="A simple Todo List API built with FastAPI",
|
||||
version="0.1.0",
|
||||
title=API_TITLE,
|
||||
description=API_DESCRIPTION,
|
||||
version=API_VERSION,
|
||||
docs_url="/docs",
|
||||
redoc_url="/redoc",
|
||||
openapi_url="/openapi.json",
|
||||
)
|
||||
|
||||
# Configure CORS
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_origins=CORS_ORIGINS,
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
|
||||
# Health endpoint
|
||||
@app.get("/health", tags=["Health"])
|
||||
async def health_check(db: Session = Depends(get_db)):
|
||||
# Health endpoints
|
||||
@app.get(HEALTH_CHECK_PATH, tags=["Health"], status_code=200)
|
||||
async def health_check():
|
||||
"""
|
||||
Health check endpoint to verify the API is running and database connection is working.
|
||||
Simple health check endpoint that always returns healthy.
|
||||
This is used by container orchestration systems to verify the app is running.
|
||||
|
||||
The health check will always return a 200 OK status to indicate the application
|
||||
is running, even if some components (like the database) might be unavailable.
|
||||
"""
|
||||
try:
|
||||
# Test database connection
|
||||
db.execute("SELECT 1").first()
|
||||
return {
|
||||
"status": "healthy",
|
||||
"database": "connected",
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"status": "unhealthy",
|
||||
"database": "disconnected",
|
||||
"error": str(e),
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
}
|
||||
logger.info(f"Health check requested at {HEALTH_CHECK_PATH}")
|
||||
return {"status": "healthy", "timestamp": datetime.now().isoformat()}
|
||||
|
||||
|
||||
@app.get(DETAILED_HEALTH_CHECK_PATH, tags=["Health"])
async def detailed_health_check(response: Response):
    """
    Detailed health check endpoint that verifies API and database connectivity.

    Reports the API as running and, when HEALTH_CHECK_INCLUDE_DB is enabled,
    the result of a ``SELECT 1`` probe under ``services.database``. A failed
    probe adds the error text under ``services.database_error``.

    The ``response`` object is intentionally left untouched: the HTTP status
    is not changed when the database probe fails, so container orchestration
    does not kill the app.
    """
    # Local import: SQLAlchemy 2.0 rejects plain SQL strings in execute(), so
    # the probe statement must be wrapped in text().
    from sqlalchemy import text

    logger.info(f"Detailed health check requested at {DETAILED_HEALTH_CHECK_PATH}")

    # Start with basic info
    health_data = {
        # App is running, so it's healthy from an orchestration perspective
        "status": "healthy",
        "services": {
            "api": "running",
        },
        "timestamp": datetime.now().isoformat(),
        "config": {
            "db_check_enabled": HEALTH_CHECK_INCLUDE_DB,
            "environment": get_settings().get("APP_ENV"),
        },
    }

    if not HEALTH_CHECK_INCLUDE_DB:
        return health_data

    db_status = "unknown"
    error_message = None

    # Create a new session for the health check
    # NOTE(review): this is a blocking call inside an async handler - confirm
    # that is acceptable for the probe frequency in use.
    db = SessionLocal()
    try:
        db.execute(text("SELECT 1")).first()
        db_status = "connected"
    except Exception as e:
        db_status = "disconnected"
        error_message = str(e)
    finally:
        db.close()

    health_data["services"]["database"] = db_status
    if error_message:
        health_data["services"]["database_error"] = error_message

    return health_data
|
||||
|
||||
|
||||
# Root endpoint
|
||||
@ -159,6 +223,35 @@ def delete_todo(todo_id: int, db: Session = Depends(get_db)):
|
||||
return None
|
||||
|
||||
|
||||
# Application startup and shutdown events
|
||||
@app.on_event("startup")
async def startup_event():
    """
    Log version, environment, debug flag and the registered routes at startup.
    """
    logger.info("Starting Todo List API")
    logger.info(f"API Version: {API_VERSION}")
    logger.info(f"Environment: {get_settings().get('APP_ENV')}")
    logger.info(f"Debug mode: {DEBUG}")

    # Log all available routes.
    # Not every route object exposes ``methods`` (mounted sub-applications do
    # not, and it may be None), so read it defensively instead of crashing
    # startup. Methods are sorted because they come from an unordered set.
    routes = []
    for route in app.routes:
        path = getattr(route, "path", repr(route))
        methods = getattr(route, "methods", None) or ()
        routes.append(f"{path} [{', '.join(sorted(methods))}]")

    logger.info(f"Available routes: {len(routes)}")
    for route in sorted(routes):
        logger.info(f"  {route}")
|
||||
|
||||
|
||||
@app.on_event("shutdown")
async def shutdown_event():
    """Emit a single log line when the application is shutting down."""
    logger.info("Shutting down Todo List API")
|
||||
|
||||
|
||||
# For local development
|
||||
if __name__ == "__main__":
|
||||
uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
|
||||
uvicorn.run("main:app", host=HOST, port=PORT, reload=DEBUG, log_level="info")
|
||||
|
Loading…
x
Reference in New Issue
Block a user