feat: implement LLM service using litellm for actual inference

Backend IM Bot 2025-04-29 17:12:55 +00:00
parent 97b6d4ec21
commit f1bf91258a
3 changed files with 80 additions and 50 deletions

View File

@@ -1,72 +1,56 @@
 from fastapi import APIRouter, HTTPException, status
-from typing import Dict, Any, Optional
+from typing import Dict
 from pydantic import BaseModel
-from helpers.generic_helpers import (
-    create_generic_item,
-    log_error,
-    safe_json_serialize
-)
+from helpers.generic_helpers import process_llm_request, validate_data, log_error
 
 router = APIRouter()
 
 class LLMRequest(BaseModel):
+    model: str
     prompt: str
-    model: Optional[str] = "gpt-3.5-turbo"
-    max_tokens: Optional[int] = 1000
-    temperature: Optional[float] = 0.7
-    options: Optional[Dict[str, Any]] = None
+    temperature: float = 0.7
+    max_tokens: int = 1000
 
 class LLMResponse(BaseModel):
     id: str
-    created_at: str
-    updated_at: str
-    prompt: str
+    content: str
     model: str
-    response: str
-    tokens_used: Optional[int] = None
-    metadata: Optional[Dict[str, Any]] = None
+    created: int
+    usage: Dict[str, int]
 
-@router.post("/llm", status_code=status.HTTP_201_CREATED, response_model=LLMResponse)
-async def process_llm_request(request: LLMRequest):
+@router.post("/llm", status_code=status.HTTP_200_OK, response_model=LLMResponse)
+async def generate_llm_response(request: LLMRequest):
     """
-    Process a request to generate text using an LLM model.
-    This endpoint accepts a prompt and optional parameters, then returns the generated response.
+    Generate a response from an LLM model using the litellm library.
+    This endpoint accepts a model name, prompt, and optional parameters,
+    then calls the actual LLM service to generate a response.
     """
     try:
         # Validate required fields
-        if not request.prompt:
+        required_fields = ["model", "prompt"]
+        if not validate_data(request.dict(), required_fields):
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
-                detail="Prompt is required"
+                detail="Missing required fields: model and prompt are required"
             )
 
-        # Prepare data for storage
-        llm_data = {
-            "prompt": request.prompt,
-            "model": request.model,
-            "response": f"Generated response for: {request.prompt}",  # Mock response
-            "tokens_used": len(request.prompt.split()) * 2,  # Mock token count
-            "metadata": {
-                "max_tokens": request.max_tokens,
-                "temperature": request.temperature,
-                "options": request.options or {}
-            }
-        }
-
-        # Create item in storage
-        result = create_generic_item(llm_data)
-
-        # Return serialized result
-        return safe_json_serialize(result)
+        # Process the LLM request using the helper function
+        result = process_llm_request(
+            model=request.model,
+            prompt=request.prompt,
+            temperature=request.temperature,
+            max_tokens=request.max_tokens
+        )
+
+        return result
     except HTTPException:
-        # Re-raise HTTP exceptions
+        # Re-raise HTTP exceptions as they already have status codes
         raise
     except Exception as e:
-        # Log unexpected errors
-        log_error("Unexpected error processing LLM request", e)
+        log_error("Unexpected error in LLM endpoint", e)
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="An error occurred while processing your request"
+            detail=f"Failed to process LLM request: {str(e)}"
         )
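
For quick verification of the reworked route, here is a minimal sketch using FastAPI's TestClient. The import path main:app and the unprefixed /llm mount are assumptions about how this router is wired into the application, and the call only succeeds if credentials for the chosen litellm provider are configured.

# Hypothetical smoke test for the new /llm route (sketch, not part of the commit).
# Assumes the FastAPI app lives in main.py, the router is mounted without a prefix,
# and provider credentials (e.g. OPENAI_API_KEY) are available to litellm.
from fastapi.testclient import TestClient

from main import app  # assumed application module

client = TestClient(app)

payload = {
    "model": "gpt-3.5-turbo",
    "prompt": "Summarize what litellm does in one sentence.",
    "temperature": 0.2,
    "max_tokens": 100,
}

response = client.post("/llm", json=payload)
print(response.status_code)          # 200 on success (changed from 201)
print(response.json()["content"])    # generated text
print(response.json()["usage"])      # token counts reported by litellm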

View File

@@ -6,16 +6,14 @@ import traceback
 import time
 import hashlib
 from fastapi import HTTPException
+import litellm
 
-# Since we don't have specific entity information and no model/schema code,
-# we'll create generic utility helper functions that don't rely on database access
-
-# In-memory data store as fallback
-_generic_store: List[Dict[str, Any]] = []
-
 # Configure logging
 logger = logging.getLogger(__name__)
 
+# In-memory data store as fallback
+_generic_store: List[Dict[str, Any]] = []
+
 def generate_unique_id() -> str:
     """
     Generates a unique identifier.
@@ -288,3 +286,50 @@ def handle_http_error(status_code: int, detail: str) -> None:
         HTTPException: With the specified status code and detail
     """
     raise HTTPException(status_code=status_code, detail=detail)
+
+def process_llm_request(model: str, prompt: str, temperature: float = 0.7, max_tokens: int = 1000) -> Dict[str, Any]:
+    """
+    Processes an LLM request using litellm to handle the actual inference.
+
+    Args:
+        model (str): The LLM model to use for inference
+        prompt (str): The prompt text to send to the LLM
+        temperature (float): Controls randomness in the output (0-1)
+        max_tokens (int): Maximum number of tokens to generate
+
+    Returns:
+        Dict[str, Any]: The LLM response with content and metadata
+    """
+    try:
+        logger.info(f"Sending request to LLM model: {model}")
+
+        # Make the actual LLM call using litellm
+        response = litellm.completion(
+            model=model,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=temperature,
+            max_tokens=max_tokens
+        )
+
+        # Process and return the response
+        result = {
+            "id": response.id,
+            "content": response.choices[0].message.content,
+            "model": response.model,
+            "created": response.created,
+            "usage": {
+                "prompt_tokens": response.usage.prompt_tokens,
+                "completion_tokens": response.usage.completion_tokens,
+                "total_tokens": response.usage.total_tokens
+            }
+        }
+
+        logger.info(f"LLM request completed successfully. Used {result['usage']['total_tokens']} tokens.")
+        return result
+    except Exception as e:
+        log_error("Error processing LLM request", e)
+        raise HTTPException(
+            status_code=500,
+            detail=f"LLM processing error: {str(e)}"
+        )
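
The new helper can also be exercised on its own, outside the FastAPI route. A small sketch follows; it assumes the relevant provider key (for example OPENAI_API_KEY when targeting an OpenAI-hosted model) is already exported, since litellm resolves credentials from environment variables.

# Direct use of process_llm_request (sketch). Provider credentials such as
# OPENAI_API_KEY are assumed to be set in the environment for litellm to use.
from helpers.generic_helpers import process_llm_request

result = process_llm_request(
    model="gpt-3.5-turbo",
    prompt="Write a haiku about FastAPI.",
    temperature=0.7,
    max_tokens=64,
)

print(result["content"])                 # generated text
print(result["usage"]["total_tokens"])   # prompt + completion tokens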

View File

@@ -10,3 +10,4 @@ alembic>=1.13.1
 jose
 passlib
 pydantic
+litellm