feat: implement LLM service using litellm for actual inference

Backend IM Bot 2025-04-29 17:12:55 +00:00
parent 97b6d4ec21
commit f1bf91258a
3 changed files with 80 additions and 50 deletions

View File

@@ -1,72 +1,56 @@
 from fastapi import APIRouter, HTTPException, status
-from typing import Dict, Any, Optional
+from typing import Dict
 from pydantic import BaseModel
-from helpers.generic_helpers import (
-    create_generic_item,
-    log_error,
-    safe_json_serialize
-)
+from helpers.generic_helpers import process_llm_request, validate_data, log_error
 
 router = APIRouter()
 
 class LLMRequest(BaseModel):
+    model: str
     prompt: str
-    model: Optional[str] = "gpt-3.5-turbo"
-    max_tokens: Optional[int] = 1000
-    temperature: Optional[float] = 0.7
-    options: Optional[Dict[str, Any]] = None
+    temperature: float = 0.7
+    max_tokens: int = 1000
 
 class LLMResponse(BaseModel):
     id: str
-    created_at: str
-    updated_at: str
-    prompt: str
+    content: str
     model: str
-    response: str
-    tokens_used: Optional[int] = None
-    metadata: Optional[Dict[str, Any]] = None
+    created: int
+    usage: Dict[str, int]
 
-@router.post("/llm", status_code=status.HTTP_201_CREATED, response_model=LLMResponse)
-async def process_llm_request(request: LLMRequest):
+@router.post("/llm", status_code=status.HTTP_200_OK, response_model=LLMResponse)
+async def generate_llm_response(request: LLMRequest):
     """
-    Process a request to generate text using an LLM model.
-    This endpoint accepts a prompt and optional parameters, then returns the generated response.
+    Generate a response from an LLM model using the litellm library.
+    This endpoint accepts a model name, prompt, and optional parameters,
+    then calls the actual LLM service to generate a response.
     """
     try:
         # Validate required fields
-        if not request.prompt:
+        required_fields = ["model", "prompt"]
+        if not validate_data(request.dict(), required_fields):
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
-                detail="Prompt is required"
+                detail="Missing required fields: model and prompt are required"
             )
 
-        # Prepare data for storage
-        llm_data = {
-            "prompt": request.prompt,
-            "model": request.model,
-            "response": f"Generated response for: {request.prompt}",  # Mock response
-            "tokens_used": len(request.prompt.split()) * 2,  # Mock token count
-            "metadata": {
-                "max_tokens": request.max_tokens,
-                "temperature": request.temperature,
-                "options": request.options or {}
-            }
-        }
+        # Process the LLM request using the helper function
+        result = process_llm_request(
+            model=request.model,
+            prompt=request.prompt,
+            temperature=request.temperature,
+            max_tokens=request.max_tokens
+        )
 
-        # Create item in storage
-        result = create_generic_item(llm_data)
-        # Return serialized result
-        return safe_json_serialize(result)
+        return result
     except HTTPException:
-        # Re-raise HTTP exceptions
+        # Re-raise HTTP exceptions as they already have status codes
         raise
     except Exception as e:
         # Log unexpected errors
-        log_error("Unexpected error processing LLM request", e)
+        log_error("Unexpected error in LLM endpoint", e)
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="An error occurred while processing your request"
+            detail=f"Failed to process LLM request: {str(e)}"
         )
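
For reference, calling the reworked endpoint now looks roughly like the sketch below. The base URL, port, and router prefix are assumptions (the commit does not show how this router is mounted); the payload fields mirror LLMRequest and the printed keys mirror the new LLMResponse schema.

import requests  # assumes the requests package is installed

payload = {
    "model": "gpt-3.5-turbo",   # any model identifier litellm can route
    "prompt": "Summarize this commit in one sentence.",
    "temperature": 0.7,
    "max_tokens": 200,
}

# Assumption: the app mounts this router at the root and runs on localhost:8000.
resp = requests.post("http://localhost:8000/llm", json=payload, timeout=60)
resp.raise_for_status()

data = resp.json()  # shaped like LLMResponse: id, content, model, created, usage
print(data["content"])
print(data["usage"]["total_tokens"])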

View File

@@ -6,16 +6,14 @@ import traceback
 import time
 import hashlib
 from fastapi import HTTPException
-# Since we don't have specific entity information and no model/schema code,
-# we'll create generic utility helper functions that don't rely on database access
-# In-memory data store as fallback
-_generic_store: List[Dict[str, Any]] = []
+import litellm
 
 # Configure logging
 logger = logging.getLogger(__name__)
 
+# In-memory data store as fallback
+_generic_store: List[Dict[str, Any]] = []
 
 def generate_unique_id() -> str:
     """
     Generates a unique identifier.
@@ -287,4 +285,51 @@ def handle_http_error(status_code: int, detail: str) -> None:
     Raises:
         HTTPException: With the specified status code and detail
     """
-    raise HTTPException(status_code=status_code, detail=detail)
+    raise HTTPException(status_code=status_code, detail=detail)
+
+
+def process_llm_request(model: str, prompt: str, temperature: float = 0.7, max_tokens: int = 1000) -> Dict[str, Any]:
+    """
+    Processes an LLM request using litellm to handle the actual inference.
+
+    Args:
+        model (str): The LLM model to use for inference
+        prompt (str): The prompt text to send to the LLM
+        temperature (float): Controls randomness in the output (0-1)
+        max_tokens (int): Maximum number of tokens to generate
+
+    Returns:
+        Dict[str, Any]: The LLM response with content and metadata
+    """
+    try:
+        logger.info(f"Sending request to LLM model: {model}")
+
+        # Make the actual LLM call using litellm
+        response = litellm.completion(
+            model=model,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=temperature,
+            max_tokens=max_tokens
+        )
+
+        # Process and return the response
+        result = {
+            "id": response.id,
+            "content": response.choices[0].message.content,
+            "model": response.model,
+            "created": response.created,
+            "usage": {
+                "prompt_tokens": response.usage.prompt_tokens,
+                "completion_tokens": response.usage.completion_tokens,
+                "total_tokens": response.usage.total_tokens
+            }
+        }
+
+        logger.info(f"LLM request completed successfully. Used {result['usage']['total_tokens']} tokens.")
+        return result
+    except Exception as e:
+        log_error("Error processing LLM request", e)
+        raise HTTPException(
+            status_code=500,
+            detail=f"LLM processing error: {str(e)}"
+        )
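
For context, process_llm_request delegates directly to litellm.completion, so it can also be exercised outside the FastAPI route. A rough sketch, assuming the provider credentials litellm expects (for example OPENAI_API_KEY for OpenAI-hosted models) are already set in the environment, and with placeholder model and prompt values:

from helpers.generic_helpers import process_llm_request

# Placeholder arguments; swap in whatever model litellm is configured to reach.
result = process_llm_request(
    model="gpt-3.5-turbo",
    prompt="Write a haiku about unit tests.",
    temperature=0.2,
    max_tokens=64,
)

print(result["content"])                # generated text
print(result["usage"]["total_tokens"])  # token counts reported by the provider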

View File

@@ -10,3 +10,4 @@ alembic>=1.13.1
 jose
 passlib
 pydantic
+litellm