mark-xl1tc0/endpoints/llm.post.py

from fastapi import APIRouter, HTTPException, status
from typing import Dict, Any, Optional
from pydantic import BaseModel
from helpers.generic_helpers import (
    create_generic_item,
    log_error,
    safe_json_serialize
)

router = APIRouter()


class LLMRequest(BaseModel):
    prompt: str
    model: Optional[str] = "gpt-3.5-turbo"
    max_tokens: Optional[int] = 1000
    temperature: Optional[float] = 0.7
    options: Optional[Dict[str, Any]] = None
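
# Example request body accepted by LLMRequest (illustrative values only; every
# field other than "prompt" is optional and falls back to the defaults above):
#   {
#     "prompt": "Write a haiku about FastAPI",
#     "max_tokens": 256,
#     "temperature": 0.2,
#     "options": {"stop": ["\n\n"]}
#   }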


class LLMResponse(BaseModel):
    id: str
    created_at: str
    updated_at: str
    prompt: str
    model: str
    response: str
    tokens_used: Optional[int] = None
    metadata: Optional[Dict[str, Any]] = None
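
# Rough shape of the endpoint's response once create_generic_item has added the
# storage fields (values are illustrative; the real id and timestamps depend on
# helpers.generic_helpers, which is not shown here):
#   {
#     "id": "item-123",
#     "created_at": "2024-01-01T00:00:00Z",
#     "updated_at": "2024-01-01T00:00:00Z",
#     "prompt": "Write a haiku about FastAPI",
#     "model": "gpt-3.5-turbo",
#     "response": "Generated response for: Write a haiku about FastAPI",
#     "tokens_used": 10,
#     "metadata": {"max_tokens": 256, "temperature": 0.2, "options": {}}
#   }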
@router.post("/llm", status_code=status.HTTP_201_CREATED, response_model=LLMResponse)
async def process_llm_request(request: LLMRequest):
"""
Process a request to generate text using an LLM model.
This endpoint accepts a prompt and optional parameters, then returns the generated response.
"""
    try:
        # Validate required fields
        if not request.prompt:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Prompt is required"
            )

        # Prepare data for storage
        llm_data = {
            "prompt": request.prompt,
            "model": request.model,
            "response": f"Generated response for: {request.prompt}",  # Mock response
            "tokens_used": len(request.prompt.split()) * 2,  # Mock token count
            "metadata": {
                "max_tokens": request.max_tokens,
                "temperature": request.temperature,
                "options": request.options or {}
            }
        }

        # Create item in storage
        result = create_generic_item(llm_data)

        # Return serialized result
        return safe_json_serialize(result)
    except HTTPException:
        # Re-raise HTTP exceptions
        raise
    except Exception as e:
        # Log unexpected errors
        log_error("Unexpected error processing LLM request", e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="An error occurred while processing your request"
        )
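

# Minimal smoke-test sketch (not part of the original module): mounts the router
# on a throwaway FastAPI app and posts one sample prompt. It assumes that
# helpers.generic_helpers is importable and that create_generic_item returns the
# id/created_at/updated_at fields LLMResponse requires; the payload below is
# illustrative only.
if __name__ == "__main__":
    from fastapi import FastAPI
    from fastapi.testclient import TestClient

    app = FastAPI()
    app.include_router(router)

    with TestClient(app) as client:
        resp = client.post(
            "/llm",
            json={"prompt": "Summarize FastAPI in one sentence.", "max_tokens": 64},
        )
        print(resp.status_code)
        print(resp.json())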