feat: implement LLM service using litellm for actual inference
commit f1bf91258a
parent 97b6d4ec21
@@ -1,72 +1,56 @@
 from fastapi import APIRouter, HTTPException, status
-from typing import Dict, Any, Optional
+from typing import Dict
 from pydantic import BaseModel
-from helpers.generic_helpers import (
-    create_generic_item,
-    log_error,
-    safe_json_serialize
-)
+from helpers.generic_helpers import process_llm_request, validate_data, log_error
 
 router = APIRouter()
 
 class LLMRequest(BaseModel):
+    model: str
     prompt: str
-    model: Optional[str] = "gpt-3.5-turbo"
-    max_tokens: Optional[int] = 1000
-    temperature: Optional[float] = 0.7
-    options: Optional[Dict[str, Any]] = None
+    temperature: float = 0.7
+    max_tokens: int = 1000
 
 class LLMResponse(BaseModel):
     id: str
-    created_at: str
-    updated_at: str
-    prompt: str
-    response: str
-    tokens_used: Optional[int] = None
-    metadata: Optional[Dict[str, Any]] = None
+    content: str
+    model: str
+    created: int
+    usage: Dict[str, int]
 
-@router.post("/llm", status_code=status.HTTP_201_CREATED, response_model=LLMResponse)
-async def process_llm_request(request: LLMRequest):
+@router.post("/llm", status_code=status.HTTP_200_OK, response_model=LLMResponse)
+async def generate_llm_response(request: LLMRequest):
     """
-    Process a request to generate text using an LLM model.
+    Generate a response from an LLM model using the litellm library.
 
-    This endpoint accepts a prompt and optional parameters, then returns the generated response.
+    This endpoint accepts a model name, prompt, and optional parameters,
+    then calls the actual LLM service to generate a response.
     """
     try:
         # Validate required fields
-        if not request.prompt:
+        required_fields = ["model", "prompt"]
+        if not validate_data(request.dict(), required_fields):
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
-                detail="Prompt is required"
+                detail="Missing required fields: model and prompt are required"
             )
 
-        # Prepare data for storage
-        llm_data = {
-            "prompt": request.prompt,
-            "model": request.model,
-            "response": f"Generated response for: {request.prompt}",  # Mock response
-            "tokens_used": len(request.prompt.split()) * 2,  # Mock token count
-            "metadata": {
-                "max_tokens": request.max_tokens,
-                "temperature": request.temperature,
-                "options": request.options or {}
-            }
-        }
-
-        # Create item in storage
-        result = create_generic_item(llm_data)
-
-        # Return serialized result
-        return safe_json_serialize(result)
+        # Process the LLM request using the helper function
+        result = process_llm_request(
+            model=request.model,
+            prompt=request.prompt,
+            temperature=request.temperature,
+            max_tokens=request.max_tokens
+        )
+
+        return result
 
     except HTTPException:
-        # Re-raise HTTP exceptions
+        # Re-raise HTTP exceptions as they already have status codes
         raise
     except Exception as e:
-        # Log unexpected errors
-        log_error("Unexpected error processing LLM request", e)
+        log_error("Unexpected error in LLM endpoint", e)
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="An error occurred while processing your request"
+            detail=f"Failed to process LLM request: {str(e)}"
         )
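For context, a minimal sketch of how a client could exercise the updated endpoint; it assumes the router is mounted at the application root and served locally on port 8000, and uses the requests library purely for illustration (none of this is part of the commit):

    import requests

    # Assumed local dev URL; adjust host, port, and path prefix to your deployment
    resp = requests.post(
        "http://localhost:8000/llm",
        json={
            "model": "gpt-3.5-turbo",
            "prompt": "Summarize what litellm does in one sentence.",
            "temperature": 0.7,
            "max_tokens": 200,
        },
        timeout=60,
    )
    resp.raise_for_status()
    data = resp.json()  # shaped like LLMResponse: id, content, model, created, usage
    print(data["content"], data["usage"]["total_tokens"])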
@@ -6,16 +6,14 @@ import traceback
 import time
 import hashlib
 from fastapi import HTTPException
-
-# Since we don't have specific entity information and no model/schema code,
-# we'll create generic utility helper functions that don't rely on database access
-
-# In-memory data store as fallback
-_generic_store: List[Dict[str, Any]] = []
+import litellm
 
 # Configure logging
 logger = logging.getLogger(__name__)
 
+# In-memory data store as fallback
+_generic_store: List[Dict[str, Any]] = []
+
 def generate_unique_id() -> str:
     """
     Generates a unique identifier.
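Because the helpers now call litellm directly, the service process must be able to reach a provider; litellm reads credentials from environment variables, so a key for the chosen backend has to be set before the first completion call. A minimal sketch, assuming an OpenAI-hosted model (other providers use their own variables, e.g. ANTHROPIC_API_KEY); the key value shown is a placeholder:

    import os
    import litellm

    # Placeholder key; in practice inject this from your secret store, not source code
    os.environ.setdefault("OPENAI_API_KEY", "sk-...")

    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "ping"}],
        max_tokens=5,
    )
    print(response.choices[0].message.content)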
@@ -288,3 +286,50 @@ def handle_http_error(status_code: int, detail: str) -> None:
         HTTPException: With the specified status code and detail
     """
     raise HTTPException(status_code=status_code, detail=detail)
+
+def process_llm_request(model: str, prompt: str, temperature: float = 0.7, max_tokens: int = 1000) -> Dict[str, Any]:
+    """
+    Processes an LLM request using litellm to handle the actual inference.
+
+    Args:
+        model (str): The LLM model to use for inference
+        prompt (str): The prompt text to send to the LLM
+        temperature (float): Controls randomness in the output (0-1)
+        max_tokens (int): Maximum number of tokens to generate
+
+    Returns:
+        Dict[str, Any]: The LLM response with content and metadata
+    """
+    try:
+        logger.info(f"Sending request to LLM model: {model}")
+
+        # Make the actual LLM call using litellm
+        response = litellm.completion(
+            model=model,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=temperature,
+            max_tokens=max_tokens
+        )
+
+        # Process and return the response
+        result = {
+            "id": response.id,
+            "content": response.choices[0].message.content,
+            "model": response.model,
+            "created": response.created,
+            "usage": {
+                "prompt_tokens": response.usage.prompt_tokens,
+                "completion_tokens": response.usage.completion_tokens,
+                "total_tokens": response.usage.total_tokens
+            }
+        }
+
+        logger.info(f"LLM request completed successfully. Used {result['usage']['total_tokens']} tokens.")
+        return result
+
+    except Exception as e:
+        log_error("Error processing LLM request", e)
+        raise HTTPException(
+            status_code=500,
+            detail=f"LLM processing error: {str(e)}"
+        )
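The new helper can also be called outside the router, which is handy for a quick smoke test of the litellm wiring; a minimal sketch, assuming provider credentials are configured and helpers.generic_helpers is importable from the project root:

    from helpers.generic_helpers import process_llm_request

    # Returns a dict shaped like LLMResponse: id, content, model, created, usage
    result = process_llm_request(
        model="gpt-3.5-turbo",   # any model name litellm can route
        prompt="Say hello in five words.",
        temperature=0.2,
        max_tokens=20,
    )
    print(result["content"])
    print(result["usage"]["total_tokens"])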
@@ -10,3 +10,4 @@ alembic>=1.13.1
 jose
 passlib
 pydantic
+litellm