feat: implement LLM service using litellm for actual inference
parent 97b6d4ec21
commit f1bf91258a
@@ -1,72 +1,56 @@
 from fastapi import APIRouter, HTTPException, status
-from typing import Dict, Any, Optional
+from typing import Dict
 from pydantic import BaseModel
-from helpers.generic_helpers import (
-    create_generic_item,
-    log_error,
-    safe_json_serialize
-)
+from helpers.generic_helpers import process_llm_request, validate_data, log_error
 
 router = APIRouter()
 
 class LLMRequest(BaseModel):
+    model: str
     prompt: str
-    model: Optional[str] = "gpt-3.5-turbo"
-    max_tokens: Optional[int] = 1000
-    temperature: Optional[float] = 0.7
-    options: Optional[Dict[str, Any]] = None
+    temperature: float = 0.7
+    max_tokens: int = 1000
 
 class LLMResponse(BaseModel):
     id: str
-    created_at: str
-    updated_at: str
-    prompt: str
+    content: str
     model: str
-    response: str
-    tokens_used: Optional[int] = None
-    metadata: Optional[Dict[str, Any]] = None
+    created: int
+    usage: Dict[str, int]
 
-@router.post("/llm", status_code=status.HTTP_201_CREATED, response_model=LLMResponse)
-async def process_llm_request(request: LLMRequest):
+@router.post("/llm", status_code=status.HTTP_200_OK, response_model=LLMResponse)
+async def generate_llm_response(request: LLMRequest):
     """
-    Process a request to generate text using an LLM model.
+    Generate a response from an LLM model using the litellm library.
 
-    This endpoint accepts a prompt and optional parameters, then returns the generated response.
+    This endpoint accepts a model name, prompt, and optional parameters,
+    then calls the actual LLM service to generate a response.
     """
     try:
         # Validate required fields
-        if not request.prompt:
+        required_fields = ["model", "prompt"]
+        if not validate_data(request.dict(), required_fields):
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
-                detail="Prompt is required"
+                detail="Missing required fields: model and prompt are required"
             )
 
-        # Prepare data for storage
-        llm_data = {
-            "prompt": request.prompt,
-            "model": request.model,
-            "response": f"Generated response for: {request.prompt}",  # Mock response
-            "tokens_used": len(request.prompt.split()) * 2,  # Mock token count
-            "metadata": {
-                "max_tokens": request.max_tokens,
-                "temperature": request.temperature,
-                "options": request.options or {}
-            }
-        }
-
-        # Create item in storage
-        result = create_generic_item(llm_data)
-
-        # Return serialized result
-        return safe_json_serialize(result)
+        # Process the LLM request using the helper function
+        result = process_llm_request(
+            model=request.model,
+            prompt=request.prompt,
+            temperature=request.temperature,
+            max_tokens=request.max_tokens
+        )
+
+        return result
 
     except HTTPException:
-        # Re-raise HTTP exceptions
+        # Re-raise HTTP exceptions as they already have status codes
        raise
     except Exception as e:
-        # Log unexpected errors
-        log_error("Unexpected error processing LLM request", e)
+        log_error("Unexpected error in LLM endpoint", e)
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="An error occurred while processing your request"
+            detail=f"Failed to process LLM request: {str(e)}"
         )
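For reference, a client call against the updated endpoint might look like the sketch below. The base URL and the absence of a router prefix are assumptions (this diff does not show how the router is mounted); the request and response fields follow the new LLMRequest and LLMResponse models.

    # Hypothetical client-side check, not part of this commit.
    import requests

    payload = {
        "model": "gpt-3.5-turbo",  # any model name litellm can route
        "prompt": "Summarize FastAPI in one sentence.",
        "temperature": 0.2,
        "max_tokens": 100,
    }
    # Assumed host/port; adjust to wherever the FastAPI app is served.
    resp = requests.post("http://localhost:8000/llm", json=payload, timeout=60)
    resp.raise_for_status()
    data = resp.json()
    print(data["content"], data["usage"]["total_tokens"])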
@@ -6,16 +6,14 @@ import traceback
 import time
 import hashlib
 from fastapi import HTTPException
-
-# Since we don't have specific entity information and no model/schema code,
-# we'll create generic utility helper functions that don't rely on database access
-
-# In-memory data store as fallback
-_generic_store: List[Dict[str, Any]] = []
+import litellm
 
 # Configure logging
 logger = logging.getLogger(__name__)
 
+# In-memory data store as fallback
+_generic_store: List[Dict[str, Any]] = []
+
 def generate_unique_id() -> str:
     """
     Generates a unique identifier.
@@ -288,3 +286,50 @@ def handle_http_error(status_code: int, detail: str) -> None:
         HTTPException: With the specified status code and detail
     """
     raise HTTPException(status_code=status_code, detail=detail)
+
+def process_llm_request(model: str, prompt: str, temperature: float = 0.7, max_tokens: int = 1000) -> Dict[str, Any]:
+    """
+    Processes an LLM request using litellm to handle the actual inference.
+
+    Args:
+        model (str): The LLM model to use for inference
+        prompt (str): The prompt text to send to the LLM
+        temperature (float): Controls randomness in the output (0-1)
+        max_tokens (int): Maximum number of tokens to generate
+
+    Returns:
+        Dict[str, Any]: The LLM response with content and metadata
+    """
+    try:
+        logger.info(f"Sending request to LLM model: {model}")
+
+        # Make the actual LLM call using litellm
+        response = litellm.completion(
+            model=model,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=temperature,
+            max_tokens=max_tokens
+        )
+
+        # Process and return the response
+        result = {
+            "id": response.id,
+            "content": response.choices[0].message.content,
+            "model": response.model,
+            "created": response.created,
+            "usage": {
+                "prompt_tokens": response.usage.prompt_tokens,
+                "completion_tokens": response.usage.completion_tokens,
+                "total_tokens": response.usage.total_tokens
+            }
+        }
+
+        logger.info(f"LLM request completed successfully. Used {result['usage']['total_tokens']} tokens.")
+        return result
+
+    except Exception as e:
+        log_error("Error processing LLM request", e)
+        raise HTTPException(
+            status_code=500,
+            detail=f"LLM processing error: {str(e)}"
+        )
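The new helper can also be exercised directly as a quick sanity check; litellm infers the provider from the model name and reads the matching API key from the environment (for example, OPENAI_API_KEY for OpenAI models). The model name below is an illustrative assumption, not part of this commit.

    # Minimal sketch; assumes a provider API key is already exported.
    import os
    from helpers.generic_helpers import process_llm_request

    assert os.environ.get("OPENAI_API_KEY"), "export a provider API key first"

    result = process_llm_request(
        model="gpt-3.5-turbo",
        prompt="Say hello in five words.",
        temperature=0.0,
        max_tokens=20,
    )
    print(result["content"], result["usage"]["total_tokens"])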
@@ -10,3 +10,4 @@ alembic>=1.13.1
 jose
 passlib
 pydantic
+litellm