diff --git a/endpoints/llm.post.py b/endpoints/llm.post.py
index d129c39..362fd5b 100644
--- a/endpoints/llm.post.py
+++ b/endpoints/llm.post.py
@@ -1,72 +1,56 @@
 from fastapi import APIRouter, HTTPException, status
-from typing import Dict, Any, Optional
+from typing import Dict
 from pydantic import BaseModel
-from helpers.generic_helpers import (
-    create_generic_item,
-    log_error,
-    safe_json_serialize
-)
+from helpers.generic_helpers import process_llm_request, validate_data, log_error
 
 router = APIRouter()
 
 class LLMRequest(BaseModel):
+    model: str
     prompt: str
-    model: Optional[str] = "gpt-3.5-turbo"
-    max_tokens: Optional[int] = 1000
-    temperature: Optional[float] = 0.7
-    options: Optional[Dict[str, Any]] = None
+    temperature: float = 0.7
+    max_tokens: int = 1000
 
 class LLMResponse(BaseModel):
     id: str
-    created_at: str
-    updated_at: str
-    prompt: str
+    content: str
     model: str
-    response: str
-    tokens_used: Optional[int] = None
-    metadata: Optional[Dict[str, Any]] = None
+    created: int
+    usage: Dict[str, int]
 
-@router.post("/llm", status_code=status.HTTP_201_CREATED, response_model=LLMResponse)
-async def process_llm_request(request: LLMRequest):
+@router.post("/llm", status_code=status.HTTP_200_OK, response_model=LLMResponse)
+async def generate_llm_response(request: LLMRequest):
     """
-    Process a request to generate text using an LLM model.
+    Generate a response from an LLM model using the litellm library.
 
-    This endpoint accepts a prompt and optional parameters, then returns the generated response.
+    This endpoint accepts a model name, prompt, and optional parameters,
+    then calls the actual LLM service to generate a response.
     """
     try:
         # Validate required fields
-        if not request.prompt:
+        required_fields = ["model", "prompt"]
+        if not validate_data(request.dict(), required_fields):
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
-                detail="Prompt is required"
+                detail="Missing required fields: model and prompt are required"
             )
 
-        # Prepare data for storage
-        llm_data = {
-            "prompt": request.prompt,
-            "model": request.model,
-            "response": f"Generated response for: {request.prompt}", # Mock response
-            "tokens_used": len(request.prompt.split()) * 2, # Mock token count
-            "metadata": {
-                "max_tokens": request.max_tokens,
-                "temperature": request.temperature,
-                "options": request.options or {}
-            }
-        }
+        # Process the LLM request using the helper function
+        result = process_llm_request(
+            model=request.model,
+            prompt=request.prompt,
+            temperature=request.temperature,
+            max_tokens=request.max_tokens
+        )
 
-        # Create item in storage
-        result = create_generic_item(llm_data)
-
-        # Return serialized result
-        return safe_json_serialize(result)
+        return result
 
     except HTTPException:
-        # Re-raise HTTP exceptions
+        # Re-raise HTTP exceptions as they already have status codes
         raise
     except Exception as e:
-        # Log unexpected errors
-        log_error("Unexpected error processing LLM request", e)
+        log_error("Unexpected error in LLM endpoint", e)
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="An error occurred while processing your request"
+            detail=f"Failed to process LLM request: {str(e)}"
         )
\ No newline at end of file
diff --git a/helpers/generic_helpers.py b/helpers/generic_helpers.py
index 071f2a5..749c5ea 100644
--- a/helpers/generic_helpers.py
+++ b/helpers/generic_helpers.py
@@ -6,16 +6,14 @@ import traceback
 import time
 import hashlib
 from fastapi import HTTPException
-
-# Since we don't have specific entity information and no model/schema code,
-# we'll create generic utility helper functions that don't rely on database access
-
-# In-memory data store as fallback
-_generic_store: List[Dict[str, Any]] = []
+import litellm
 
 # Configure logging
 logger = logging.getLogger(__name__)
 
+# In-memory data store as fallback
+_generic_store: List[Dict[str, Any]] = []
+
 def generate_unique_id() -> str:
     """
     Generates a unique identifier.
@@ -287,4 +285,51 @@ def handle_http_error(status_code: int, detail: str) -> None:
     Raises:
         HTTPException: With the specified status code and detail
     """
-    raise HTTPException(status_code=status_code, detail=detail)
\ No newline at end of file
+    raise HTTPException(status_code=status_code, detail=detail)
+
+def process_llm_request(model: str, prompt: str, temperature: float = 0.7, max_tokens: int = 1000) -> Dict[str, Any]:
+    """
+    Processes an LLM request using litellm to handle the actual inference.
+
+    Args:
+        model (str): The LLM model to use for inference
+        prompt (str): The prompt text to send to the LLM
+        temperature (float): Controls randomness in the output (0-1)
+        max_tokens (int): Maximum number of tokens to generate
+
+    Returns:
+        Dict[str, Any]: The LLM response with content and metadata
+    """
+    try:
+        logger.info(f"Sending request to LLM model: {model}")
+
+        # Make the actual LLM call using litellm
+        response = litellm.completion(
+            model=model,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=temperature,
+            max_tokens=max_tokens
+        )
+
+        # Process and return the response
+        result = {
+            "id": response.id,
+            "content": response.choices[0].message.content,
+            "model": response.model,
+            "created": response.created,
+            "usage": {
+                "prompt_tokens": response.usage.prompt_tokens,
+                "completion_tokens": response.usage.completion_tokens,
+                "total_tokens": response.usage.total_tokens
+            }
+        }
+
+        logger.info(f"LLM request completed successfully. Used {result['usage']['total_tokens']} tokens.")
+        return result
+
+    except Exception as e:
+        log_error("Error processing LLM request", e)
+        raise HTTPException(
+            status_code=500,
+            detail=f"LLM processing error: {str(e)}"
+        )
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index db12c92..69740f4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,3 +10,4 @@ alembic>=1.13.1
 jose
 passlib
 pydantic
+litellm
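
A quick way to exercise the new endpoint after applying this patch is a small smoke test. The sketch below is illustrative only and makes assumptions not present in the diff: the router is mounted on a FastAPI app importable as main.app, and litellm can reach the named provider (for example, OPENAI_API_KEY is exported) so the litellm.completion call succeeds.

# Hypothetical smoke test for the new /llm endpoint (not part of the patch).
from fastapi.testclient import TestClient

from main import app  # assumed application entry point; adjust to the real module

client = TestClient(app)

payload = {
    "model": "gpt-3.5-turbo",  # any model name litellm can route to a provider
    "prompt": "Say hello in one short sentence.",
    "temperature": 0.2,
    "max_tokens": 64,
}

response = client.post("/llm", json=payload)
assert response.status_code == 200  # the endpoint now returns 200 rather than 201

body = response.json()
# Shape matches the new LLMResponse model: id, content, model, created, usage
print(body["content"])
print(body["usage"]["total_tokens"])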