from typing import Dict, List, Optional, Any
import logging
import hashlib
import json

import httpx
from tenacity import retry, stop_after_attempt, wait_exponential

from app.core.config import settings
from app.core.cache import api_cache

logger = logging.getLogger(__name__)


class MediastackClient:
    """
    Client for interacting with the Mediastack API.

    Responses are stored in and served from the shared ``api_cache``; the
    API key is never part of a cache key.
    """

    def __init__(
        self, api_key: Optional[str] = None, base_url: Optional[str] = None
    ):
        """
        Initialize the Mediastack API client.

        Args:
            api_key: The Mediastack API key. Defaults to settings.MEDIASTACK_API_KEY.
            base_url: The base URL for the Mediastack API. Defaults to
                settings.MEDIASTACK_BASE_URL.
        """
        self.api_key = api_key or settings.MEDIASTACK_API_KEY
        self.base_url = base_url or settings.MEDIASTACK_BASE_URL

        # Only warn here: constructing the client must not fail, because a
        # default instance is created at import time at the bottom of this module.
        if not self.api_key:
            logger.warning("Mediastack API key not provided. API calls will fail.")

    def _get_cache_key(self, endpoint: str, params: Dict[str, Any]) -> str:
        """
        Generate a cache key for the request.

        Args:
            endpoint: The API endpoint.
            params: The request parameters. Not modified.

        Returns:
            A cache key string of the form ``mediastack:<endpoint>:<md5 hex>``.
        """
        # Leave the API key out of the hashed material so the secret never
        # influences (or ends up in) cache keys.
        cache_params = {
            name: value for name, value in params.items() if name != "access_key"
        }

        # sort_keys makes the key independent of parameter insertion order.
        params_str = json.dumps(cache_params, sort_keys=True)

        # md5 is used purely as a compact fingerprint, not for security.
        digest = hashlib.md5(params_str.encode()).hexdigest()
        return f"mediastack:{endpoint}:{digest}"

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        # Without reraise=True tenacity raises RetryError after the last
        # attempt, hiding the httpx exception that callers catch.
        reraise=True,
    )
    async def _make_request(
        self, endpoint: str, params: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Make a request to the Mediastack API.

        The call is attempted up to three times with exponential backoff;
        the exception from the final attempt is re-raised unchanged.

        Args:
            endpoint: The API endpoint to request.
            params: Query parameters to include in the request. Not modified.

        Returns:
            The API response as a dictionary (possibly served from cache).

        Raises:
            httpx.HTTPStatusError: If the request fails with an error status.
        """
        url = f"{self.base_url}/{endpoint}"

        # Build a fresh dict so the caller's params are not mutated with the
        # API key.
        request_params = {**(params or {}), "access_key": self.api_key}

        # Generate cache key
        cache_key = self._get_cache_key(endpoint, request_params)

        # Check cache first. Truthiness is kept deliberately: the cache's
        # "miss" value is not visible from this module.
        cached_response = api_cache.get(cache_key)
        if cached_response:
            logger.info("Using cached response for %s", endpoint)
            return cached_response

        # Make the request if not cached
        async with httpx.AsyncClient() as client:
            response = await client.get(url, params=request_params)
            response.raise_for_status()
            response_data = response.json()

        # Cache the response
        api_cache.set(cache_key, response_data)

        return response_data

    async def get_live_news(
        self,
        keywords: Optional[str] = None,
        sources: Optional[str] = None,
        categories: Optional[str] = None,
        countries: Optional[str] = None,
        languages: Optional[str] = None,
        limit: int = 25,
        offset: int = 0,
        sort: str = "published_desc",
        use_cache: bool = True,
    ) -> Dict[str, Any]:
        """
        Get live news articles from the Mediastack API.

        Args:
            keywords: Keywords or phrases to search for in the news.
            sources: Comma-separated list of news sources to filter by.
            categories: Comma-separated list of news categories to filter by.
            countries: Comma-separated list of countries to filter by.
            languages: Comma-separated list of languages to filter by.
            limit: The number of results to return (default: 25, max: 100).
            offset: The number of results to skip (for pagination).
            sort: The order to sort results (published_desc or published_asc).
            use_cache: Whether to use cached responses if available. When
                False, the cached entry for this query is deleted before the
                request is made.

        Returns:
            A dictionary containing the API response with news articles.

        Raises:
            httpx.HTTPStatusError: If the API responds with an error status.
        """
        params: Dict[str, Any] = {
            "limit": min(limit, 100),  # Mediastack has a max limit of 100
            "offset": offset,
            "sort": sort,
        }

        # Add optional filters if provided
        optional_filters = {
            "keywords": keywords,
            "sources": sources,
            "categories": categories,
            "countries": countries,
            "languages": languages,
        }
        params.update(
            {name: value for name, value in optional_filters.items() if value}
        )

        try:
            # If we don't want to use cache, invalidate it first
            if not use_cache:
                cache_key = self._get_cache_key("news", params)
                api_cache.delete(cache_key)

            return await self._make_request("news", params)
        except httpx.HTTPStatusError as e:
            # Log only the status code: the exception text embeds the request
            # URL, whose query string contains the API key.
            logger.error(
                "Error fetching news from Mediastack: HTTP %s",
                e.response.status_code,
            )
            raise

    async def get_sources(self) -> List[Dict[str, str]]:
        """
        Get a list of available news sources.

        Note: This is a placeholder implementation that returns a fixed list
        and performs no API call.

        Returns:
            A list of news sources, each with "name", "source_id" and "url".
        """
        # This is a placeholder. In reality, you'd need to extract sources from the
        # news articles or maintain your own list.
        return [
            {"name": "CNN", "source_id": "cnn", "url": "https://cnn.com"},
            {"name": "BBC", "source_id": "bbc", "url": "https://bbc.com"},
            {"name": "Reuters", "source_id": "reuters", "url": "https://reuters.com"},
            {"name": "New York Times", "source_id": "nytimes", "url": "https://nytimes.com"},
            {"name": "The Guardian", "source_id": "guardian", "url": "https://theguardian.com"},
        ]

    async def get_categories(self) -> List[Dict[str, str]]:
        """
        Get a list of available news categories.

        Note: The list is hard-coded here and performs no API call.

        Returns:
            A list of news categories, each a dict with a single "name" key.
        """
        # These are the categories supported by Mediastack according to documentation
        categories = [
            "general",
            "business",
            "entertainment",
            "health",
            "science",
            "sports",
            "technology",
        ]

        return [{"name": category} for category in categories]


# Create a default client instance for easy importing
mediastack_client = MediastackClient()