# newsaggregationservice-ks0ts2/app/services/mediastack.py

from typing import Dict, List, Optional, Any
import logging
import hashlib
import json
import httpx
from tenacity import retry, stop_after_attempt, wait_exponential

from app.core.config import settings
from app.core.cache import api_cache

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


class MediastackClient:
    """
    Client for interacting with the Mediastack API.

    Responses are cached through ``api_cache`` under a key derived from the
    endpoint and the query parameters; the API key is never part of that key.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
    ):
        """
        Initialize the Mediastack API client.

        Args:
            api_key: The Mediastack API key. Defaults to settings.MEDIASTACK_API_KEY.
            base_url: The base URL for the Mediastack API. Defaults to settings.MEDIASTACK_BASE_URL.
        """
        self.api_key = api_key or settings.MEDIASTACK_API_KEY
        self.base_url = base_url or settings.MEDIASTACK_BASE_URL

        # Construction still succeeds without a key; only a warning is logged.
        if not self.api_key:
            logger.warning("Mediastack API key not provided. API calls will fail.")

    def _get_cache_key(self, endpoint: str, params: Dict[str, Any]) -> str:
        """
        Generate a cache key for the request.

        The key has the form ``mediastack:<endpoint>:<md5 hex digest>``, where
        the digest is taken over the JSON encoding (keys sorted) of ``params``
        with ``access_key`` removed. ``params`` itself is not modified.

        Args:
            endpoint: The API endpoint.
            params: The request parameters.

        Returns:
            A cache key string.
        """
        # Leave the API key out of the key material so the secret never
        # influences (or appears in) cache keys.
        cache_params = {
            name: value for name, value in params.items() if name != "access_key"
        }

        # sort_keys makes the encoding independent of dict insertion order.
        params_str = json.dumps(cache_params, sort_keys=True)
        digest = hashlib.md5(params_str.encode()).hexdigest()

        return f"mediastack:{endpoint}:{digest}"

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        # Without reraise=True tenacity raises RetryError once the attempts are
        # exhausted, so callers catching httpx.HTTPStatusError would never
        # see the documented exception.
        reraise=True,
    )
    async def _make_request(
        self, endpoint: str, params: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Make a request to the Mediastack API.

        A truthy cached response for the same endpoint and parameters is
        returned without contacting the API. Otherwise a GET request is sent
        and the decoded JSON body is stored in the cache before being returned.

        The call is attempted up to three times with exponential backoff.
        NOTE: no exception filter is configured, so every exception is retried,
        including HTTP error statuses that will not succeed on retry.

        Args:
            endpoint: The API endpoint to request.
            params: Query parameters to include in the request. The dictionary
                passed in is not modified.

        Returns:
            The API response as a dictionary.

        Raises:
            httpx.HTTPStatusError: If the final attempt returns an error status.
        """
        url = f"{self.base_url}/{endpoint}"

        # Copy before adding the API key so the caller's dict is never mutated
        # and the secret does not leak back into caller state.
        request_params = dict(params or {})
        request_params["access_key"] = self.api_key

        # Generate cache key (the API key is stripped inside _get_cache_key)
        cache_key = self._get_cache_key(endpoint, request_params)

        # Check cache first
        cached_response = api_cache.get(cache_key)
        if cached_response:
            logger.info("Using cached response for %s", endpoint)
            return cached_response

        # Make the request if not cached
        async with httpx.AsyncClient() as client:
            response = await client.get(url, params=request_params)
            response.raise_for_status()
            response_data = response.json()

        # Cache the response
        api_cache.set(cache_key, response_data)

        return response_data

    async def get_live_news(
        self,
        keywords: Optional[str] = None,
        sources: Optional[str] = None,
        categories: Optional[str] = None,
        countries: Optional[str] = None,
        languages: Optional[str] = None,
        limit: int = 25,
        offset: int = 0,
        sort: str = "published_desc",
        use_cache: bool = True,
    ) -> Dict[str, Any]:
        """
        Get live news articles from the Mediastack API.

        Args:
            keywords: Keywords or phrases to search for in the news.
            sources: Comma-separated list of news sources to filter by.
            categories: Comma-separated list of news categories to filter by.
            countries: Comma-separated list of countries to filter by.
            languages: Comma-separated list of languages to filter by.
            limit: The number of results to return (default: 25, max: 100).
            offset: The number of results to skip (for pagination).
            sort: The order to sort results (published_desc or published_asc).
            use_cache: Whether to use cached responses if available. When
                False, the cached entry for these parameters is deleted before
                the request, and the fresh response is cached again.

        Returns:
            A dictionary containing the API response with news articles.

        Raises:
            httpx.HTTPStatusError: If the API responds with an error status.
        """
        params: Dict[str, Any] = {
            "limit": min(limit, 100),  # Mediastack has a max limit of 100
            "offset": offset,
            "sort": sort,
        }

        # Add optional filters if provided (empty strings are skipped too)
        optional_filters = {
            "keywords": keywords,
            "sources": sources,
            "categories": categories,
            "countries": countries,
            "languages": languages,
        }
        params.update(
            {name: value for name, value in optional_filters.items() if value}
        )

        # If we don't want to use cache, invalidate it first. The key matches
        # the one _make_request computes because access_key is excluded.
        if not use_cache:
            api_cache.delete(self._get_cache_key("news", params))

        try:
            return await self._make_request("news", params)
        except httpx.HTTPStatusError as e:
            # Log only the status code: the exception message embeds the
            # request URL, whose query string carries the API key.
            logger.error(
                "Error fetching news from Mediastack: HTTP %s",
                e.response.status_code,
            )
            raise

    async def get_sources(self) -> List[Dict[str, str]]:
        """
        Get a list of available news sources from the Mediastack API.

        Note: This is a fake implementation since Mediastack doesn't appear to have a
        specific endpoint for listing sources. No request is made.

        Returns:
            A list of news sources, each with "name", "source_id" and "url".
        """
        # This is a placeholder. In reality, you'd need to extract sources from the
        # news articles or maintain your own list.
        return [
            {"name": "CNN", "source_id": "cnn", "url": "https://cnn.com"},
            {"name": "BBC", "source_id": "bbc", "url": "https://bbc.com"},
            {"name": "Reuters", "source_id": "reuters", "url": "https://reuters.com"},
            {"name": "New York Times", "source_id": "nytimes", "url": "https://nytimes.com"},
            {"name": "The Guardian", "source_id": "guardian", "url": "https://theguardian.com"},
        ]

    async def get_categories(self) -> List[Dict[str, str]]:
        """
        Get a list of available news categories from the Mediastack API.

        Note: This is based on Mediastack's documentation. The list is
        hard-coded; no request is made.

        Returns:
            A list of news categories, each a dictionary with a "name" key.
        """
        # These are the categories supported by Mediastack according to documentation
        categories = [
            "general", "business", "entertainment", "health",
            "science", "sports", "technology"
        ]

        return [{"name": category} for category in categories]


# Shared default client instance for easy importing. It is built with no
# arguments, so the API key and base URL come from settings.
mediastack_client = MediastackClient()