Automated Action 90c1cdef34 Setup News Aggregation Service
- Fix code linting issues
- Update README with detailed documentation
- Configure database paths for the current environment
- Create necessary directory structure

The News Aggregation Service is now ready to use with FastAPI and SQLite.
2025-05-27 18:50:11 +00:00

198 lines
6.9 KiB
Python

from typing import Dict, List, Optional, Any
import logging
import hashlib
import json
import httpx
from tenacity import retry, stop_after_attempt, wait_exponential
from app.core.config import settings
from app.core.cache import api_cache
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class MediastackClient:
    """
    Client for interacting with the Mediastack API.

    Responses are looked up in and written to the shared ``api_cache``. Cache
    keys are built from the endpoint and the request parameters with the
    ``access_key`` entry removed.
    """

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
        """
        Initialize the Mediastack API client.

        Args:
            api_key: The Mediastack API key. Defaults to settings.MEDIASTACK_API_KEY.
            base_url: The base URL for the Mediastack API. Defaults to
                settings.MEDIASTACK_BASE_URL.
        """
        self.api_key = api_key or settings.MEDIASTACK_API_KEY
        self.base_url = base_url or settings.MEDIASTACK_BASE_URL
        if not self.api_key:
            # Construction still succeeds; only a warning is emitted here.
            logger.warning("Mediastack API key not provided. API calls will fail.")

    def _get_cache_key(self, endpoint: str, params: Dict[str, Any]) -> str:
        """
        Generate a cache key for the request.

        Args:
            endpoint: The API endpoint.
            params: The request parameters. The dictionary is not modified.

        Returns:
            A string of the form ``mediastack:<endpoint>:<md5 hex digest>``.
        """
        # Build a filtered copy so the API key never takes part in the key and
        # the caller's dictionary is left untouched.
        cache_params = {
            name: value for name, value in params.items() if name != "access_key"
        }
        # sort_keys makes the serialization independent of insertion order.
        params_str = json.dumps(cache_params, sort_keys=True)
        # MD5 is used only to get a short, stable identifier, not for security.
        digest = hashlib.md5(params_str.encode()).hexdigest()
        return f"mediastack:{endpoint}:{digest}"

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        # Without reraise=True tenacity raises RetryError once the attempts are
        # exhausted, which would hide the httpx exception that callers (and the
        # Raises section below) expect.
        reraise=True,
    )
    async def _make_request(
        self, endpoint: str, params: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Make a request to the Mediastack API, consulting the cache first.

        Args:
            endpoint: The API endpoint to request.
            params: Query parameters to include in the request. The dictionary
                is not modified.

        Returns:
            The API response as a dictionary (from the cache when available).

        Raises:
            httpx.HTTPStatusError: If the response has an error status and the
                retry attempts are exhausted.
        """
        url = f"{self.base_url}/{endpoint}"
        # Work on a copy so the API key is never written into the caller's dict.
        request_params = {**(params or {}), "access_key": self.api_key}

        # Generate cache key (the access key is stripped inside _get_cache_key)
        cache_key = self._get_cache_key(endpoint, request_params)

        # Check cache first
        cached_response = api_cache.get(cache_key)
        if cached_response:
            logger.info("Using cached response for %s", endpoint)
            return cached_response

        # Make the request if not cached
        async with httpx.AsyncClient() as client:
            response = await client.get(url, params=request_params)
            response.raise_for_status()
            response_data = response.json()

        # Cache the response
        api_cache.set(cache_key, response_data)
        return response_data

    async def get_live_news(
        self,
        keywords: Optional[str] = None,
        sources: Optional[str] = None,
        categories: Optional[str] = None,
        countries: Optional[str] = None,
        languages: Optional[str] = None,
        limit: int = 25,
        offset: int = 0,
        sort: str = "published_desc",
        use_cache: bool = True,
    ) -> Dict[str, Any]:
        """
        Get live news articles from the Mediastack API.

        Args:
            keywords: Keywords or phrases to search for in the news.
            sources: Comma-separated list of news sources to filter by.
            categories: Comma-separated list of news categories to filter by.
            countries: Comma-separated list of countries to filter by.
            languages: Comma-separated list of languages to filter by.
            limit: The number of results to return (default: 25, capped at 100).
            offset: The number of results to skip (for pagination).
            sort: The order to sort results (published_desc or published_asc).
            use_cache: Whether to use cached responses if available. When False,
                the cached entry for this query is deleted before the request.

        Returns:
            A dictionary containing the API response with news articles.

        Raises:
            httpx.HTTPStatusError: If the request fails; the error is logged and
                re-raised.
        """
        params: Dict[str, Any] = {
            "limit": min(limit, 100),  # Mediastack has a max limit of 100
            "offset": offset,
            "sort": sort,
        }

        # Add optional filters if provided (empty strings are skipped too)
        optional_filters = {
            "keywords": keywords,
            "sources": sources,
            "categories": categories,
            "countries": countries,
            "languages": languages,
        }
        for name, value in optional_filters.items():
            if value:
                params[name] = value

        try:
            # If we don't want to use cache, invalidate it first. The key matches
            # the one _make_request computes because access_key is excluded.
            if not use_cache:
                cache_key = self._get_cache_key("news", params)
                api_cache.delete(cache_key)
            return await self._make_request("news", params)
        except httpx.HTTPStatusError as e:
            # Log only the status code: the exception text contains the request
            # URL, whose query string carries the access_key.
            logger.error(
                "Error fetching news from Mediastack: HTTP %s",
                e.response.status_code,
            )
            raise

    async def get_sources(self) -> List[Dict[str, str]]:
        """
        Get a list of available news sources.

        Note: This is a fake implementation since Mediastack doesn't appear to
        have a specific endpoint for listing sources. No request is made.

        Returns:
            A hard-coded list of news sources, each with ``name``, ``source_id``
            and ``url`` keys.
        """
        # This is a placeholder. In reality, you'd need to extract sources from
        # the news articles or maintain your own list.
        return [
            {"name": "CNN", "source_id": "cnn", "url": "https://cnn.com"},
            {"name": "BBC", "source_id": "bbc", "url": "https://bbc.com"},
            {"name": "Reuters", "source_id": "reuters", "url": "https://reuters.com"},
            {"name": "New York Times", "source_id": "nytimes", "url": "https://nytimes.com"},
            {"name": "The Guardian", "source_id": "guardian", "url": "https://theguardian.com"},
        ]

    async def get_categories(self) -> List[Dict[str, str]]:
        """
        Get a list of available news categories.

        Note: This is based on Mediastack's documentation. No request is made.

        Returns:
            A list of dictionaries, each with a single ``name`` key.
        """
        # These are the categories supported by Mediastack according to documentation
        categories = [
            "general", "business", "entertainment", "health",
            "science", "sports", "technology",
        ]
        return [{"name": category} for category in categories]
# Create a default client instance for easy importing.
# It is constructed at import time with no arguments, so the API key and base
# URL come from settings; if no API key is configured, the constructor logs a
# warning at import.
mediastack_client = MediastackClient()