
- Fix code linting issues - Update README with detailed documentation - Configure database paths for the current environment - Create necessary directory structure The News Aggregation Service is now ready to use with FastAPI and SQLite.
198 lines
6.9 KiB
Python
198 lines
6.9 KiB
Python
from typing import Dict, List, Optional, Any
|
|
import logging
|
|
import hashlib
|
|
import json
|
|
import httpx
|
|
from tenacity import retry, stop_after_attempt, wait_exponential
|
|
|
|
from app.core.config import settings
|
|
from app.core.cache import api_cache
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class MediastackClient:
    """
    Client for interacting with the Mediastack API.

    Responses are cached through ``api_cache`` under a key derived from the
    endpoint and the query parameters (the API key is excluded from the key).
    """

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
        """
        Initialize the Mediastack API client.

        Args:
            api_key: The Mediastack API key. Defaults to settings.MEDIASTACK_API_KEY.
            base_url: The base URL for the Mediastack API. Defaults to settings.MEDIASTACK_BASE_URL.
        """
        self.api_key = api_key or settings.MEDIASTACK_API_KEY
        self.base_url = base_url or settings.MEDIASTACK_BASE_URL

        # Construction is allowed without a key; only a warning is emitted here.
        if not self.api_key:
            logger.warning("Mediastack API key not provided. API calls will fail.")

    def _get_cache_key(self, endpoint: str, params: Dict[str, Any]) -> str:
        """
        Generate a cache key for the request.

        Args:
            endpoint: The API endpoint.
            params: The request parameters. Not modified by this method.

        Returns:
            A cache key string of the form ``mediastack:<endpoint>:<md5 hex digest>``.
        """
        # Build a filtered copy so the caller's dict is untouched and the API
        # key never becomes part of the cache key.
        cache_params = {
            name: value for name, value in params.items() if name != "access_key"
        }

        # sort_keys makes the serialization independent of insertion order, so
        # equal parameter sets always map to the same key.
        params_str = json.dumps(cache_params, sort_keys=True)

        # MD5 is used only as a compact fingerprint of the parameters.
        digest = hashlib.md5(params_str.encode()).hexdigest()
        return f"mediastack:{endpoint}:{digest}"

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        # Without reraise=True tenacity raises RetryError once the attempts are
        # exhausted, hiding the httpx.HTTPStatusError that callers catch.
        reraise=True,
    )
    async def _make_request(
        self, endpoint: str, params: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Make a request to the Mediastack API.

        The cache is consulted first; on a miss the request is sent and the
        decoded JSON response is stored in the cache before being returned.
        The whole call (including the cache lookup) is retried up to three
        times with exponential backoff when any exception is raised.

        Args:
            endpoint: The API endpoint to request.
            params: Query parameters to include in the request. The dictionary
                passed in is not modified.

        Returns:
            The API response as a dictionary.

        Raises:
            httpx.HTTPStatusError: If the request still fails with an error
                status after the final retry attempt.
        """
        url = f"{self.base_url}/{endpoint}"

        # Work on a copy so the API key is never written into the caller's dict.
        request_params: Dict[str, Any] = dict(params or {})
        request_params["access_key"] = self.api_key

        # Generate cache key (the access key is stripped inside the helper).
        cache_key = self._get_cache_key(endpoint, request_params)

        # Check cache first.
        # NOTE(review): truthiness is kept on purpose; the miss sentinel of
        # api_cache.get is not visible here, so an empty cached payload is
        # treated as a miss exactly as before.
        cached_response = api_cache.get(cache_key)
        if cached_response:
            logger.info("Using cached response for %s", endpoint)
            return cached_response

        # Make the request if not cached.
        async with httpx.AsyncClient() as client:
            response = await client.get(url, params=request_params)
            response.raise_for_status()
            response_data = response.json()

        # Cache the response.
        api_cache.set(cache_key, response_data)

        return response_data

    async def get_live_news(
        self,
        keywords: Optional[str] = None,
        sources: Optional[str] = None,
        categories: Optional[str] = None,
        countries: Optional[str] = None,
        languages: Optional[str] = None,
        limit: int = 25,
        offset: int = 0,
        sort: str = "published_desc",
        use_cache: bool = True,
    ) -> Dict[str, Any]:
        """
        Get live news articles from the Mediastack API.

        Args:
            keywords: Keywords or phrases to search for in the news.
            sources: Comma-separated list of news sources to filter by.
            categories: Comma-separated list of news categories to filter by.
            countries: Comma-separated list of countries to filter by.
            languages: Comma-separated list of languages to filter by.
            limit: The number of results to return (default: 25, max: 100).
            offset: The number of results to skip (for pagination).
            sort: The order to sort results (published_desc or published_asc).
            use_cache: Whether to use cached responses if available. When
                False, the matching cache entry is deleted before the request,
                and the fresh response is cached again afterwards.

        Returns:
            A dictionary containing the API response with news articles.

        Raises:
            httpx.HTTPStatusError: If the API responds with an error status.
        """
        params: Dict[str, Any] = {
            "limit": min(limit, 100),  # Mediastack has a max limit of 100
            "offset": offset,
            "sort": sort,
        }

        # Add optional filters if provided; empty or None filters are omitted.
        optional_filters = {
            "keywords": keywords,
            "sources": sources,
            "categories": categories,
            "countries": countries,
            "languages": languages,
        }
        params.update(
            {name: value for name, value in optional_filters.items() if value}
        )

        try:
            # If we don't want to use cache, invalidate it first so that
            # _make_request misses and fetches a fresh response.
            if not use_cache:
                api_cache.delete(self._get_cache_key("news", params))

            return await self._make_request("news", params)
        except httpx.HTTPStatusError as e:
            logger.error("Error fetching news from Mediastack: %s", e)
            raise

    async def get_sources(self) -> List[Dict[str, str]]:
        """
        Get a list of available news sources.

        Note: This is a placeholder implementation that returns a fixed list
        and performs no API call, since Mediastack doesn't appear to have a
        specific endpoint for listing sources.

        Returns:
            A list of news sources, each with "name", "source_id" and "url".
        """
        # This is a placeholder. In reality, you'd need to extract sources from
        # the news articles or maintain your own list.
        return [
            {"name": "CNN", "source_id": "cnn", "url": "https://cnn.com"},
            {"name": "BBC", "source_id": "bbc", "url": "https://bbc.com"},
            {"name": "Reuters", "source_id": "reuters", "url": "https://reuters.com"},
            {"name": "New York Times", "source_id": "nytimes", "url": "https://nytimes.com"},
            {"name": "The Guardian", "source_id": "guardian", "url": "https://theguardian.com"},
        ]

    async def get_categories(self) -> List[Dict[str, str]]:
        """
        Get a list of available news categories.

        Note: This is a fixed list based on Mediastack's documentation; no API
        call is made.

        Returns:
            A list of news categories, each a dictionary with a "name" key.
        """
        # These are the categories supported by Mediastack according to documentation
        categories = [
            "general", "business", "entertainment", "health",
            "science", "sports", "technology",
        ]

        return [{"name": category} for category in categories]
|
|
|
|
|
|
# Create a default client instance for easy importing. It is built at import
# time from the values in `settings`; a warning is logged if no API key is set.
mediastack_client = MediastackClient()