Automated Action 90c1cdef34: Set up News Aggregation Service
- Fix code linting issues
- Update README with detailed documentation
- Configure database paths for the current environment
- Create necessary directory structure

The News Aggregation Service is now ready to use with FastAPI and SQLite.
2025-05-27 18:50:11 +00:00

272 lines
8.7 KiB
Python

import logging
from datetime import datetime, timezone
from typing import List, Optional, Tuple

from sqlalchemy import desc, or_
from sqlalchemy.orm import Session

from app.models.news import (
    NewsArticle, NewsSource, NewsCategory, SavedArticle
)
from app.schemas.news import (
    NewsArticleCreate, NewsSourceCreate, NewsCategoryCreate
)
from app.services.mediastack import mediastack_client
logger = logging.getLogger(__name__)
def get_news_source_by_source_id(db: Session, source_id: str) -> Optional[NewsSource]:
    """Look up the news source with the given external ``source_id``, or None."""
    return db.query(NewsSource).filter_by(source_id=source_id).first()
def create_news_source(db: Session, source: NewsSourceCreate) -> NewsSource:
    """Insert a new news source row and return the refreshed ORM instance."""
    row = NewsSource(**source.model_dump())
    db.add(row)
    db.commit()
    # Refresh so DB-generated fields (e.g. the primary key) are populated.
    db.refresh(row)
    return row
def get_or_create_news_source(db: Session, source: NewsSourceCreate) -> NewsSource:
    """Return the source matching ``source.source_id``, inserting it if absent."""
    existing = get_news_source_by_source_id(db, source.source_id)
    if existing is None:
        return create_news_source(db, source)
    return existing
def get_news_category_by_name(db: Session, name: str) -> Optional[NewsCategory]:
    """Look up the news category with the given name, or None if missing."""
    return db.query(NewsCategory).filter_by(name=name).first()
def create_news_category(db: Session, category: NewsCategoryCreate) -> NewsCategory:
    """Insert a new news category row and return the refreshed ORM instance."""
    row = NewsCategory(**category.model_dump())
    db.add(row)
    db.commit()
    # Refresh so DB-generated fields (e.g. the primary key) are populated.
    db.refresh(row)
    return row
def get_or_create_news_category(db: Session, category: NewsCategoryCreate) -> NewsCategory:
    """Return the category matching ``category.name``, inserting it if absent."""
    existing = get_news_category_by_name(db, category.name)
    if existing is None:
        return create_news_category(db, category)
    return existing
def get_news_article_by_url(db: Session, url: str) -> Optional[NewsArticle]:
    """Look up the news article with the given URL, or None if missing."""
    return db.query(NewsArticle).filter_by(url=url).first()
def create_news_article(db: Session, article: NewsArticleCreate) -> NewsArticle:
    """Insert a new news article row and return the refreshed ORM instance."""
    row = NewsArticle(**article.model_dump())
    db.add(row)
    db.commit()
    # Refresh so DB-generated fields (e.g. the primary key) are populated.
    db.refresh(row)
    return row
def get_or_create_news_article(db: Session, article: NewsArticleCreate) -> Tuple[NewsArticle, bool]:
    """Fetch the article with this URL, creating it when missing.

    Returns:
        A ``(article, created)`` tuple; ``created`` is True only when a new
        row was inserted during this call.
    """
    existing = get_news_article_by_url(db, article.url)
    if existing is not None:
        return existing, False
    return create_news_article(db, article), True
def get_news_articles(
    db: Session,
    skip: int = 0,
    limit: int = 100,
    keywords: Optional[str] = None,
    source_ids: Optional[List[int]] = None,
    category_ids: Optional[List[int]] = None,
    countries: Optional[List[str]] = None,
    languages: Optional[List[str]] = None,
) -> Tuple[List[NewsArticle], int]:
    """Get news articles with optional filtering.

    Args:
        db: Database session.
        skip: Pagination offset.
        limit: Maximum number of rows returned.
        keywords: Comma-separated terms matched case-insensitively against
            title, description and content; blank terms are ignored.
        source_ids: Restrict to these source primary keys.
        category_ids: Restrict to these category primary keys.
        countries: Restrict to these country codes.
        languages: Restrict to these language codes.

    Returns:
        A tuple containing the list of articles (newest first) and the
        total count of matching rows before pagination.
    """
    query = db.query(NewsArticle)
    if keywords:
        # Fix: a blank segment (e.g. from "foo," or ", ") used to yield the
        # pattern "%%", which ILIKE-matches every non-null value and made
        # the keyword filter a no-op. Skip blank terms instead.
        patterns = [
            f"%{term.strip()}%" for term in keywords.split(",") if term.strip()
        ]
        if patterns:
            conditions = []
            for pattern in patterns:
                conditions.append(NewsArticle.title.ilike(pattern))
                conditions.append(NewsArticle.description.ilike(pattern))
                conditions.append(NewsArticle.content.ilike(pattern))
            query = query.filter(or_(*conditions))
    if source_ids:
        query = query.filter(NewsArticle.source_id.in_(source_ids))
    if category_ids:
        query = query.filter(NewsArticle.category_id.in_(category_ids))
    if countries:
        query = query.filter(NewsArticle.country.in_(countries))
    if languages:
        query = query.filter(NewsArticle.language.in_(languages))
    # Count total before pagination so callers can page through results.
    total = query.count()
    query = query.order_by(desc(NewsArticle.published_at)).offset(skip).limit(limit)
    return query.all(), total
def save_article_for_user(
    db: Session, user_id: int, article_id: int, notes: Optional[str] = None
) -> SavedArticle:
    """Persist a bookmark linking a user to an article, with optional notes."""
    bookmark = SavedArticle(user_id=user_id, article_id=article_id, notes=notes)
    db.add(bookmark)
    db.commit()
    # Refresh so DB-generated fields (e.g. id, created_at) are populated.
    db.refresh(bookmark)
    return bookmark
def get_saved_articles_for_user(
    db: Session, user_id: int, skip: int = 0, limit: int = 100
) -> Tuple[List[SavedArticle], int]:
    """Return one page of a user's saved articles plus the total count.

    Results are ordered newest-save-first; the join restricts results to
    bookmarks that have a backing article row.
    """
    base = (
        db.query(SavedArticle)
        .filter(SavedArticle.user_id == user_id)
        .join(NewsArticle)
        .order_by(desc(SavedArticle.created_at))
    )
    page = base.offset(skip).limit(limit).all()
    return page, base.count()
def delete_saved_article(db: Session, user_id: int, saved_article_id: int) -> bool:
    """
    Delete a saved article for a user.

    The user_id filter ensures a user can only delete their own bookmarks.

    Returns:
        True if the article was deleted, False if it wasn't found.
    """
    target = (
        db.query(SavedArticle)
        .filter(
            SavedArticle.id == saved_article_id,
            SavedArticle.user_id == user_id,
        )
        .first()
    )
    if target is None:
        return False
    db.delete(target)
    db.commit()
    return True
def _parse_published_at(raw: Optional[str]) -> datetime:
    """Parse an ISO-8601 timestamp from the API, falling back to now (UTC).

    Fix: the fallback used naive ``datetime.utcnow()`` while successfully
    parsed API timestamps ("...Z" -> "+00:00") are timezone-aware, mixing
    naive and aware datetimes in the same column. Both paths are now aware.
    """
    if raw:
        try:
            return datetime.fromisoformat(raw.replace("Z", "+00:00"))
        except ValueError:
            pass  # unparseable date: fall through to current time
    return datetime.now(timezone.utc)


async def fetch_and_store_news(
    db: Session,
    keywords: Optional[str] = None,
    sources: Optional[str] = None,
    categories: Optional[str] = None,
    countries: Optional[str] = None,
    languages: Optional[str] = None,
    limit: int = 100,
) -> List[NewsArticle]:
    """
    Fetch news from the Mediastack API and store them in the database.

    Articles missing a title or URL are skipped; articles already stored
    (matched by URL) are returned as-is rather than duplicated.

    Returns:
        A list of news articles that were fetched and stored. On any error
        an empty list is returned (deliberate best-effort semantics).
    """
    try:
        response = await mediastack_client.get_live_news(
            keywords=keywords,
            sources=sources,
            categories=categories,
            countries=countries,
            languages=languages,
            limit=limit,
        )
        stored_articles: List[NewsArticle] = []
        for article_data in response.get("data", []):
            # Skip articles without required fields.
            if not article_data.get("title") or not article_data.get("url"):
                continue
            # Resolve (or lazily create) the source row.
            source = None
            raw_source = article_data.get("source")
            if raw_source:
                source = get_or_create_news_source(
                    db,
                    NewsSourceCreate(
                        name=raw_source,
                        source_id=raw_source.lower().replace(" ", "_"),
                        url=None,  # API doesn't provide source URL
                    ),
                )
            # Resolve (or lazily create) the category row.
            category = None
            if article_data.get("category"):
                category = get_or_create_news_category(
                    db, NewsCategoryCreate(name=article_data["category"])
                )
            article_create = NewsArticleCreate(
                title=article_data["title"],
                description=article_data.get("description"),
                content=article_data.get("content"),
                url=article_data["url"],
                image_url=article_data.get("image"),
                published_at=_parse_published_at(article_data.get("published_at")),
                author=article_data.get("author"),
                language=article_data.get("language"),
                country=article_data.get("country"),
                source_id=source.id if source else None,
                category_id=category.id if category else None,
            )
            article, _created = get_or_create_news_article(db, article_create)
            stored_articles.append(article)
        return stored_articles
    except Exception:
        # Best-effort boundary around external API + DB work. Fix: use
        # logger.exception so the full traceback is recorded instead of a
        # bare f-string message that hid the failure's origin.
        logger.exception("Error fetching news from Mediastack")
        return []