
- Fix code linting issues - Update README with detailed documentation - Configure database paths for the current environment - Create necessary directory structure The News Aggregation Service is now ready to use with FastAPI and SQLite.
272 lines
8.7 KiB
Python
272 lines
8.7 KiB
Python
import logging
from datetime import datetime, timezone
from typing import List, Optional, Tuple

from sqlalchemy import desc, or_
from sqlalchemy.orm import Session

from app.models.news import (
    NewsArticle, NewsSource, NewsCategory, SavedArticle
)
from app.schemas.news import (
    NewsArticleCreate, NewsSourceCreate, NewsCategoryCreate
)
from app.services.mediastack import mediastack_client

logger = logging.getLogger(__name__)


def get_news_source_by_source_id(db: Session, source_id: str) -> Optional[NewsSource]:
    """Return the news source matching *source_id*, or ``None`` if absent."""
    matches = db.query(NewsSource).filter(NewsSource.source_id == source_id)
    return matches.first()


def create_news_source(db: Session, source: NewsSourceCreate) -> NewsSource:
    """Insert a new news source and return the refreshed ORM row."""
    row = NewsSource(**source.model_dump())
    db.add(row)
    db.commit()
    db.refresh(row)  # pick up DB-generated fields (e.g. primary key)
    return row


def get_or_create_news_source(db: Session, source: NewsSourceCreate) -> NewsSource:
    """Return the source with this ``source_id``, creating it when missing."""
    existing = get_news_source_by_source_id(db, source.source_id)
    if existing is not None:
        return existing
    return create_news_source(db, source)


def get_news_category_by_name(db: Session, name: str) -> Optional[NewsCategory]:
    """Return the news category named *name*, or ``None`` if absent."""
    matches = db.query(NewsCategory).filter(NewsCategory.name == name)
    return matches.first()


def create_news_category(db: Session, category: NewsCategoryCreate) -> NewsCategory:
    """Insert a new news category and return the refreshed ORM row."""
    row = NewsCategory(**category.model_dump())
    db.add(row)
    db.commit()
    db.refresh(row)  # pick up DB-generated fields (e.g. primary key)
    return row


def get_or_create_news_category(db: Session, category: NewsCategoryCreate) -> NewsCategory:
    """Return the category with this name, creating it when missing."""
    existing = get_news_category_by_name(db, category.name)
    if existing is not None:
        return existing
    return create_news_category(db, category)


def get_news_article_by_url(db: Session, url: str) -> Optional[NewsArticle]:
    """Return the news article stored under *url*, or ``None`` if absent."""
    matches = db.query(NewsArticle).filter(NewsArticle.url == url)
    return matches.first()


def create_news_article(db: Session, article: NewsArticleCreate) -> NewsArticle:
    """Insert a new news article and return the refreshed ORM row."""
    row = NewsArticle(**article.model_dump())
    db.add(row)
    db.commit()
    db.refresh(row)  # pick up DB-generated fields (e.g. primary key)
    return row


def get_or_create_news_article(db: Session, article: NewsArticleCreate) -> Tuple[NewsArticle, bool]:
    """Fetch an article by URL, creating it when missing.

    Returns:
        ``(article, created)`` — ``created`` is True only when a new row
        was inserted, False when an existing row was returned.
    """
    existing = get_news_article_by_url(db, article.url)
    if existing is not None:
        return existing, False
    return create_news_article(db, article), True


def get_news_articles(
    db: Session,
    skip: int = 0,
    limit: int = 100,
    keywords: Optional[str] = None,
    source_ids: Optional[List[int]] = None,
    category_ids: Optional[List[int]] = None,
    countries: Optional[List[str]] = None,
    languages: Optional[List[str]] = None,
) -> Tuple[List[NewsArticle], int]:
    """
    Get news articles with optional filtering.

    *keywords* is a comma-separated list; each term is matched
    case-insensitively against title, description and content, and the
    terms are OR-ed together.  The list filters restrict by ID / code.

    Returns:
        A tuple containing the list of articles and the total count
        (counted before pagination).
    """
    query = db.query(NewsArticle)

    if keywords:
        patterns = [f"%{term.strip()}%" for term in keywords.split(",")]
        # One ILIKE clause per (term, column) pair, OR-ed together.
        clauses = [
            column.ilike(pattern)
            for pattern in patterns
            for column in (
                NewsArticle.title,
                NewsArticle.description,
                NewsArticle.content,
            )
        ]
        query = query.filter(or_(*clauses))

    # Apply the simple membership filters data-driven.
    for column, values in (
        (NewsArticle.source_id, source_ids),
        (NewsArticle.category_id, category_ids),
        (NewsArticle.country, countries),
        (NewsArticle.language, languages),
    ):
        if values:
            query = query.filter(column.in_(values))

    # Total must be computed before OFFSET/LIMIT are applied.
    total = query.count()

    page = (
        query.order_by(desc(NewsArticle.published_at))
        .offset(skip)
        .limit(limit)
        .all()
    )
    return page, total


def save_article_for_user(
    db: Session, user_id: int, article_id: int, notes: Optional[str] = None
) -> SavedArticle:
    """Bookmark *article_id* for *user_id*, optionally attaching notes."""
    bookmark = SavedArticle(user_id=user_id, article_id=article_id, notes=notes)
    db.add(bookmark)
    db.commit()
    db.refresh(bookmark)  # pick up DB-generated fields (id, created_at)
    return bookmark


def get_saved_articles_for_user(
    db: Session, user_id: int, skip: int = 0, limit: int = 100
) -> Tuple[List[SavedArticle], int]:
    """Return the user's saved articles, newest first, with total count."""
    base = (
        db.query(SavedArticle)
        .filter(SavedArticle.user_id == user_id)
        .join(NewsArticle)  # only bookmarks whose article row still exists
        .order_by(desc(SavedArticle.created_at))
    )
    count = base.count()
    rows = base.offset(skip).limit(limit).all()
    return rows, count


def delete_saved_article(db: Session, user_id: int, saved_article_id: int) -> bool:
    """
    Delete a saved article for a user.

    The user filter ensures a user can only delete their own bookmarks.

    Returns:
        True if the article was deleted, False if it wasn't found.
    """
    target = (
        db.query(SavedArticle)
        .filter(
            SavedArticle.id == saved_article_id,
            SavedArticle.user_id == user_id,
        )
        .first()
    )
    if target is None:
        return False
    db.delete(target)
    db.commit()
    return True


async def fetch_and_store_news(
    db: Session,
    keywords: Optional[str] = None,
    sources: Optional[str] = None,
    categories: Optional[str] = None,
    countries: Optional[str] = None,
    languages: Optional[str] = None,
    limit: int = 100,
) -> List[NewsArticle]:
    """
    Fetch news from the Mediastack API and store them in the database.

    Articles without a title or URL are skipped; sources and categories
    are created on first sight, and articles are deduplicated by URL.
    This is a best-effort pipeline: any failure is logged and an empty
    list is returned rather than raising to the caller.

    Returns:
        A list of news articles that were fetched and stored.
    """
    try:
        # Fetch news from the API
        response = await mediastack_client.get_live_news(
            keywords=keywords,
            sources=sources,
            categories=categories,
            countries=countries,
            languages=languages,
            limit=limit,
        )

        stored_articles: List[NewsArticle] = []

        # Process and store each article
        for article_data in response.get("data", []):
            # Skip articles without required fields
            if not article_data.get("title") or not article_data.get("url"):
                continue

            # Handle the source
            source = None
            if article_data.get("source"):
                source_create = NewsSourceCreate(
                    name=article_data["source"],
                    source_id=article_data["source"].lower().replace(" ", "_"),
                    url=None,  # API doesn't provide source URL
                )
                source = get_or_create_news_source(db, source_create)

            # Handle the category
            category = None
            if article_data.get("category"):
                category_create = NewsCategoryCreate(name=article_data["category"])
                category = get_or_create_news_category(db, category_create)

            # Create the article (existing URLs are returned, not duplicated)
            article_create = NewsArticleCreate(
                title=article_data["title"],
                description=article_data.get("description"),
                content=article_data.get("content"),
                url=article_data["url"],
                image_url=article_data.get("image"),
                published_at=_parse_published_at(article_data.get("published_at")),
                author=article_data.get("author"),
                language=article_data.get("language"),
                country=article_data.get("country"),
                source_id=source.id if source else None,
                category_id=category.id if category else None,
            )

            article, _created = get_or_create_news_article(db, article_create)
            stored_articles.append(article)

        return stored_articles

    except Exception:
        # logger.exception keeps the full traceback, unlike the plain
        # error message it replaces; callers still get an empty result.
        logger.exception("Error fetching news from Mediastack")
        return []


def _parse_published_at(raw: Optional[str]) -> datetime:
    """Parse Mediastack's ISO-8601 timestamp for ``published_at``.

    Falls back to the current UTC time when the value is missing or
    unparseable.  The fallback is timezone-aware so it is consistent
    with the parsed branch (``fromisoformat`` with an explicit
    ``+00:00`` offset yields aware datetimes); the previous
    ``datetime.utcnow()`` fallback produced naive values, mixing naive
    and aware timestamps in the same column.
    """
    if raw:
        try:
            # Mediastack uses a trailing "Z"; fromisoformat needs an
            # explicit offset on Python < 3.11.
            return datetime.fromisoformat(raw.replace("Z", "+00:00"))
        except ValueError:
            # If we can't parse the date, use current time
            pass
    return datetime.now(timezone.utc)