from typing import List, Optional, Tuple
from datetime import datetime
import logging

from sqlalchemy.orm import Session
from sqlalchemy import desc, or_

from app.models.news import (
    NewsArticle, NewsSource, NewsCategory, SavedArticle
)
from app.schemas.news import (
    NewsArticleCreate, NewsSourceCreate, NewsCategoryCreate
)
from app.services.mediastack import mediastack_client

logger = logging.getLogger(__name__)


def get_news_source_by_source_id(db: Session, source_id: str) -> Optional[NewsSource]:
    """Get a news source by its source_id."""
    return db.query(NewsSource).filter(NewsSource.source_id == source_id).first()


def create_news_source(db: Session, source: NewsSourceCreate) -> NewsSource:
    """Create a new news source."""
    db_source = NewsSource(**source.model_dump())
    db.add(db_source)
    db.commit()
    db.refresh(db_source)
    return db_source


def get_or_create_news_source(db: Session, source: NewsSourceCreate) -> NewsSource:
    """Get a news source by source_id or create it if it doesn't exist."""
    db_source = get_news_source_by_source_id(db, source.source_id)
    if db_source:
        return db_source
    return create_news_source(db, source)


def get_news_category_by_name(db: Session, name: str) -> Optional[NewsCategory]:
    """Get a news category by its name."""
    return db.query(NewsCategory).filter(NewsCategory.name == name).first()


def create_news_category(db: Session, category: NewsCategoryCreate) -> NewsCategory:
    """Create a new news category."""
    db_category = NewsCategory(**category.model_dump())
    db.add(db_category)
    db.commit()
    db.refresh(db_category)
    return db_category


def get_or_create_news_category(db: Session, category: NewsCategoryCreate) -> NewsCategory:
    """Get a news category by name or create it if it doesn't exist."""
    db_category = get_news_category_by_name(db, category.name)
    if db_category:
        return db_category
    return create_news_category(db, category)


def get_news_article_by_url(db: Session, url: str) -> Optional[NewsArticle]:
    """Get a news article by its URL."""
    return db.query(NewsArticle).filter(NewsArticle.url == url).first()


def create_news_article(db: Session, article: NewsArticleCreate) -> NewsArticle:
    """Create a new news article."""
    db_article = NewsArticle(**article.model_dump())
    db.add(db_article)
    db.commit()
    db.refresh(db_article)
    return db_article


def get_or_create_news_article(db: Session, article: NewsArticleCreate) -> Tuple[NewsArticle, bool]:
    """
    Get a news article by URL or create it if it doesn't exist.

    Returns:
        A tuple containing the article and a boolean indicating if it was created.
    """
    db_article = get_news_article_by_url(db, article.url)
    if db_article:
        return db_article, False
    return create_news_article(db, article), True


def get_news_articles(
    db: Session,
    skip: int = 0,
    limit: int = 100,
    keywords: Optional[str] = None,
    source_ids: Optional[List[int]] = None,
    category_ids: Optional[List[int]] = None,
    countries: Optional[List[str]] = None,
    languages: Optional[List[str]] = None,
) -> Tuple[List[NewsArticle], int]:
    """
    Get news articles with optional filtering.

    Returns:
        A tuple containing the list of articles and the total count.
    """
    query = db.query(NewsArticle)

    # Apply filters
    if keywords:
        search_terms = [f"%{term.strip()}%" for term in keywords.split(",")]
        search_conditions = []
        for term in search_terms:
            search_conditions.append(NewsArticle.title.ilike(term))
            search_conditions.append(NewsArticle.description.ilike(term))
            search_conditions.append(NewsArticle.content.ilike(term))
        query = query.filter(or_(*search_conditions))

    if source_ids:
        query = query.filter(NewsArticle.source_id.in_(source_ids))

    if category_ids:
        query = query.filter(NewsArticle.category_id.in_(category_ids))

    if countries:
        query = query.filter(NewsArticle.country.in_(countries))

    if languages:
        query = query.filter(NewsArticle.language.in_(languages))

    # Count total before pagination
    total = query.count()

    # Apply pagination and sorting
    query = query.order_by(desc(NewsArticle.published_at)).offset(skip).limit(limit)

    return query.all(), total


def save_article_for_user(
    db: Session, user_id: int, article_id: int, notes: Optional[str] = None
) -> SavedArticle:
    """Save an article for a user."""
    saved_article = SavedArticle(
        user_id=user_id,
        article_id=article_id,
        notes=notes,
    )
    db.add(saved_article)
    db.commit()
    db.refresh(saved_article)
    return saved_article


def get_saved_articles_for_user(
    db: Session, user_id: int, skip: int = 0, limit: int = 100
) -> Tuple[List[SavedArticle], int]:
    """Get articles saved by a user."""
    query = (
        db.query(SavedArticle)
        .filter(SavedArticle.user_id == user_id)
        .join(NewsArticle)
        .order_by(desc(SavedArticle.created_at))
    )
    total = query.count()
    result = query.offset(skip).limit(limit).all()
    return result, total


def delete_saved_article(db: Session, user_id: int, saved_article_id: int) -> bool:
    """
    Delete a saved article for a user.

    Returns:
        True if the article was deleted, False if it wasn't found.
    """
    saved_article = (
        db.query(SavedArticle)
        .filter(SavedArticle.id == saved_article_id, SavedArticle.user_id == user_id)
        .first()
    )
    if not saved_article:
        return False
    db.delete(saved_article)
    db.commit()
    return True


async def fetch_and_store_news(
    db: Session,
    keywords: Optional[str] = None,
    sources: Optional[str] = None,
    categories: Optional[str] = None,
    countries: Optional[str] = None,
    languages: Optional[str] = None,
    limit: int = 100,
) -> List[NewsArticle]:
    """
    Fetch news from the Mediastack API and store them in the database.

    Returns:
        A list of news articles that were fetched and stored.
    """
    try:
        # Fetch news from the API
        response = await mediastack_client.get_live_news(
            keywords=keywords,
            sources=sources,
            categories=categories,
            countries=countries,
            languages=languages,
            limit=limit,
        )

        stored_articles = []

        # Process and store each article
        for article_data in response.get("data", []):
            # Skip articles without required fields
            if not article_data.get("title") or not article_data.get("url"):
                continue

            # Handle the source
            source = None
            if article_data.get("source"):
                source_create = NewsSourceCreate(
                    name=article_data["source"],
                    source_id=article_data["source"].lower().replace(" ", "_"),
                    url=None,  # API doesn't provide source URL
                )
                source = get_or_create_news_source(db, source_create)

            # Handle the category
            category = None
            if article_data.get("category"):
                category_create = NewsCategoryCreate(name=article_data["category"])
                category = get_or_create_news_category(db, category_create)

            # Parse the published date
            published_at = datetime.utcnow()
            if article_data.get("published_at"):
                try:
                    published_at = datetime.fromisoformat(
                        article_data["published_at"].replace("Z", "+00:00")
                    )
                except ValueError:
                    # If we can't parse the date, use current time
                    pass

            # Create the article
            article_create = NewsArticleCreate(
                title=article_data["title"],
                description=article_data.get("description"),
                content=article_data.get("content"),
                url=article_data["url"],
                image_url=article_data.get("image"),
                published_at=published_at,
                author=article_data.get("author"),
                language=article_data.get("language"),
                country=article_data.get("country"),
                source_id=source.id if source else None,
                category_id=category.id if category else None,
            )
            article, created = get_or_create_news_article(db, article_create)
            stored_articles.append(article)

        return stored_articles
    except Exception as e:
        logger.error(f"Error fetching news from Mediastack: {e}")
        return []