From 90c1cdef34e50046d30cf80b5ff15698e04354c9 Mon Sep 17 00:00:00 2001 From: Automated Action Date: Tue, 27 May 2025 18:50:11 +0000 Subject: [PATCH] Setup News Aggregation Service - Fix code linting issues - Update README with detailed documentation - Configure database paths for the current environment - Create necessary directory structure The News Aggregation Service is now ready to use with FastAPI and SQLite. --- README.md | 87 ++++++- alembic.ini | 85 +++++++ app/__init__.py | 0 app/api/__init__.py | 0 app/api/deps.py | 82 +++++++ app/api/v1/__init__.py | 0 app/api/v1/api.py | 7 + app/api/v1/news.py | 285 +++++++++++++++++++++++ app/api/v1/users.py | 172 ++++++++++++++ app/core/__init__.py | 0 app/core/cache.py | 98 ++++++++ app/core/config.py | 40 ++++ app/core/security.py | 62 +++++ app/core/utils.py | 15 ++ app/db/__init__.py | 0 app/db/session.py | 25 ++ app/models/__init__.py | 9 + app/models/base.py | 24 ++ app/models/news.py | 79 +++++++ app/models/user.py | 20 ++ app/schemas/__init__.py | 24 ++ app/schemas/news.py | 128 ++++++++++ app/schemas/token.py | 11 + app/schemas/user.py | 37 +++ app/services/__init__.py | 1 + app/services/background_tasks.py | 80 +++++++ app/services/mediastack.py | 198 ++++++++++++++++ app/services/news.py | 272 +++++++++++++++++++++ app/services/user.py | 152 ++++++++++++ main.py | 82 +++++++ migrations/__init__.py | 0 migrations/env.py | 90 +++++++ migrations/script.py.mako | 24 ++ migrations/versions/__init__.py | 0 migrations/versions/initial_migration.py | 143 ++++++++++++ requirements.txt | 13 ++ 36 files changed, 2343 insertions(+), 2 deletions(-) create mode 100644 alembic.ini create mode 100644 app/__init__.py create mode 100644 app/api/__init__.py create mode 100644 app/api/deps.py create mode 100644 app/api/v1/__init__.py create mode 100644 app/api/v1/api.py create mode 100644 app/api/v1/news.py create mode 100644 app/api/v1/users.py create mode 100644 app/core/__init__.py create mode 100644 app/core/cache.py 
create mode 100644 app/core/config.py create mode 100644 app/core/security.py create mode 100644 app/core/utils.py create mode 100644 app/db/__init__.py create mode 100644 app/db/session.py create mode 100644 app/models/__init__.py create mode 100644 app/models/base.py create mode 100644 app/models/news.py create mode 100644 app/models/user.py create mode 100644 app/schemas/__init__.py create mode 100644 app/schemas/news.py create mode 100644 app/schemas/token.py create mode 100644 app/schemas/user.py create mode 100644 app/services/__init__.py create mode 100644 app/services/background_tasks.py create mode 100644 app/services/mediastack.py create mode 100644 app/services/news.py create mode 100644 app/services/user.py create mode 100644 main.py create mode 100644 migrations/__init__.py create mode 100644 migrations/env.py create mode 100644 migrations/script.py.mako create mode 100644 migrations/versions/__init__.py create mode 100644 migrations/versions/initial_migration.py create mode 100644 requirements.txt diff --git a/README.md b/README.md index e8acfba..2e00019 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,86 @@ -# FastAPI Application +# News Aggregation Service -This is a FastAPI application bootstrapped by BackendIM, the AI-powered backend generation platform. +A FastAPI application that aggregates news from various sources using the Mediastack API. 
+ +## Features + +- User authentication and authorization +- News article fetching and storage from Mediastack API +- Filtering news by keywords, sources, categories, countries, and languages +- User preferences for personalized news +- Save articles for later reading +- Background tasks for periodic news updates + +## Tech Stack + +- **FastAPI**: High-performance web framework +- **SQLite**: Database for storing news articles and user data +- **SQLAlchemy**: ORM for database interactions +- **Alembic**: Database migration tool +- **Pydantic**: Data validation and settings management +- **JWT**: Token-based authentication + +## Requirements + +- Python 3.8+ +- Mediastack API key + +## Setup + +1. Clone the repository +2. Install dependencies: + ```bash + pip install -r requirements.txt + ``` +3. Set up environment variables: + ``` + MEDIASTACK_API_KEY=your_api_key_here + SECRET_KEY=your_secret_key_here + ``` + +4. Run database migrations: + ```bash + alembic upgrade head + ``` + +5. Start the server: + ```bash + uvicorn main:app --reload + ``` + +## API Endpoints + +The API is documented with Swagger UI, available at `/docs` when the server is running. + +### Main Endpoints + +- `/api/v1/users/register`: Register a new user +- `/api/v1/users/token`: Login and get JWT token +- `/api/v1/news`: Get news articles with optional filtering +- `/api/v1/news/personalized`: Get personalized news based on user preferences +- `/api/v1/news/saved`: Save articles for later reading +- `/health`: Health check endpoint + +## Project Structure + +- `app/api`: API routes and endpoints +- `app/core`: Core functionality (config, security, utils) +- `app/db`: Database connection and session +- `app/models`: SQLAlchemy models +- `app/schemas`: Pydantic schemas for validation +- `app/services`: Business logic services +- `migrations`: Database migrations + +## Development + +To run linting checks: + +```bash +ruff check . 
+``` + +To fix linting issues automatically: + +```bash +ruff check --fix . +``` \ No newline at end of file diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..cf245fd --- /dev/null +++ b/alembic.ini @@ -0,0 +1,85 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = migrations + +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# timezone to use when rendering the date +# within the migration file as well as the filename. +# string value is passed to dateutil.tz.gettz() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the +# "slug" field +#truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; this defaults +# to migrations/versions. When using multiple version +# directories, initial revisions must be specified with --version-path +# version_locations = %(here)s/bar %(here)s/bat migrations/versions + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# SQLite URL +sqlalchemy.url = sqlite:////projects/newsaggregationservice-ks0ts2/app/storage/db/db.sqlite + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. 
See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks=black +# black.type=console_scripts +# black.entrypoint=black +# black.options=-l 79 + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S \ No newline at end of file diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/api/__init__.py b/app/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/api/deps.py b/app/api/deps.py new file mode 100644 index 0000000..9a290e5 --- /dev/null +++ b/app/api/deps.py @@ -0,0 +1,82 @@ +from typing import Generator + +from fastapi import Depends, HTTPException, status +from jose import jwt, JWTError +from pydantic import ValidationError +from sqlalchemy.orm import Session + +from app.core.config import settings +from app.core.security import ALGORITHM, oauth2_scheme +from app.db.session import SessionLocal +from app.models.user import User +from app.schemas.token import TokenPayload +from app.services.user import get_user + + +def get_db() -> Generator: + """ + Dependency to get a database session. + """ + try: + db = SessionLocal() + yield db + finally: + db.close() + + +def get_current_user( + db: Session = Depends(get_db), token: str = Depends(oauth2_scheme) +) -> User: + """ + Dependency to get the current authenticated user. 
+ """ + try: + payload = jwt.decode( + token, settings.SECRET_KEY, algorithms=[ALGORITHM] + ) + token_data = TokenPayload(**payload) + except (JWTError, ValidationError): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Could not validate credentials", + headers={"WWW-Authenticate": "Bearer"}, + ) + + user = get_user(db, user_id=token_data.sub) + if not user: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="User not found", + ) + + return user + + +def get_current_active_user( + current_user: User = Depends(get_current_user), +) -> User: + """ + Dependency to get the current active user. + """ + if not current_user.is_active: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Inactive user", + ) + + return current_user + + +def get_current_active_superuser( + current_user: User = Depends(get_current_active_user), +) -> User: + """ + Dependency to get the current active superuser. + """ + if not current_user.is_superuser: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="The user doesn't have enough privileges", + ) + + return current_user \ No newline at end of file diff --git a/app/api/v1/__init__.py b/app/api/v1/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/api/v1/api.py b/app/api/v1/api.py new file mode 100644 index 0000000..e5b25b3 --- /dev/null +++ b/app/api/v1/api.py @@ -0,0 +1,7 @@ +from fastapi import APIRouter + +from app.api.v1 import news, users + +api_router = APIRouter() +api_router.include_router(news.router, prefix="/news", tags=["news"]) +api_router.include_router(users.router, prefix="/users", tags=["users"]) \ No newline at end of file diff --git a/app/api/v1/news.py b/app/api/v1/news.py new file mode 100644 index 0000000..278eddd --- /dev/null +++ b/app/api/v1/news.py @@ -0,0 +1,285 @@ +from typing import List, Optional +from fastapi import APIRouter, Depends, HTTPException, status, BackgroundTasks +from sqlalchemy.orm 
import Session + +from app.api import deps +from app.models.user import User +from app.models.news import NewsArticle, NewsSource, NewsCategory, SavedArticle +from app.schemas.news import ( + NewsSource as NewsSourceSchema, + NewsCategory as NewsCategorySchema, + SavedArticle as SavedArticleSchema, + SavedArticleCreate, + NewsResponse, +) +from app.services.news import ( + get_news_articles, + fetch_and_store_news, + get_saved_articles_for_user, + save_article_for_user, + delete_saved_article, +) +from app.services.user import get_user_preference + +router = APIRouter() + + +@router.get("/", response_model=NewsResponse) +async def get_news( + db: Session = Depends(deps.get_db), + skip: int = 0, + limit: int = 25, + keywords: Optional[str] = None, + sources: Optional[str] = None, + categories: Optional[str] = None, + countries: Optional[str] = None, + languages: Optional[str] = None, + current_user: Optional[User] = Depends(deps.get_current_active_user), + refresh: bool = False, + background_tasks: BackgroundTasks = None, +): + """ + Retrieve news articles with optional filtering. + + If refresh is True, the API will fetch fresh news from Mediastack. + Otherwise, it will return news articles from the database. 
+ """ + # Parse filters + source_ids = None + if sources: + source_names = [s.strip() for s in sources.split(",")] + source_query = db.query(NewsSource).filter(NewsSource.name.in_(source_names)) + source_ids = [source.id for source in source_query.all()] + + category_ids = None + if categories: + category_names = [c.strip() for c in categories.split(",")] + category_query = db.query(NewsCategory).filter(NewsCategory.name.in_(category_names)) + category_ids = [category.id for category in category_query.all()] + + country_list = None + if countries: + country_list = [c.strip() for c in countries.split(",")] + + language_list = None + if languages: + language_list = [lang.strip() for lang in languages.split(",")] + + # If refresh is requested, fetch news from the API + if refresh and background_tasks: + background_tasks.add_task( + fetch_and_store_news, + db, + keywords=keywords, + sources=sources, + categories=categories, + countries=countries, + languages=languages, + ) + + # Get news from the database + articles, total = get_news_articles( + db, + skip=skip, + limit=limit, + keywords=keywords, + source_ids=source_ids, + category_ids=category_ids, + countries=country_list, + languages=language_list, + ) + + return {"items": articles, "total": total} + + +@router.get("/personalized", response_model=NewsResponse) +async def get_personalized_news( + db: Session = Depends(deps.get_db), + skip: int = 0, + limit: int = 25, + current_user: User = Depends(deps.get_current_active_user), + refresh: bool = False, + background_tasks: BackgroundTasks = None, +): + """ + Retrieve news articles based on the user's preferences. 
+ """ + # Get user preferences + preferences = get_user_preference(db, current_user.id) + if not preferences: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="User preferences not found", + ) + + # If refresh is requested, fetch news from the API + if refresh and background_tasks: + background_tasks.add_task( + fetch_and_store_news, + db, + keywords=preferences.keywords, + sources=preferences.sources, + categories=preferences.categories, + countries=preferences.countries, + languages=preferences.languages, + ) + + # Parse filters from preferences + source_ids = None + if preferences.sources: + source_names = [s.strip() for s in preferences.sources.split(",")] + source_query = db.query(NewsSource).filter(NewsSource.name.in_(source_names)) + source_ids = [source.id for source in source_query.all()] + + category_ids = None + if preferences.categories: + category_names = [c.strip() for c in preferences.categories.split(",")] + category_query = db.query(NewsCategory).filter(NewsCategory.name.in_(category_names)) + category_ids = [category.id for category in category_query.all()] + + country_list = None + if preferences.countries: + country_list = [c.strip() for c in preferences.countries.split(",")] + + language_list = None + if preferences.languages: + language_list = [lang.strip() for lang in preferences.languages.split(",")] + + # Get news from the database + articles, total = get_news_articles( + db, + skip=skip, + limit=limit, + keywords=preferences.keywords, + source_ids=source_ids, + category_ids=category_ids, + countries=country_list, + languages=language_list, + ) + + return {"items": articles, "total": total} + + +@router.get("/refresh", response_model=NewsResponse) +async def refresh_news( + db: Session = Depends(deps.get_db), + keywords: Optional[str] = None, + sources: Optional[str] = None, + categories: Optional[str] = None, + countries: Optional[str] = None, + languages: Optional[str] = None, + limit: int = 100, + current_user: 
User = Depends(deps.get_current_active_user), +): + """ + Fetch fresh news from Mediastack API and return them. + + This endpoint will fetch news from the API and store them in the database. + """ + articles = await fetch_and_store_news( + db, + keywords=keywords, + sources=sources, + categories=categories, + countries=countries, + languages=languages, + limit=limit, + ) + + return {"items": articles, "total": len(articles)} + + +@router.get("/sources", response_model=List[NewsSourceSchema]) +async def get_sources( + db: Session = Depends(deps.get_db), +): + """ + Get a list of news sources. + """ + sources = db.query(NewsSource).all() + return sources + + +@router.get("/categories", response_model=List[NewsCategorySchema]) +async def get_categories( + db: Session = Depends(deps.get_db), +): + """ + Get a list of news categories. + """ + categories = db.query(NewsCategory).all() + return categories + + +@router.get("/saved", response_model=List[SavedArticleSchema]) +async def get_saved_articles( + db: Session = Depends(deps.get_db), + skip: int = 0, + limit: int = 100, + current_user: User = Depends(deps.get_current_active_user), +): + """ + Get articles saved by the current user. + """ + saved_articles, total = get_saved_articles_for_user( + db, user_id=current_user.id, skip=skip, limit=limit + ) + return saved_articles + + +@router.post("/saved", response_model=SavedArticleSchema) +async def save_article( + *, + db: Session = Depends(deps.get_db), + article_in: SavedArticleCreate, + current_user: User = Depends(deps.get_current_active_user), +): + """ + Save an article for the current user. 
+ """ + # Check if article exists + article = db.query(NewsArticle).filter(NewsArticle.id == article_in.article_id).first() + if not article: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Article not found", + ) + + # Check if already saved + existing = ( + db.query(SavedArticle) + .filter( + SavedArticle.user_id == current_user.id, + SavedArticle.article_id == article_in.article_id, + ) + .first() + ) + + if existing: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Article already saved", + ) + + saved_article = save_article_for_user( + db, user_id=current_user.id, article_id=article_in.article_id, notes=article_in.notes + ) + return saved_article + + +@router.delete("/saved/{saved_article_id}", status_code=status.HTTP_204_NO_CONTENT, response_model=None) +async def remove_saved_article( + saved_article_id: int, + db: Session = Depends(deps.get_db), + current_user: User = Depends(deps.get_current_active_user), +): + """ + Remove a saved article for the current user. 
+ """ + success = delete_saved_article(db, user_id=current_user.id, saved_article_id=saved_article_id) + if not success: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Saved article not found", + ) + return None \ No newline at end of file diff --git a/app/api/v1/users.py b/app/api/v1/users.py new file mode 100644 index 0000000..7558e76 --- /dev/null +++ b/app/api/v1/users.py @@ -0,0 +1,172 @@ +from typing import Any, List +from datetime import timedelta + +from fastapi import APIRouter, Depends, HTTPException, status +from fastapi.security import OAuth2PasswordRequestForm +from sqlalchemy.orm import Session + +from app.api import deps +from app.core.config import settings +from app.core.security import create_access_token +from app.models.user import User +from app.schemas.user import User as UserSchema, UserCreate, UserUpdate +from app.schemas.news import UserPreference as UserPreferenceSchema, UserPreferenceUpdate +from app.schemas.token import Token +from app.services.user import ( + authenticate_user, + create_user, + update_user, + get_user_by_email, + get_user, + get_user_preference, + update_user_preference, +) + +router = APIRouter() + + +@router.post("/token", response_model=Token) +async def login_for_access_token( + db: Session = Depends(deps.get_db), + form_data: OAuth2PasswordRequestForm = Depends(), +) -> Any: + """ + OAuth2 compatible token login, get an access token for future requests. 
+ """ + user = authenticate_user(db, form_data.username, form_data.password) + if not user: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect username or password", + headers={"WWW-Authenticate": "Bearer"}, + ) + + access_token_expires = timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES) + return { + "access_token": create_access_token( + user.id, expires_delta=access_token_expires + ), + "token_type": "bearer", + } + + +@router.post("/register", response_model=UserSchema) +async def register_user( + *, + db: Session = Depends(deps.get_db), + user_in: UserCreate, +) -> Any: + """ + Register a new user. + """ + try: + user = create_user(db, user_in) + return user + except ValueError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) + + +@router.get("/me", response_model=UserSchema) +async def read_users_me( + current_user: User = Depends(deps.get_current_active_user), +) -> Any: + """ + Get current user. + """ + return current_user + + +@router.put("/me", response_model=UserSchema) +async def update_user_me( + *, + db: Session = Depends(deps.get_db), + user_in: UserUpdate, + current_user: User = Depends(deps.get_current_active_user), +) -> Any: + """ + Update current user. + """ + if user_in.email and user_in.email != current_user.email: + if get_user_by_email(db, user_in.email): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Email already registered", + ) + + user = update_user(db, current_user, user_in) + return user + + +@router.get("/me/preferences", response_model=UserPreferenceSchema) +async def read_user_preferences( + db: Session = Depends(deps.get_db), + current_user: User = Depends(deps.get_current_active_user), +) -> Any: + """ + Get current user's preferences. 
+ """ + preferences = get_user_preference(db, current_user.id) + if not preferences: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="User preferences not found", + ) + return preferences + + +@router.put("/me/preferences", response_model=UserPreferenceSchema) +async def update_user_preferences( + *, + db: Session = Depends(deps.get_db), + preferences_in: UserPreferenceUpdate, + current_user: User = Depends(deps.get_current_active_user), +) -> Any: + """ + Update current user's preferences. + """ + preferences = update_user_preference( + db, + current_user.id, + keywords=preferences_in.keywords, + sources=preferences_in.sources, + categories=preferences_in.categories, + countries=preferences_in.countries, + languages=preferences_in.languages, + ) + return preferences + + +# Admin endpoints +@router.get("/", response_model=List[UserSchema]) +async def read_users( + db: Session = Depends(deps.get_db), + skip: int = 0, + limit: int = 100, + current_user: User = Depends(deps.get_current_active_superuser), +) -> Any: + """ + Retrieve users. Only for superusers. + """ + users = db.query(User).offset(skip).limit(limit).all() + return users + + +@router.get("/{user_id}", response_model=UserSchema) +async def read_user( + user_id: int, + db: Session = Depends(deps.get_db), + current_user: User = Depends(deps.get_current_active_superuser), +) -> Any: + """ + Get a specific user by id. Only for superusers. 
+ """ + user = get_user(db, user_id=user_id) + if not user: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="User not found", + ) + return user \ No newline at end of file diff --git a/app/core/__init__.py b/app/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/core/cache.py b/app/core/cache.py new file mode 100644 index 0000000..aff4254 --- /dev/null +++ b/app/core/cache.py @@ -0,0 +1,98 @@ +import time +from typing import Dict, Any, Optional, Callable, TypeVar, Generic +import logging + +logger = logging.getLogger(__name__) + +T = TypeVar('T') + + +class Cache(Generic[T]): + """ + A simple in-memory cache implementation with TTL (time to live). + """ + + def __init__(self, ttl: int = 300): + """ + Initialize the cache. + + Args: + ttl: Time to live for cache entries in seconds. Default is 300 seconds (5 minutes). + """ + self._cache: Dict[str, Dict[str, Any]] = {} + self.ttl = ttl + + def get(self, key: str) -> Optional[T]: + """ + Get a value from the cache. + + Args: + key: The cache key. + + Returns: + The cached value if it exists and hasn't expired, None otherwise. + """ + if key in self._cache: + entry = self._cache[key] + if entry["expires"] > time.time(): + logger.debug(f"Cache hit for key: {key}") + return entry["value"] + else: + # Remove expired entry + logger.debug(f"Cache expired for key: {key}") + del self._cache[key] + return None + + def set(self, key: str, value: T, ttl: Optional[int] = None) -> None: + """ + Set a value in the cache. + + Args: + key: The cache key. + value: The value to cache. + ttl: Optional custom TTL for this entry. If not provided, the default TTL is used. + """ + expiry = time.time() + (ttl if ttl is not None else self.ttl) + self._cache[key] = {"value": value, "expires": expiry} + logger.debug(f"Cache set for key: {key}") + + def delete(self, key: str) -> None: + """ + Delete a value from the cache. + + Args: + key: The cache key. 
+ """ + if key in self._cache: + del self._cache[key] + logger.debug(f"Cache deleted for key: {key}") + + def clear(self) -> None: + """ + Clear all entries from the cache. + """ + self._cache.clear() + logger.debug("Cache cleared") + + def get_or_set(self, key: str, value_func: Callable[[], T], ttl: Optional[int] = None) -> T: + """ + Get a value from the cache, or set it if it doesn't exist. + + Args: + key: The cache key. + value_func: A function that returns the value to cache if it doesn't exist. + ttl: Optional custom TTL for this entry. If not provided, the default TTL is used. + + Returns: + The cached value. + """ + value = self.get(key) + if value is None: + value = value_func() + self.set(key, value, ttl) + return value + + +# Create global cache instance +cache = Cache(ttl=300) # 5 minutes TTL +api_cache = Cache(ttl=60) # 1 minute TTL for API responses \ No newline at end of file diff --git a/app/core/config.py b/app/core/config.py new file mode 100644 index 0000000..c83f31b --- /dev/null +++ b/app/core/config.py @@ -0,0 +1,40 @@ +from typing import List, Union +from pathlib import Path + +from pydantic import AnyHttpUrl, field_validator +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class Settings(BaseSettings): + model_config = SettingsConfigDict(env_file=".env", case_sensitive=True) + + API_V1_STR: str = "/api/v1" + PROJECT_NAME: str = "News Aggregation Service" + VERSION: str = "0.1.0" + DESCRIPTION: str = "A service that aggregates news from various sources using the Mediastack API" + + # CORS + BACKEND_CORS_ORIGINS: List[AnyHttpUrl] = [] + + @field_validator("BACKEND_CORS_ORIGINS", mode="before") + @classmethod + def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str]: + if isinstance(v, str) and not v.startswith("["): + return [i.strip() for i in v.split(",")] + elif isinstance(v, (list, str)): + return v + raise ValueError(v) + + # Mediastack API + MEDIASTACK_API_KEY: str = "" + 
MEDIASTACK_BASE_URL: str = "http://api.mediastack.com/v1" + + # Security + SECRET_KEY: str = "your-secret-key-here" + ACCESS_TOKEN_EXPIRE_MINUTES: int = 60 * 24 * 8 # 8 days + + # Database + DATABASE_PATH: Path = Path("/projects/newsaggregationservice-ks0ts2/app/storage/db/db.sqlite") + + +settings = Settings() \ No newline at end of file diff --git a/app/core/security.py b/app/core/security.py new file mode 100644 index 0000000..8b18cd6 --- /dev/null +++ b/app/core/security.py @@ -0,0 +1,62 @@ +from datetime import datetime, timedelta +from typing import Any, Optional, Union + +from jose import jwt +from passlib.context import CryptContext +from fastapi.security import OAuth2PasswordBearer + +from app.core.config import settings + +pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") +oauth2_scheme = OAuth2PasswordBearer(tokenUrl=f"{settings.API_V1_STR}/users/token") + +ALGORITHM = "HS256" + + +def create_access_token(subject: Union[str, Any], expires_delta: Optional[timedelta] = None) -> str: + """ + Create a JWT access token. + + Args: + subject: The subject of the token, typically a user ID. + expires_delta: An optional timedelta for when the token should expire. + + Returns: + A JWT token as a string. + """ + if expires_delta: + expire = datetime.utcnow() + expires_delta + else: + expire = datetime.utcnow() + timedelta( + minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES + ) + to_encode = {"exp": expire, "sub": str(subject)} + encoded_jwt = jwt.encode(to_encode, settings.SECRET_KEY, algorithm=ALGORITHM) + return encoded_jwt + + +def verify_password(plain_password: str, hashed_password: str) -> bool: + """ + Verify a password against a hash. + + Args: + plain_password: The plaintext password. + hashed_password: The hashed password. + + Returns: + True if the password matches the hash, False otherwise. + """ + return pwd_context.verify(plain_password, hashed_password) + + +def get_password_hash(password: str) -> str: + """ + Hash a password. 
+ + Args: + password: The plaintext password. + + Returns: + The hashed password. + """ + return pwd_context.hash(password) \ No newline at end of file diff --git a/app/core/utils.py b/app/core/utils.py new file mode 100644 index 0000000..1541411 --- /dev/null +++ b/app/core/utils.py @@ -0,0 +1,15 @@ +import logging + + +logger = logging.getLogger(__name__) + + +def configure_logging(): + """Configure logging for the application.""" + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + # Set lower log levels for some chatty modules + logging.getLogger("httpx").setLevel(logging.WARNING) + logging.getLogger("sqlalchemy.engine").setLevel(logging.WARNING) \ No newline at end of file diff --git a/app/db/__init__.py b/app/db/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/db/session.py b/app/db/session.py new file mode 100644 index 0000000..a620c22 --- /dev/null +++ b/app/db/session.py @@ -0,0 +1,25 @@ +from pathlib import Path +from sqlalchemy import create_engine +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker + +DB_DIR = Path("/projects/newsaggregationservice-ks0ts2/app/storage/db") +DB_DIR.mkdir(parents=True, exist_ok=True) + +SQLALCHEMY_DATABASE_URL = f"sqlite:///{DB_DIR}/db.sqlite" + +engine = create_engine( + SQLALCHEMY_DATABASE_URL, + connect_args={"check_same_thread": False} +) +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + +Base = declarative_base() + + +def get_db(): + db = SessionLocal() + try: + yield db + finally: + db.close() \ No newline at end of file diff --git a/app/models/__init__.py b/app/models/__init__.py new file mode 100644 index 0000000..e5385bc --- /dev/null +++ b/app/models/__init__.py @@ -0,0 +1,9 @@ +from app.models.base import Base, TimestampMixin, TableNameMixin # noqa +from app.models.user import User # noqa +from app.models.news import ( # noqa + NewsSource, + 
# ---------------------------------------------------------------------------
# app/models/base.py — shared SQLAlchemy model mixins
# ---------------------------------------------------------------------------
from datetime import datetime

from sqlalchemy import Column, DateTime
from sqlalchemy.ext.declarative import declared_attr


class TimestampMixin:
    """Adds ``created_at`` / ``updated_at`` audit columns to a model."""

    # Naive UTC timestamps; ``updated_at`` is refreshed on every UPDATE.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False
    )


class TableNameMixin:
    """Derives ``__tablename__`` automatically from the lower-cased class name."""

    @declared_attr
    def __tablename__(cls) -> str:
        return cls.__name__.lower()


# ---------------------------------------------------------------------------
# app/models/news.py — news domain tables
# ---------------------------------------------------------------------------
from sqlalchemy import ForeignKey, Integer, String, Text
from sqlalchemy.orm import relationship

from app.db.session import Base


class NewsSource(Base, TimestampMixin, TableNameMixin):
    """A news outlet that articles are attributed to (table ``newssource``)."""

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, index=True, nullable=False)
    # External identifier used by the Mediastack API, e.g. "cnn".
    source_id = Column(String, unique=True, index=True, nullable=False)
    url = Column(String, nullable=True)

    articles = relationship("NewsArticle", back_populates="source")


class NewsCategory(Base, TimestampMixin, TableNameMixin):
    """A news category such as "technology" (table ``newscategory``)."""

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, unique=True, index=True, nullable=False)

    articles = relationship("NewsArticle", back_populates="category")


class NewsArticle(Base, TimestampMixin, TableNameMixin):
    """A single aggregated news article (table ``newsarticle``)."""

    id = Column(Integer, primary_key=True, index=True)
    title = Column(String, index=True, nullable=False)
    description = Column(Text, nullable=True)
    content = Column(Text, nullable=True)
    # The URL is the de-duplication key used during ingestion.
    url = Column(String, unique=True, index=True, nullable=False)
    image_url = Column(String, nullable=True)
    published_at = Column(DateTime, index=True, nullable=False)
    author = Column(String, nullable=True)
    language = Column(String, nullable=True)
    country = Column(String, nullable=True)

    # Foreign keys (nullable: the upstream API may omit source/category).
    source_id = Column(Integer, ForeignKey("newssource.id"), nullable=True)
    category_id = Column(Integer, ForeignKey("newscategory.id"), nullable=True)

    source = relationship("NewsSource", back_populates="articles")
    category = relationship("NewsCategory", back_populates="articles")
    saved_by = relationship(
        "SavedArticle", back_populates="article", cascade="all, delete-orphan"
    )


class SavedArticle(Base, TimestampMixin, TableNameMixin):
    """Join table linking a user to an article they bookmarked."""

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("user.id"), nullable=False)
    article_id = Column(Integer, ForeignKey("newsarticle.id"), nullable=False)
    notes = Column(Text, nullable=True)

    user = relationship("User", back_populates="saved_articles")
    article = relationship("NewsArticle", back_populates="saved_by")


class UserPreference(Base, TimestampMixin, TableNameMixin):
    """Per-user news filtering preferences, stored as comma-separated lists."""

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("user.id"), nullable=False)
    keywords = Column(String, nullable=True)
    sources = Column(String, nullable=True)     # comma-separated source IDs
    categories = Column(String, nullable=True)  # comma-separated category names
    countries = Column(String, nullable=True)   # comma-separated country codes
    languages = Column(String, nullable=True)   # comma-separated language codes

    user = relationship("User", back_populates="preferences")


# ---------------------------------------------------------------------------
# app/models/user.py — authentication / user table
# ---------------------------------------------------------------------------
from sqlalchemy import Boolean


class User(Base, TimestampMixin, TableNameMixin):
    """Application user used for authentication and personalization."""

    id = Column(Integer, primary_key=True, index=True)
    email = Column(String, unique=True, index=True, nullable=False)
    username = Column(String, unique=True, index=True, nullable=False)
    hashed_password = Column(String, nullable=False)
    is_active = Column(Boolean(), default=True)
    is_superuser = Column(Boolean(), default=False)

    preferences = relationship(
        "UserPreference", back_populates="user", cascade="all, delete-orphan"
    )
    saved_articles = relationship(
        "SavedArticle", back_populates="user", cascade="all, delete-orphan"
    )


# ---------------------------------------------------------------------------
# app/schemas/__init__.py — public schema re-exports
# ---------------------------------------------------------------------------
from app.schemas.user import User, UserCreate, UserUpdate, UserInDB  # noqa
from app.schemas.news import (  # noqa
    NewsSource, NewsSourceCreate,
    NewsCategory, NewsCategoryCreate,
    NewsArticle, NewsArticleCreate, NewsArticleUpdate,
    UserPreference, UserPreferenceCreate, UserPreferenceUpdate,
    SavedArticle, SavedArticleCreate,
)
from app.schemas.token import Token, TokenPayload  # noqa
# ---------------------------------------------------------------------------
# app/schemas/news.py — Pydantic schemas for the news domain
# ---------------------------------------------------------------------------
from typing import List, Optional
from datetime import datetime

from pydantic import BaseModel


class NewsSourceBase(BaseModel):
    """Fields shared by all news-source schemas."""

    name: str
    source_id: str
    url: Optional[str] = None


class NewsSourceCreate(NewsSourceBase):
    """Payload for creating a news source."""

    pass


class NewsSource(NewsSourceBase):
    """News source as returned by the API (ORM-backed)."""

    id: int
    created_at: datetime
    updated_at: datetime

    class Config:
        from_attributes = True


class NewsCategoryBase(BaseModel):
    """Fields shared by all category schemas."""

    name: str


class NewsCategoryCreate(NewsCategoryBase):
    """Payload for creating a category."""

    pass


class NewsCategory(NewsCategoryBase):
    """Category as returned by the API (ORM-backed)."""

    id: int
    created_at: datetime
    updated_at: datetime

    class Config:
        from_attributes = True


class NewsArticleBase(BaseModel):
    """Fields shared by all article schemas."""

    title: str
    description: Optional[str] = None
    content: Optional[str] = None
    url: str
    image_url: Optional[str] = None
    published_at: datetime
    author: Optional[str] = None
    language: Optional[str] = None
    country: Optional[str] = None
    source_id: Optional[int] = None
    category_id: Optional[int] = None


class NewsArticleCreate(NewsArticleBase):
    """Payload for creating an article."""

    pass


class NewsArticleUpdate(BaseModel):
    """Partial-update payload; only the editable article fields."""

    title: Optional[str] = None
    description: Optional[str] = None
    content: Optional[str] = None
    image_url: Optional[str] = None
    category_id: Optional[int] = None


class NewsArticle(NewsArticleBase):
    """Article as returned by the API, with nested source/category."""

    id: int
    created_at: datetime
    updated_at: datetime
    source: Optional[NewsSource] = None
    category: Optional[NewsCategory] = None

    class Config:
        from_attributes = True


class SavedArticleBase(BaseModel):
    """Fields shared by saved-article schemas."""

    article_id: int
    notes: Optional[str] = None


class SavedArticleCreate(SavedArticleBase):
    """Payload for bookmarking an article."""

    pass


class SavedArticle(SavedArticleBase):
    """Saved article as returned by the API, with the nested article."""

    id: int
    user_id: int
    created_at: datetime
    updated_at: datetime
    article: Optional[NewsArticle] = None

    class Config:
        from_attributes = True


class UserPreferenceBase(BaseModel):
    """Comma-separated preference lists; all optional."""

    keywords: Optional[str] = None
    sources: Optional[str] = None
    categories: Optional[str] = None
    countries: Optional[str] = None
    languages: Optional[str] = None


class UserPreferenceCreate(UserPreferenceBase):
    """Payload for creating preferences."""

    pass


class UserPreferenceUpdate(UserPreferenceBase):
    """Payload for updating preferences."""

    pass


class UserPreference(UserPreferenceBase):
    """Preferences as returned by the API (ORM-backed)."""

    id: int
    user_id: int
    created_at: datetime
    updated_at: datetime

    class Config:
        from_attributes = True


class NewsResponse(BaseModel):
    """Paginated list response for articles."""

    items: List[NewsArticle]
    total: int


# ---------------------------------------------------------------------------
# app/schemas/token.py — JWT schemas
# ---------------------------------------------------------------------------
class Token(BaseModel):
    """Access token returned by the login endpoint."""

    access_token: str
    token_type: str


class TokenPayload(BaseModel):
    """Decoded JWT payload; ``sub`` holds the user id."""

    sub: Optional[int] = None


# ---------------------------------------------------------------------------
# app/schemas/user.py — user schemas
# ---------------------------------------------------------------------------
from pydantic import EmailStr


class UserBase(BaseModel):
    """Fields shared by all user schemas."""

    email: Optional[EmailStr] = None
    username: Optional[str] = None
    is_active: Optional[bool] = True
    is_superuser: bool = False


class UserCreate(UserBase):
    """Registration payload; email/username/password are required."""

    email: EmailStr
    username: str
    password: str


class UserUpdate(UserBase):
    """Partial-update payload; may include a new plaintext password."""

    password: Optional[str] = None


class UserInDBBase(UserBase):
    """ORM-backed user fields shared by API and internal schemas."""

    id: Optional[int] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    class Config:
        from_attributes = True


class User(UserInDBBase):
    """User as returned by the API (no password material)."""

    pass


class UserInDB(UserInDBBase):
    """Internal schema including the stored password hash."""

    hashed_password: str


# ---------------------------------------------------------------------------
# app/services/__init__.py — service re-exports
# ---------------------------------------------------------------------------
from app.services.mediastack import mediastack_client  # noqa
file mode 100644 index 0000000..096a90d --- /dev/null +++ b/app/services/background_tasks.py @@ -0,0 +1,80 @@ +import asyncio +import logging +from typing import List, Optional + +from app.db.session import SessionLocal +from app.services.news import fetch_and_store_news + +logger = logging.getLogger(__name__) + + +async def refresh_news_periodically( + interval_minutes: int = 60, + categories: Optional[List[str]] = None, + countries: Optional[List[str]] = None, + languages: Optional[List[str]] = None, +): + """ + Periodically fetch and store news from the Mediastack API. + + Args: + interval_minutes: The interval in minutes between fetches. + categories: A list of categories to fetch news for. + countries: A list of countries to fetch news for. + languages: A list of languages to fetch news for. + """ + categories_str = None + if categories: + categories_str = ",".join(categories) + + countries_str = None + if countries: + countries_str = ",".join(countries) + + languages_str = None + if languages: + languages_str = ",".join(languages) + + while True: + try: + logger.info("Refreshing news...") + db = SessionLocal() + try: + await fetch_and_store_news( + db, + categories=categories_str, + countries=countries_str, + languages=languages_str, + limit=100, + ) + logger.info("News refresh completed successfully.") + finally: + db.close() + + # Sleep for the specified interval + await asyncio.sleep(interval_minutes * 60) + + except Exception as e: + logger.error(f"Error refreshing news: {e}") + # If an error occurs, wait a shorter time before retrying + await asyncio.sleep(300) # 5 minutes + + +async def start_background_tasks(): + """ + Start background tasks for the application. 
# ---------------------------------------------------------------------------
# app/services/mediastack.py — Mediastack API client with response caching
# ---------------------------------------------------------------------------
import hashlib
import json
import logging
from typing import Any, Dict, List, Optional

import httpx
from tenacity import retry, stop_after_attempt, wait_exponential

from app.core.cache import api_cache
from app.core.config import settings

logger = logging.getLogger(__name__)


class MediastackClient:
    """Thin async client for the Mediastack news API.

    Responses are cached in ``api_cache`` keyed by endpoint + parameters
    (excluding the API key), and requests are retried with exponential
    backoff via tenacity.
    """

    def __init__(self, api_key: str = None, base_url: str = None):
        """Initialize the client.

        Args:
            api_key: Mediastack API key; defaults to settings.MEDIASTACK_API_KEY.
            base_url: API base URL; defaults to settings.MEDIASTACK_BASE_URL.
        """
        self.api_key = api_key or settings.MEDIASTACK_API_KEY
        self.base_url = base_url or settings.MEDIASTACK_BASE_URL

        if not self.api_key:
            logger.warning("Mediastack API key not provided. API calls will fail.")

    def _get_cache_key(self, endpoint: str, params: Dict[str, Any]) -> str:
        """Build a deterministic cache key for *endpoint* + *params*.

        The ``access_key`` entry is excluded so the secret never appears in
        cache keys; sorting keys makes the JSON representation stable.
        """
        cache_params = {k: v for k, v in params.items() if k != "access_key"}
        params_str = json.dumps(cache_params, sort_keys=True)
        return f"mediastack:{endpoint}:{hashlib.md5(params_str.encode()).hexdigest()}"

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
    )
    async def _make_request(
        self, endpoint: str, params: Dict[str, Any] = None
    ) -> Dict[str, Any]:
        """GET *endpoint* with *params*, using the cache when possible.

        FIX: the original inserted ``access_key`` into the caller's ``params``
        dict, leaking the secret back to the caller and mutating its state;
        we now build a private copy for the request.

        Raises:
            httpx.HTTPStatusError: If the API returns an error status.
        """
        url = f"{self.base_url}/{endpoint}"
        request_params = dict(params or {})
        request_params["access_key"] = self.api_key

        # Serve from cache when an identical request was made before.
        cache_key = self._get_cache_key(endpoint, request_params)
        cached_response = api_cache.get(cache_key)
        if cached_response:
            logger.info(f"Using cached response for {endpoint}")
            return cached_response

        async with httpx.AsyncClient() as client:
            response = await client.get(url, params=request_params)
            response.raise_for_status()
            response_data = response.json()

        api_cache.set(cache_key, response_data)
        return response_data

    async def get_live_news(
        self,
        keywords: Optional[str] = None,
        sources: Optional[str] = None,
        categories: Optional[str] = None,
        countries: Optional[str] = None,
        languages: Optional[str] = None,
        limit: int = 25,
        offset: int = 0,
        sort: str = "published_desc",
        use_cache: bool = True,
    ) -> Dict[str, Any]:
        """Get live news articles from the Mediastack API.

        Args:
            keywords: Keywords or phrases to search for.
            sources: Comma-separated list of news sources to filter by.
            categories: Comma-separated list of categories to filter by.
            countries: Comma-separated list of countries to filter by.
            languages: Comma-separated list of languages to filter by.
            limit: Number of results (capped at the API maximum of 100).
            offset: Number of results to skip, for pagination.
            sort: Sort order (published_desc or published_asc).
            use_cache: Whether a cached response may be returned.

        Returns:
            The raw API response dictionary.
        """
        params = {
            "limit": min(limit, 100),  # Mediastack caps limit at 100
            "offset": offset,
            "sort": sort,
        }

        # Only include filters the caller actually provided.
        if keywords:
            params["keywords"] = keywords
        if sources:
            params["sources"] = sources
        if categories:
            params["categories"] = categories
        if countries:
            params["countries"] = countries
        if languages:
            params["languages"] = languages

        try:
            # Bypassing the cache = deleting the entry before requesting.
            # (The cache key ignores access_key, so this matches the key
            # computed inside _make_request.)
            if not use_cache:
                cache_key = self._get_cache_key("news", params)
                api_cache.delete(cache_key)

            return await self._make_request("news", params)
        except httpx.HTTPStatusError as e:
            logger.error(f"Error fetching news from Mediastack: {e}")
            raise

    async def get_sources(self) -> List[Dict[str, str]]:
        """Return a static list of well-known news sources.

        NOTE: Mediastack has no dedicated endpoint for listing sources, so
        this is a hard-coded placeholder; real sources are discovered from
        fetched articles.
        """
        return [
            {"name": "CNN", "source_id": "cnn", "url": "https://cnn.com"},
            {"name": "BBC", "source_id": "bbc", "url": "https://bbc.com"},
            {"name": "Reuters", "source_id": "reuters", "url": "https://reuters.com"},
            {"name": "New York Times", "source_id": "nytimes", "url": "https://nytimes.com"},
            {"name": "The Guardian", "source_id": "guardian", "url": "https://theguardian.com"},
        ]

    async def get_categories(self) -> List[Dict[str, str]]:
        """Return the category list documented by Mediastack."""
        categories = [
            "general", "business", "entertainment", "health",
            "science", "sports", "technology",
        ]
        return [{"name": category} for category in categories]


# Default client instance for easy importing.
mediastack_client = MediastackClient()


# ---------------------------------------------------------------------------
# app/services/news.py — news source lookup/creation helpers
# ---------------------------------------------------------------------------
from sqlalchemy.orm import Session

from app.models.news import NewsSource
from app.schemas.news import NewsSourceCreate


def get_news_source_by_source_id(db: Session, source_id: str) -> Optional[NewsSource]:
    """Return the NewsSource with the given external source_id, or None."""
    return db.query(NewsSource).filter(NewsSource.source_id == source_id).first()


def create_news_source(db: Session, source: NewsSourceCreate) -> NewsSource:
    """Insert and return a new NewsSource row."""
    db_source = NewsSource(**source.model_dump())
    db.add(db_source)
    db.commit()
    db.refresh(db_source)
    return db_source
# ---------------------------------------------------------------------------
# app/services/news.py — category/article CRUD and Mediastack ingestion
# ---------------------------------------------------------------------------
import logging
from datetime import datetime, timezone
from typing import List, Optional, Tuple

from sqlalchemy import desc, or_
from sqlalchemy.orm import Session

from app.models.news import NewsArticle, NewsCategory, NewsSource, SavedArticle
from app.schemas.news import NewsArticleCreate, NewsCategoryCreate, NewsSourceCreate
from app.services.mediastack import mediastack_client

logger = logging.getLogger(__name__)


def get_or_create_news_source(db: Session, source: NewsSourceCreate) -> NewsSource:
    """Get a news source by source_id or create it if it doesn't exist."""
    db_source = get_news_source_by_source_id(db, source.source_id)
    if db_source:
        return db_source
    return create_news_source(db, source)


def get_news_category_by_name(db: Session, name: str) -> Optional[NewsCategory]:
    """Return the NewsCategory with the given name, or None."""
    return db.query(NewsCategory).filter(NewsCategory.name == name).first()


def create_news_category(db: Session, category: NewsCategoryCreate) -> NewsCategory:
    """Insert and return a new NewsCategory row."""
    db_category = NewsCategory(**category.model_dump())
    db.add(db_category)
    db.commit()
    db.refresh(db_category)
    return db_category


def get_or_create_news_category(db: Session, category: NewsCategoryCreate) -> NewsCategory:
    """Get a news category by name or create it if it doesn't exist."""
    db_category = get_news_category_by_name(db, category.name)
    if db_category:
        return db_category
    return create_news_category(db, category)


def get_news_article_by_url(db: Session, url: str) -> Optional[NewsArticle]:
    """Return the NewsArticle with the given URL (the dedup key), or None."""
    return db.query(NewsArticle).filter(NewsArticle.url == url).first()


def create_news_article(db: Session, article: NewsArticleCreate) -> NewsArticle:
    """Insert and return a new NewsArticle row."""
    db_article = NewsArticle(**article.model_dump())
    db.add(db_article)
    db.commit()
    db.refresh(db_article)
    return db_article


def get_or_create_news_article(db: Session, article: NewsArticleCreate) -> Tuple[NewsArticle, bool]:
    """Get a news article by URL or create it if it doesn't exist.

    Returns:
        A tuple (article, created) where ``created`` indicates insertion.
    """
    db_article = get_news_article_by_url(db, article.url)
    if db_article:
        return db_article, False
    return create_news_article(db, article), True


def get_news_articles(
    db: Session,
    skip: int = 0,
    limit: int = 100,
    keywords: Optional[str] = None,
    source_ids: Optional[List[int]] = None,
    category_ids: Optional[List[int]] = None,
    countries: Optional[List[str]] = None,
    languages: Optional[List[str]] = None,
) -> Tuple[List[NewsArticle], int]:
    """Get news articles with optional filtering.

    Keywords are comma-separated; each term is matched case-insensitively
    against title, description and content (OR semantics).

    Returns:
        A tuple (articles, total) where ``total`` is the pre-pagination count.
    """
    query = db.query(NewsArticle)

    if keywords:
        search_terms = [f"%{term.strip()}%" for term in keywords.split(",")]
        search_conditions = []
        for term in search_terms:
            search_conditions.append(NewsArticle.title.ilike(term))
            search_conditions.append(NewsArticle.description.ilike(term))
            search_conditions.append(NewsArticle.content.ilike(term))
        query = query.filter(or_(*search_conditions))

    if source_ids:
        query = query.filter(NewsArticle.source_id.in_(source_ids))

    if category_ids:
        query = query.filter(NewsArticle.category_id.in_(category_ids))

    if countries:
        query = query.filter(NewsArticle.country.in_(countries))

    if languages:
        query = query.filter(NewsArticle.language.in_(languages))

    # Count before pagination so callers can build page controls.
    total = query.count()

    query = query.order_by(desc(NewsArticle.published_at)).offset(skip).limit(limit)

    return query.all(), total


def save_article_for_user(
    db: Session, user_id: int, article_id: int, notes: Optional[str] = None
) -> SavedArticle:
    """Bookmark an article for a user and return the SavedArticle row."""
    saved_article = SavedArticle(
        user_id=user_id,
        article_id=article_id,
        notes=notes,
    )
    db.add(saved_article)
    db.commit()
    db.refresh(saved_article)
    return saved_article


def get_saved_articles_for_user(
    db: Session, user_id: int, skip: int = 0, limit: int = 100
) -> Tuple[List[SavedArticle], int]:
    """Get articles saved by a user, newest bookmark first.

    Returns:
        A tuple (saved_articles, total) with the pre-pagination count.
    """
    query = (
        db.query(SavedArticle)
        .filter(SavedArticle.user_id == user_id)
        .join(NewsArticle)
        .order_by(desc(SavedArticle.created_at))
    )

    total = query.count()
    result = query.offset(skip).limit(limit).all()

    return result, total


def delete_saved_article(db: Session, user_id: int, saved_article_id: int) -> bool:
    """Delete a saved article for a user.

    The user_id filter ensures users can only delete their own bookmarks.

    Returns:
        True if the bookmark was deleted, False if it wasn't found.
    """
    saved_article = (
        db.query(SavedArticle)
        .filter(SavedArticle.id == saved_article_id, SavedArticle.user_id == user_id)
        .first()
    )

    if not saved_article:
        return False

    db.delete(saved_article)
    db.commit()
    return True


async def fetch_and_store_news(
    db: Session,
    keywords: Optional[str] = None,
    sources: Optional[str] = None,
    categories: Optional[str] = None,
    countries: Optional[str] = None,
    languages: Optional[str] = None,
    limit: int = 100,
) -> List[NewsArticle]:
    """Fetch news from the Mediastack API and store them in the database.

    Best-effort: any failure is logged and an empty list is returned so the
    background refresh loop keeps running.

    Returns:
        The articles that were fetched and stored (or already present).
    """
    try:
        response = await mediastack_client.get_live_news(
            keywords=keywords,
            sources=sources,
            categories=categories,
            countries=countries,
            languages=languages,
            limit=limit,
        )

        stored_articles = []

        for article_data in response.get("data", []):
            # Skip articles missing the fields we require (title, URL).
            if not article_data.get("title") or not article_data.get("url"):
                continue

            # Resolve or create the source row.
            source = None
            if article_data.get("source"):
                source_create = NewsSourceCreate(
                    name=article_data["source"],
                    source_id=article_data["source"].lower().replace(" ", "_"),
                    url=None,  # API doesn't provide source URL
                )
                source = get_or_create_news_source(db, source_create)

            # Resolve or create the category row.
            category = None
            if article_data.get("category"):
                category_create = NewsCategoryCreate(name=article_data["category"])
                category = get_or_create_news_category(db, category_create)

            # Parse the published date, falling back to "now" on failure.
            published_at = datetime.utcnow()
            raw_published = article_data.get("published_at")
            if raw_published:
                try:
                    parsed = datetime.fromisoformat(raw_published.replace("Z", "+00:00"))
                    # FIX: the DateTime column stores naive UTC values
                    # (TimestampMixin uses utcnow); the ISO string parses to
                    # an offset-aware datetime, so normalize to naive UTC to
                    # avoid mixing aware and naive timestamps in one column.
                    if parsed.tzinfo is not None:
                        parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
                    published_at = parsed
                except ValueError:
                    pass  # unparseable date -> keep the "now" fallback

            article_create = NewsArticleCreate(
                title=article_data["title"],
                description=article_data.get("description"),
                content=article_data.get("content"),
                url=article_data["url"],
                image_url=article_data.get("image"),
                published_at=published_at,
                author=article_data.get("author"),
                language=article_data.get("language"),
                country=article_data.get("country"),
                source_id=source.id if source else None,
                category_id=category.id if category else None,
            )

            # FIX: the "created" flag was unused; discard it explicitly.
            article, _created = get_or_create_news_article(db, article_create)
            stored_articles.append(article)

        return stored_articles

    except Exception as e:
        logger.error(f"Error fetching news from Mediastack: {e}")
        return []
# ---------------------------------------------------------------------------
# app/services/user.py — user CRUD, authentication, preferences
# ---------------------------------------------------------------------------
from typing import List, Optional

from sqlalchemy.orm import Session

from app.core.security import get_password_hash, verify_password
from app.models.news import UserPreference
from app.models.user import User
from app.schemas.user import UserCreate, UserUpdate


def get_user(db: Session, user_id: int) -> Optional[User]:
    """Get a user by ID."""
    return db.query(User).filter(User.id == user_id).first()


def get_user_by_email(db: Session, email: str) -> Optional[User]:
    """Get a user by email."""
    return db.query(User).filter(User.email == email).first()


def get_user_by_username(db: Session, username: str) -> Optional[User]:
    """Get a user by username."""
    return db.query(User).filter(User.username == username).first()


def get_users(db: Session, skip: int = 0, limit: int = 100) -> List[User]:
    """Get a paginated list of users."""
    return db.query(User).offset(skip).limit(limit).all()


def create_user(db: Session, user_in: UserCreate) -> User:
    """Create a new user with a hashed password and default preferences.

    Raises:
        ValueError: If the email or username is already taken.
    """
    db_user = get_user_by_email(db, email=user_in.email)
    if db_user:
        raise ValueError("Email already registered")

    db_user = get_user_by_username(db, username=user_in.username)
    if db_user:
        raise ValueError("Username already taken")

    # Never persist the plaintext password; store only the hash.
    user_data = user_in.model_dump(exclude={"password"})
    db_user = User(**user_data, hashed_password=get_password_hash(user_in.password))
    db.add(db_user)
    db.commit()
    db.refresh(db_user)

    # Every user starts with an empty preference row.
    user_preference = UserPreference(user_id=db_user.id)
    db.add(user_preference)
    db.commit()

    return db_user


def update_user(db: Session, db_user: User, user_in: UserUpdate) -> User:
    """Update a user's information; re-hashes the password when supplied."""
    update_data = user_in.model_dump(exclude_unset=True)

    # FIX: the original hashed whatever value was present, so an explicit
    # password=None (or "") would be hashed/crash; only re-hash a
    # non-empty password and never write the plaintext field through.
    password = update_data.pop("password", None)
    if password:
        update_data["hashed_password"] = get_password_hash(password)

    for field, value in update_data.items():
        setattr(db_user, field, value)

    db.add(db_user)
    db.commit()
    db.refresh(db_user)
    return db_user


def authenticate_user(db: Session, username: str, password: str) -> Optional[User]:
    """Authenticate a user by username-or-email plus password.

    Args:
        db: The database session.
        username: The username or email.
        password: The plaintext password.

    Returns:
        The user if authentication succeeds, None otherwise.
    """
    # Accept either identifier: try username first, then email.
    user = get_user_by_username(db, username=username)
    if not user:
        user = get_user_by_email(db, email=username)

    if not user:
        return None

    if not verify_password(password, user.hashed_password):
        return None

    return user


def is_active(user: User) -> bool:
    """Check if a user is active."""
    return user.is_active


def is_superuser(user: User) -> bool:
    """Check if a user is a superuser."""
    return user.is_superuser


def get_user_preference(db: Session, user_id: int) -> Optional[UserPreference]:
    """Get a user's preference row, or None if absent."""
    return db.query(UserPreference).filter(UserPreference.user_id == user_id).first()


def update_user_preference(
    db: Session, user_id: int, keywords: Optional[str] = None,
    sources: Optional[str] = None, categories: Optional[str] = None,
    countries: Optional[str] = None, languages: Optional[str] = None
) -> UserPreference:
    """Update a user's preferences, creating the row if it doesn't exist.

    None arguments leave the corresponding existing value untouched.
    """
    user_preference = get_user_preference(db, user_id)

    if not user_preference:
        user_preference = UserPreference(
            user_id=user_id,
            keywords=keywords,
            sources=sources,
            categories=categories,
            countries=countries,
            languages=languages,
        )
        db.add(user_preference)
    else:
        if keywords is not None:
            user_preference.keywords = keywords
        if sources is not None:
            user_preference.sources = sources
        if categories is not None:
            user_preference.categories = categories
        if countries is not None:
            user_preference.countries = countries
        if languages is not None:
            user_preference.languages = languages

    db.commit()
    db.refresh(user_preference)
    return user_preference


# ---------------------------------------------------------------------------
# main.py — FastAPI application setup
# ---------------------------------------------------------------------------
import logging

import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.openapi.utils import get_openapi

from app.api.v1.api import api_router
from app.core.config import settings
from app.core.utils import configure_logging
from app.models import base  # noqa: F401
from app.services.background_tasks import start_background_tasks

configure_logging()
logger = logging.getLogger(__name__)

app = FastAPI(
    title=settings.PROJECT_NAME,
    openapi_url="/openapi.json",
    docs_url="/docs",
    redoc_url="/redoc",
)

# Enable CORS for the configured origins, if any.
if settings.BACKEND_CORS_ORIGINS:
    app.add_middleware(
        CORSMiddleware,
        allow_origins=[str(origin) for origin in settings.BACKEND_CORS_ORIGINS],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

app.include_router(api_router, prefix=settings.API_V1_STR)


@app.get("/health", tags=["health"])
async def health_check():
    """Health check endpoint."""
    return {"status": "ok"}


def custom_openapi():
    """Build (and memoize) the OpenAPI schema with project metadata."""
    if app.openapi_schema:
        return app.openapi_schema
    openapi_schema = get_openapi(
        title=settings.PROJECT_NAME,
        version=settings.VERSION,
        description=settings.DESCRIPTION,
        routes=app.routes,
    )
    app.openapi_schema = openapi_schema
    return app.openapi_schema


app.openapi = custom_openapi
+@app.on_event("startup") +async def startup_event(): + """ + Function that runs on application startup. + """ + logger.info("Starting up application...") + + # Start background tasks + await start_background_tasks() + + logger.info("Application startup complete.") + + +@app.on_event("shutdown") +async def shutdown_event(): + """ + Function that runs on application shutdown. + """ + logger.info("Shutting down application...") + + +if __name__ == "__main__": + uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True) \ No newline at end of file diff --git a/migrations/__init__.py b/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/migrations/env.py b/migrations/env.py new file mode 100644 index 0000000..5af5c1d --- /dev/null +++ b/migrations/env.py @@ -0,0 +1,90 @@ +from logging.config import fileConfig +import os +import sys +from alembic import context +from sqlalchemy import engine_from_config, pool + +# Add the parent directory to sys.path +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +# Import the SQLAlchemy base and models +from app.db.session import Base +import app.models # noqa + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +target_metadata = Base.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. 
+
+    Calls to context.execute() here emit the given string to the
+    script output.
+
+    """
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online():
+    """Run migrations in 'online' mode.
+
+    In this scenario we need to create an Engine
+    and associate a connection with the context.
+
+    """
+    connectable = engine_from_config(
+        config.get_section(config.config_ini_section),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    with connectable.connect() as connection:
+        # For SQLite, enable foreign keys and use batch mode
+        is_sqlite = connection.dialect.name == "sqlite"
+
+        context.configure(
+            connection=connection,
+            target_metadata=target_metadata,
+            render_as_batch=is_sqlite,  # SQLite batch mode
+        )
+
+        # For SQLite, enable foreign keys
+        if is_sqlite:
+            connection.exec_driver_sql("PRAGMA foreign_keys=ON")
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
\ No newline at end of file
diff --git a/migrations/script.py.mako b/migrations/script.py.mako
new file mode 100644
index 0000000..1e4564e
--- /dev/null
+++ b/migrations/script.py.mako
@@ -0,0 +1,24 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade(): + ${upgrades if upgrades else "pass"} + + +def downgrade(): + ${downgrades if downgrades else "pass"} \ No newline at end of file diff --git a/migrations/versions/__init__.py b/migrations/versions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/migrations/versions/initial_migration.py b/migrations/versions/initial_migration.py new file mode 100644 index 0000000..5acf16d --- /dev/null +++ b/migrations/versions/initial_migration.py @@ -0,0 +1,143 @@ +"""initial migration + +Revision ID: 39df02d3c7d5 +Revises: +Create Date: 2023-11-01 12:00:00.000000 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '39df02d3c7d5' +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + # Create user table + op.create_table( + 'user', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('email', sa.String(), nullable=False), + sa.Column('username', sa.String(), nullable=False), + sa.Column('hashed_password', sa.String(), nullable=False), + sa.Column('is_active', sa.Boolean(), nullable=True), + sa.Column('is_superuser', sa.Boolean(), nullable=True), + sa.Column('created_at', sa.DateTime(), nullable=False), + sa.Column('updated_at', sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint('id'), + ) + op.create_index(op.f('ix_user_email'), 'user', ['email'], unique=True) + op.create_index(op.f('ix_user_id'), 'user', ['id'], unique=False) + op.create_index(op.f('ix_user_username'), 'user', ['username'], unique=True) + + # Create news source table + op.create_table( + 'newssource', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('source_id', sa.String(), nullable=False), + sa.Column('url', sa.String(), nullable=True), + sa.Column('created_at', 
sa.DateTime(), nullable=False), + sa.Column('updated_at', sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint('id'), + ) + op.create_index(op.f('ix_newssource_id'), 'newssource', ['id'], unique=False) + op.create_index(op.f('ix_newssource_name'), 'newssource', ['name'], unique=False) + op.create_index(op.f('ix_newssource_source_id'), 'newssource', ['source_id'], unique=True) + + # Create news category table + op.create_table( + 'newscategory', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('created_at', sa.DateTime(), nullable=False), + sa.Column('updated_at', sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint('id'), + ) + op.create_index(op.f('ix_newscategory_id'), 'newscategory', ['id'], unique=False) + op.create_index(op.f('ix_newscategory_name'), 'newscategory', ['name'], unique=True) + + # Create news article table + op.create_table( + 'newsarticle', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('title', sa.String(), nullable=False), + sa.Column('description', sa.Text(), nullable=True), + sa.Column('content', sa.Text(), nullable=True), + sa.Column('url', sa.String(), nullable=False), + sa.Column('image_url', sa.String(), nullable=True), + sa.Column('published_at', sa.DateTime(), nullable=False), + sa.Column('author', sa.String(), nullable=True), + sa.Column('language', sa.String(), nullable=True), + sa.Column('country', sa.String(), nullable=True), + sa.Column('source_id', sa.Integer(), nullable=True), + sa.Column('category_id', sa.Integer(), nullable=True), + sa.Column('created_at', sa.DateTime(), nullable=False), + sa.Column('updated_at', sa.DateTime(), nullable=False), + sa.ForeignKeyConstraint(['category_id'], ['newscategory.id'], ), + sa.ForeignKeyConstraint(['source_id'], ['newssource.id'], ), + sa.PrimaryKeyConstraint('id'), + ) + op.create_index(op.f('ix_newsarticle_id'), 'newsarticle', ['id'], unique=False) + op.create_index(op.f('ix_newsarticle_published_at'), 
'newsarticle', ['published_at'], unique=False) + op.create_index(op.f('ix_newsarticle_title'), 'newsarticle', ['title'], unique=False) + op.create_index(op.f('ix_newsarticle_url'), 'newsarticle', ['url'], unique=True) + + # Create saved article table + op.create_table( + 'savedarticle', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('article_id', sa.Integer(), nullable=False), + sa.Column('notes', sa.Text(), nullable=True), + sa.Column('created_at', sa.DateTime(), nullable=False), + sa.Column('updated_at', sa.DateTime(), nullable=False), + sa.ForeignKeyConstraint(['article_id'], ['newsarticle.id'], ), + sa.ForeignKeyConstraint(['user_id'], ['user.id'], ), + sa.PrimaryKeyConstraint('id'), + ) + op.create_index(op.f('ix_savedarticle_id'), 'savedarticle', ['id'], unique=False) + + # Create user preference table + op.create_table( + 'userpreference', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('keywords', sa.String(), nullable=True), + sa.Column('sources', sa.String(), nullable=True), + sa.Column('categories', sa.String(), nullable=True), + sa.Column('countries', sa.String(), nullable=True), + sa.Column('languages', sa.String(), nullable=True), + sa.Column('created_at', sa.DateTime(), nullable=False), + sa.Column('updated_at', sa.DateTime(), nullable=False), + sa.ForeignKeyConstraint(['user_id'], ['user.id'], ), + sa.PrimaryKeyConstraint('id'), + ) + op.create_index(op.f('ix_userpreference_id'), 'userpreference', ['id'], unique=False) + + +def downgrade(): + op.drop_index(op.f('ix_userpreference_id'), table_name='userpreference') + op.drop_table('userpreference') + op.drop_index(op.f('ix_savedarticle_id'), table_name='savedarticle') + op.drop_table('savedarticle') + op.drop_index(op.f('ix_newsarticle_url'), table_name='newsarticle') + op.drop_index(op.f('ix_newsarticle_title'), table_name='newsarticle') + 
+    op.drop_index(op.f('ix_newsarticle_published_at'), table_name='newsarticle')
+    op.drop_index(op.f('ix_newsarticle_id'), table_name='newsarticle')
+    op.drop_table('newsarticle')
+    op.drop_index(op.f('ix_newscategory_name'), table_name='newscategory')
+    op.drop_index(op.f('ix_newscategory_id'), table_name='newscategory')
+    op.drop_table('newscategory')
+    op.drop_index(op.f('ix_newssource_source_id'), table_name='newssource')
+    op.drop_index(op.f('ix_newssource_name'), table_name='newssource')
+    op.drop_index(op.f('ix_newssource_id'), table_name='newssource')
+    op.drop_table('newssource')
+    op.drop_index(op.f('ix_user_username'), table_name='user')
+    op.drop_index(op.f('ix_user_id'), table_name='user')
+    op.drop_index(op.f('ix_user_email'), table_name='user')
+    op.drop_table('user')
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..675ddc6
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,13 @@
+fastapi>=0.100.0,<0.101.0
+uvicorn>=0.22.0,<0.23.0
+pydantic>=2.0.0,<3.0.0
+pydantic-settings>=2.0.0,<3.0.0
+sqlalchemy>=2.0.0,<3.0.0
+alembic>=1.11.0,<1.12.0
+httpx>=0.24.0,<0.25.0
+python-jose[cryptography]>=3.3.0,<3.4.0
+passlib[bcrypt]>=1.7.4,<1.8.0
+python-multipart>=0.0.6,<0.0.7
+email-validator>=2.0.0,<3.0.0
+tenacity>=8.2.2,<8.3.0
+ruff>=0.0.270,<0.1.0
\ No newline at end of file