From b852025088c1db922c7281c6deafc931e05bf28e Mon Sep 17 00:00:00 2001 From: Automated Action Date: Fri, 27 Jun 2025 10:57:32 +0000 Subject: [PATCH] Create Website Update Alert Service - FastAPI application that monitors websites for updates - SQLite database with Website and WebsiteAlert models - REST API endpoints for website management and alerts - Background scheduler for automatic periodic checks - Content hashing to detect website changes - Health check endpoint and comprehensive documentation - Alembic migrations for database schema management - CORS middleware for cross-origin requests - Environment variable configuration support Co-Authored-By: Claude --- README.md | 86 ++++++++++++++++++++++- alembic.ini | 41 +++++++++++ alembic/env.py | 49 +++++++++++++ alembic/script.py.mako | 24 +++++++ alembic/versions/001_initial_migration.py | 50 +++++++++++++ app/__init__.py | 0 app/api/alerts.py | 27 +++++++ app/api/schemas.py | 36 ++++++++++ app/api/websites.py | 80 +++++++++++++++++++++ app/db/__init__.py | 0 app/db/base.py | 3 + app/db/session.py | 22 ++++++ app/models/__init__.py | 3 + app/models/website.py | 25 +++++++ app/services/__init__.py | 0 app/services/scheduler.py | 33 +++++++++ app/services/website_checker.py | 58 +++++++++++++++ main.py | 57 +++++++++++++++ requirements.txt | 11 +++ 19 files changed, 603 insertions(+), 2 deletions(-) create mode 100644 alembic.ini create mode 100644 alembic/env.py create mode 100644 alembic/script.py.mako create mode 100644 alembic/versions/001_initial_migration.py create mode 100644 app/__init__.py create mode 100644 app/api/alerts.py create mode 100644 app/api/schemas.py create mode 100644 app/api/websites.py create mode 100644 app/db/__init__.py create mode 100644 app/db/base.py create mode 100644 app/db/session.py create mode 100644 app/models/__init__.py create mode 100644 app/models/website.py create mode 100644 app/services/__init__.py create mode 100644 app/services/scheduler.py create mode 100644 
app/services/website_checker.py create mode 100644 main.py create mode 100644 requirements.txt diff --git a/README.md b/README.md index e8acfba..f1a47fc 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,85 @@ -# FastAPI Application +# Website Update Alert Service -This is a FastAPI application bootstrapped by BackendIM, the AI-powered backend generation platform. +A FastAPI application that monitors websites for updates and provides alerts when changes are detected. + +## Features + +- Add websites to monitor with custom check intervals +- Automatic periodic checking for website updates +- REST API for managing websites and viewing alerts +- Background scheduler for continuous monitoring +- SQLite database for data persistence + +## API Endpoints + +### Base +- `GET /` - Service information and documentation links +- `GET /health` - Health check endpoint +- `GET /docs` - Interactive API documentation +- `GET /redoc` - Alternative API documentation + +### Websites +- `POST /websites/` - Add a new website to monitor +- `GET /websites/` - List all monitored websites +- `GET /websites/{id}` - Get specific website details +- `PUT /websites/{id}` - Update website settings +- `DELETE /websites/{id}` - Remove website from monitoring +- `POST /websites/{id}/check` - Manually trigger a check for updates +- `GET /websites/{id}/alerts` - Get alerts for a specific website + +### Alerts +- `GET /alerts/` - List all alerts (with optional unread filter) +- `PUT /alerts/{id}/mark-read` - Mark an alert as read + +## Installation and Setup + +1. Install dependencies: +```bash +pip install -r requirements.txt +``` + +2. Run the application: +```bash +uvicorn main:app --host 0.0.0.0 --port 8000 +``` + +The application will be available at `http://localhost:8000` + +## Environment Variables + +- `CHECK_INTERVAL_MINUTES` - How often to check all websites (default: 5 minutes) + +## Database + +The application uses SQLite with automatic database creation. 
The database file is stored at `/app/storage/db/db.sqlite`. + +## Example Usage + +1. Add a website to monitor: +```bash +curl -X POST "http://localhost:8000/websites/" \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://example.com", + "name": "Example Website", + "check_interval_minutes": 60 + }' +``` + +2. List all websites: +```bash +curl "http://localhost:8000/websites/" +``` + +3. Get alerts: +```bash +curl "http://localhost:8000/alerts/" +``` + +## How It Works + +1. Websites are checked periodically based on their individual check intervals +2. Content is hashed (MD5) to detect changes +3. When changes are detected, alerts are created +4. Background scheduler runs every 5 minutes to check websites that are due for checking +5. Manual checks can be triggered via the API diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..017f263 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,41 @@ +[alembic] +script_location = alembic +prepend_sys_path = . +version_path_separator = os +sqlalchemy.url = sqlite:////app/storage/db/db.sqlite + +[post_write_hooks] + +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S \ No newline at end of file diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 0000000..75f04e2 --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,49 @@ +import sys +from pathlib import Path +from logging.config import fileConfig +from sqlalchemy import engine_from_config +from sqlalchemy import pool +from alembic import context + 
# Make the project root importable so `app.*` modules resolve when Alembic
# is invoked from the repository root.
sys.path.append(str(Path(__file__).parent.parent))

from app.db.base import Base

# Alembic Config object, gives access to the values in alembic.ini.
config = context.config

# Configure Python logging from the [loggers]/[handlers]/[formatters]
# sections of alembic.ini, when a config file is in use.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Model metadata used by `alembic revision --autogenerate`.
target_metadata = Base.metadata


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode: emit SQL without a live DB connection."""
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode: connect to the database and apply directly."""
    connectable = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)

        with context.begin_transaction():
            context.run_migrations()


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
# --- alembic/versions/001_initial_migration.py ---
"""Initial migration - create websites and website_alerts tables

Revision ID: 001
Revises:
Create Date: 2024-01-01 12:00:00.000000

"""
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = '001'
down_revision = None
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create the websites and website_alerts tables plus website indexes."""
    op.create_table(
        'websites',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('url', sa.String(), nullable=False),
        sa.Column('name', sa.String(), nullable=False),
        sa.Column('last_checked', sa.DateTime(), nullable=True),
        sa.Column('last_content_hash', sa.String(), nullable=True),
        sa.Column('is_active', sa.Boolean(), nullable=True),
        sa.Column('check_interval_minutes', sa.Integer(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    op.create_index(op.f('ix_websites_id'), 'websites', ['id'], unique=False)
    op.create_index(op.f('ix_websites_url'), 'websites', ['url'], unique=True)

    # NOTE(review): website_id has no ForeignKey to websites.id (here or in
    # the ORM model), so referential integrity is not enforced — confirm
    # this is intentional before adding the constraint.
    op.create_table(
        'website_alerts',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('website_id', sa.Integer(), nullable=False),
        sa.Column('alert_message', sa.Text(), nullable=False),
        sa.Column('detected_at', sa.DateTime(), nullable=True),
        sa.Column('is_read', sa.Boolean(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )


def downgrade() -> None:
    """Drop everything created by upgrade(), in reverse order."""
    op.drop_table('website_alerts')
    op.drop_index(op.f('ix_websites_url'), table_name='websites')
    op.drop_index(op.f('ix_websites_id'), table_name='websites')
    op.drop_table('websites')


# --- app/api/alerts.py ---
from typing import List

from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session

from app.api.schemas import AlertResponse
from app.db.session import get_db
from app.models.website import WebsiteAlert

router = APIRouter(prefix="/alerts", tags=["alerts"])


@router.get("/", response_model=List[AlertResponse])
def list_alerts(skip: int = 0, limit: int = 100, unread_only: bool = False,
                db: Session = Depends(get_db)):
    """List alerts, newest first, with offset pagination and an optional
    unread-only filter."""
    query = db.query(WebsiteAlert)
    if unread_only:
        # BUG FIX: the original used `not WebsiteAlert.is_read`. SQLAlchemy
        # column expressions define no truth value, so Python's `not` raises
        # TypeError at request time. Negate at the SQL level instead.
        query = query.filter(WebsiteAlert.is_read.is_(False))

    return (
        query.order_by(WebsiteAlert.detected_at.desc())
        .offset(skip)
        .limit(limit)
        .all()
    )


@router.put("/{alert_id}/mark-read")
def mark_alert_read(alert_id: int, db: Session = Depends(get_db)):
    """Mark a single alert as read; 404 when the id does not exist."""
    alert = db.query(WebsiteAlert).filter(WebsiteAlert.id == alert_id).first()
    if alert is None:
        raise HTTPException(status_code=404, detail="Alert not found")

    alert.is_read = True
    db.commit()
    return {"message": "Alert marked as read"}


# --- app/api/schemas.py ---
from datetime import datetime
from typing import Optional

from pydantic import BaseModel, HttpUrl


class WebsiteCreate(BaseModel):
    """Payload for registering a new website to monitor."""

    url: HttpUrl
    name: str
    check_interval_minutes: Optional[int] = 60  # default: check hourly


class WebsiteUpdate(BaseModel):
    """Partial-update payload; omitted fields are left unchanged."""

    name: Optional[str] = None
    check_interval_minutes: Optional[int] = None
    is_active: Optional[bool] = None


class WebsiteResponse(BaseModel):
    """Representation of a monitored website returned by the API."""

    id: int
    url: str
    name: str
    last_checked: Optional[datetime]
    is_active: bool
    check_interval_minutes: int
    created_at: datetime
    updated_at: datetime

    class Config:
        from_attributes = True  # allow construction from ORM instances


class AlertResponse(BaseModel):
    """Representation of a change alert returned by the API."""

    id: int
    website_id: int
    alert_message: str
    detected_at: datetime
    is_read: bool

    class Config:
        from_attributes = True


# --- app/api/websites.py ---
from typing import List

from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session

from app.api.schemas import AlertResponse, WebsiteCreate, WebsiteResponse, WebsiteUpdate
from app.db.session import get_db
from app.models.website import Website, WebsiteAlert

router = APIRouter(prefix="/websites", tags=["websites"])


@router.post("/", response_model=WebsiteResponse)
def create_website(website: WebsiteCreate, db: Session = Depends(get_db)):
    """Register a new website; 400 if the URL is already monitored."""
    existing = db.query(Website).filter(Website.url == str(website.url)).first()
    if existing:
        raise HTTPException(status_code=400, detail="Website URL already registered")

    db_website = Website(
        url=str(website.url),  # HttpUrl -> plain string for storage
        name=website.name,
        check_interval_minutes=website.check_interval_minutes,
    )
    db.add(db_website)
    db.commit()
    db.refresh(db_website)
    return db_website


@router.get("/", response_model=List[WebsiteResponse])
def list_websites(skip: int = 0, limit: int = 100, db: Session = Depends(get_db)):
    """List monitored websites with simple offset pagination."""
    return db.query(Website).offset(skip).limit(limit).all()


@router.get("/{website_id}", response_model=WebsiteResponse)
def get_website(website_id: int, db: Session = Depends(get_db)):
    """Fetch one website by id; 404 when absent."""
    website = db.query(Website).filter(Website.id == website_id).first()
    if website is None:
        raise HTTPException(status_code=404, detail="Website not found")
    return website


@router.put("/{website_id}", response_model=WebsiteResponse)
def update_website(website_id: int, website_update: WebsiteUpdate,
                   db: Session = Depends(get_db)):
    """Apply a partial update; only fields present in the payload change."""
    website = db.query(Website).filter(Website.id == website_id).first()
    if website is None:
        raise HTTPException(status_code=404, detail="Website not found")

    for field, value in website_update.model_dump(exclude_unset=True).items():
        setattr(website, field, value)

    db.commit()
    db.refresh(website)
    return website


@router.delete("/{website_id}")
def delete_website(website_id: int, db: Session = Depends(get_db)):
    """Remove a website from monitoring. Its alerts are left in place."""
    website = db.query(Website).filter(Website.id == website_id).first()
    if website is None:
        raise HTTPException(status_code=404, detail="Website not found")

    db.delete(website)
    db.commit()
    return {"message": "Website deleted successfully"}


@router.post("/{website_id}/check")
def check_website_now(website_id: int, db: Session = Depends(get_db)):
    """Trigger an immediate check for one website, bypassing the schedule."""
    # Kept as a local import, as in the original — presumably to avoid an
    # import cycle with the services package; confirm before hoisting.
    from app.services.website_checker import check_website_for_updates

    website = db.query(Website).filter(Website.id == website_id).first()
    if website is None:
        raise HTTPException(status_code=404, detail="Website not found")

    result = check_website_for_updates(website, db)
    return {"message": "Website check completed", "has_changes": result}


@router.get("/{website_id}/alerts", response_model=List[AlertResponse])
def get_website_alerts(website_id: int, db: Session = Depends(get_db)):
    """List all alerts for one website, newest first; 404 if it is unknown."""
    website = db.query(Website).filter(Website.id == website_id).first()
    if website is None:
        raise HTTPException(status_code=404, detail="Website not found")

    return (
        db.query(WebsiteAlert)
        .filter(WebsiteAlert.website_id == website_id)
        .order_by(WebsiteAlert.detected_at.desc())
        .all()
    )
# --- app/db/base.py ---
from sqlalchemy.ext.declarative import declarative_base

# Shared declarative base for all ORM models.
Base = declarative_base()


# --- app/db/session.py ---
from pathlib import Path

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# SQLite database lives on the app's persistent storage path.
DB_DIR = Path("/app/storage/db")
DB_DIR.mkdir(parents=True, exist_ok=True)

SQLALCHEMY_DATABASE_URL = f"sqlite:///{DB_DIR}/db.sqlite"

# check_same_thread=False: sessions are used from FastAPI worker threads and
# the APScheduler thread, not only the thread that created the engine.
engine = create_engine(
    SQLALCHEMY_DATABASE_URL,
    connect_args={"check_same_thread": False},
)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)


def get_db():
    """FastAPI dependency yielding a request-scoped session, always closed."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()


# --- app/models/__init__.py ---
from app.models.website import Website, WebsiteAlert

__all__ = ["Website", "WebsiteAlert"]


# --- app/models/website.py ---
from datetime import datetime

from sqlalchemy import Boolean, Column, DateTime, Integer, String, Text

from app.db.base import Base


class Website(Base):
    """A monitored website plus its change-detection bookkeeping."""

    __tablename__ = "websites"

    id = Column(Integer, primary_key=True, index=True)
    url = Column(String, unique=True, index=True, nullable=False)
    name = Column(String, nullable=False)
    last_checked = Column(DateTime, default=datetime.utcnow)
    last_content_hash = Column(String)  # MD5 of the last fetched body
    is_active = Column(Boolean, default=True)
    check_interval_minutes = Column(Integer, default=60)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)


class WebsiteAlert(Base):
    """An alert recorded when a website's content hash changes."""

    __tablename__ = "website_alerts"

    id = Column(Integer, primary_key=True, index=True)
    # NOTE(review): no ForeignKey("websites.id") — referential integrity is
    # not enforced and alerts survive website deletion; confirm intended.
    website_id = Column(Integer, nullable=False)
    alert_message = Column(Text, nullable=False)
    detected_at = Column(DateTime, default=datetime.utcnow)
    is_read = Column(Boolean, default=False)


# --- app/services/scheduler.py ---
import os

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger

# FIX: the original built a second, identically-configured sessionmaker here;
# reuse the canonical one from app.db.session instead (same engine, same
# autocommit/autoflush settings — behavior unchanged).
from app.db.session import SessionLocal
from app.services.website_checker import check_all_active_websites

scheduler = BackgroundScheduler()


def scheduled_website_check():
    """Scheduler entry point: run one pass over all websites that are due."""
    db = SessionLocal()
    try:
        check_all_active_websites(db)
    finally:
        db.close()


def start_scheduler():
    """Start periodic checking; interval from CHECK_INTERVAL_MINUTES (default 5)."""
    check_interval = int(os.getenv("CHECK_INTERVAL_MINUTES", "5"))

    scheduler.add_job(
        func=scheduled_website_check,
        trigger=IntervalTrigger(minutes=check_interval),
        id='website_check_job',
        name='Check websites for updates',
        replace_existing=True,
    )

    scheduler.start()
    print(f"Scheduler started - checking websites every {check_interval} minutes")


def stop_scheduler():
    """Shut the background scheduler down."""
    scheduler.shutdown()


# --- app/services/website_checker.py ---
import hashlib
from datetime import datetime
from typing import Optional

import requests
from sqlalchemy.orm import Session

from app.models.website import Website, WebsiteAlert


def get_content_hash(content: str) -> str:
    """Hash page content for change detection (MD5 is fine: not a security use)."""
    return hashlib.md5(content.encode('utf-8')).hexdigest()


def fetch_website_content(url: str) -> Optional[str]:
    """Fetch a page body, or None on any request error (best-effort monitor)."""
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        )
    }
    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        print(f"Error fetching {url}: {str(e)}")
        return None


def check_website_for_updates(website: Website, db: Session) -> bool:
    """Check one website and return True when its content changed.

    On success, updates last_checked and last_content_hash and commits; a
    change (differing hash vs. a previously stored one) also records a
    WebsiteAlert. On fetch failure nothing is touched, so the site is
    retried on the next scheduler pass.
    """
    content = fetch_website_content(website.url)
    if content is None:
        return False

    current_hash = get_content_hash(content)
    website.last_checked = datetime.utcnow()

    has_changes = False
    # First-ever check stores the baseline hash without alerting.
    if website.last_content_hash and website.last_content_hash != current_hash:
        has_changes = True
        detected = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
        db.add(WebsiteAlert(
            website_id=website.id,
            alert_message=(
                f"Website '{website.name}' has been updated. "
                f"Changes detected at {detected} UTC."
            ),
        ))

    website.last_content_hash = current_hash
    db.commit()

    return has_changes


def check_all_active_websites(db: Session):
    """Check every active website whose own interval has elapsed."""
    active_websites = db.query(Website).filter(Website.is_active).all()

    for website in active_websites:
        if should_check_website(website):
            print(f"Checking website: {website.name} ({website.url})")
            check_website_for_updates(website, db)


def should_check_website(website: Website) -> bool:
    """True when the site has never been checked or its interval has passed."""
    if not website.last_checked:
        return True

    elapsed = datetime.utcnow() - website.last_checked
    return elapsed.total_seconds() >= (website.check_interval_minutes * 60)


# --- main.py ---
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from app.api.alerts import router as alerts_router
from app.api.websites import router as websites_router
from app.db.base import Base
from app.db.session import engine
from app.services.scheduler import start_scheduler, stop_scheduler


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create tables and run the background scheduler for the app's lifetime."""
    Base.metadata.create_all(bind=engine)
    start_scheduler()
    yield
    stop_scheduler()


app = FastAPI(
    title="Website Update Alert Service",
    description="A FastAPI service that monitors websites for updates and sends alerts",
    version="1.0.0",
    lifespan=lifespan,
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json",
)

# NOTE(review): CORS is wide open (all origins, with credentials) — tighten
# for production deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(websites_router)
app.include_router(alerts_router)


@app.get("/")
async def root():
    """Landing endpoint with pointers to docs and the health check."""
    return {
        "title": "Website Update Alert Service",
        "description": "A service that monitors websites for updates and provides alerts",
        "documentation": "/docs",
        "health_check": "/health",
    }


@app.get("/health")
async def health_check():
    """Liveness probe."""
    return {
        "status": "healthy",
        "service": "Website Update Alert Service",
        "version": "1.0.0",
    }


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)


# --- requirements.txt (non-Python; reconstructed verbatim) ---
# fastapi==0.104.1
# uvicorn[standard]==0.24.0
# sqlalchemy==2.0.23
# alembic==1.12.1
# pydantic==2.5.0
# python-multipart==0.0.6
# requests==2.31.0
# ruff==0.1.6
# httpx==0.25.2
# python-crontab==3.0.0
# apscheduler==3.10.4