
- FastAPI application that monitors websites for updates
- SQLite database with Website and WebsiteAlert models
- REST API endpoints for website management and alerts
- Background scheduler for automatic periodic checks
- Content hashing to detect website changes
- Health check endpoint and comprehensive documentation
- Alembic migrations for database schema management
- CORS middleware for cross-origin requests
- Environment variable configuration support

Co-Authored-By: Claude <noreply@anthropic.com>
58 lines
2.0 KiB
Python
58 lines
2.0 KiB
Python
import hashlib
|
|
from datetime import datetime
|
|
from typing import Optional
|
|
import requests
|
|
from sqlalchemy.orm import Session
|
|
from app.models.website import Website, WebsiteAlert
|
|
|
|
def get_content_hash(content: str) -> str:
    """Return the hexadecimal MD5 digest of ``content``.

    The text is encoded as UTF-8 before hashing, so equal strings always
    produce equal digests.
    """
    hasher = hashlib.md5()
    hasher.update(content.encode('utf-8'))
    return hasher.hexdigest()
|
|
|
|
def fetch_website_content(url: str) -> Optional[str]:
    """Download ``url`` and return the response body as text.

    The GET request is sent with a desktop-browser ``User-Agent`` header and
    a 30-second timeout. ``raise_for_status`` is called on the response, so
    an HTTP error status is handled like any other request failure.

    Returns:
        The response text, or ``None`` when a ``requests.RequestException``
        is raised; in that case the error is printed to stdout.
    """
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    body: Optional[str] = None
    try:
        reply = requests.get(url, headers=browser_headers, timeout=30)
        reply.raise_for_status()
        body = reply.text
    except requests.RequestException as exc:
        print(f"Error fetching {url}: {str(exc)}")
    return body
|
|
|
|
def check_website_for_updates(website: Website, db: Session) -> bool:
    """Fetch a website, compare it with the stored hash, and persist the result.

    Args:
        website: The row to check. Its ``last_checked`` and
            ``last_content_hash`` attributes are updated on a successful
            fetch.
        db: Session used to add the alert (if any) and commit the changes.

    Returns:
        ``True`` when a previous hash existed and differs from the hash of
        the freshly fetched content (a ``WebsiteAlert`` is added in that
        case); ``False`` otherwise, including when the fetch failed or when
        no previous hash was stored.

    Raises:
        Whatever ``db.commit()`` raises. The session is rolled back before
        the exception is re-raised so the caller's session stays usable.
    """
    content = fetch_website_content(website.url)
    if content is None:
        # Fetch failed: leave last_checked and the stored hash untouched.
        return False

    current_hash = get_content_hash(content)

    # Take a single timestamp so the alert text and last_checked always agree.
    checked_at = datetime.utcnow()
    website.last_checked = checked_at

    previous_hash = website.last_content_hash
    # With no stored hash this check only records the baseline; no alert.
    has_changes = bool(previous_hash) and previous_hash != current_hash

    if has_changes:
        detected_at = checked_at.strftime('%Y-%m-%d %H:%M:%S')
        alert = WebsiteAlert(
            website_id=website.id,
            alert_message=f"Website '{website.name}' has been updated. Changes detected at {detected_at} UTC."
        )
        db.add(alert)

    website.last_content_hash = current_hash

    try:
        db.commit()
    except Exception:
        # A failed commit leaves the session's transaction unusable until it
        # is rolled back; do that here, then let the caller see the error.
        db.rollback()
        raise

    return has_changes
|
|
|
|
def check_all_active_websites(db: Session):
    """Check every active website that is currently due for a check.

    Queries all ``Website`` rows filtered on ``Website.is_active``. For each
    one that ``should_check_website`` reports as due, a progress line is
    printed and ``check_website_for_updates`` is called with the same
    session.
    """
    for site in db.query(Website).filter(Website.is_active).all():
        if not should_check_website(site):
            continue

        print(f"Checking website: {site.name} ({site.url})")
        check_website_for_updates(site, db)
|
|
|
|
def should_check_website(website: Website) -> bool:
    """Report whether ``website`` is due for another check.

    A website with no ``last_checked`` value is always due. Otherwise it is
    due once the time elapsed since ``last_checked`` (measured against
    ``datetime.utcnow()``) is at least ``check_interval_minutes`` minutes.
    """
    previous_check = website.last_checked
    if previous_check:
        elapsed_seconds = (datetime.utcnow() - previous_check).total_seconds()
        interval_seconds = website.check_interval_minutes * 60
        return elapsed_seconds >= interval_seconds

    # Never checked before.
    return True