"""Website change detection: fetch page content, hash it, and record alerts."""

import hashlib
from datetime import datetime
from typing import Optional

import requests
from sqlalchemy.orm import Session

from app.models.website import Website, WebsiteAlert

# Browser-like User-Agent sent with every fetch request.
_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/91.0.4472.124 Safari/537.36'
)

# Maximum time, in seconds, passed to ``requests.get`` as ``timeout``.
_REQUEST_TIMEOUT_SECONDS = 30


def get_content_hash(content: str) -> str:
    """Return the hexadecimal MD5 digest of *content* encoded as UTF-8.

    The digest is only used as a change-detection fingerprint.
    """
    return hashlib.md5(content.encode('utf-8')).hexdigest()


def fetch_website_content(url: str) -> Optional[str]:
    """Download *url* and return the response body as text.

    Returns:
        The response text, or ``None`` when the request fails or the server
        answers with an error status (any ``requests.RequestException``).
        Failures are reported on stdout rather than raised.
    """
    headers = {'User-Agent': _USER_AGENT}
    try:
        response = requests.get(
            url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None
    return response.text


def check_website_for_updates(website: Website, db: Session) -> bool:
    """Fetch *website*, compare its content hash, and persist the result.

    When the content could not be fetched nothing is modified or committed
    and ``False`` is returned. Otherwise ``last_checked`` and
    ``last_content_hash`` are updated and committed; if a previous hash
    existed and differs from the new one, a ``WebsiteAlert`` is added in the
    same commit. The first successful check (no previous hash) only stores
    the baseline and does not raise an alert.

    Returns:
        ``True`` if a change was detected, ``False`` otherwise.

    Raises:
        Whatever ``db.commit()`` raises; the session is rolled back first.
    """
    content = fetch_website_content(website.url)
    if content is None:
        return False

    current_hash = get_content_hash(content)
    # Take the timestamp once so last_checked and the alert text agree.
    # Naive UTC is kept because should_check_website subtracts last_checked
    # from a naive utcnow() value.
    now = datetime.utcnow()
    website.last_checked = now

    previous_hash = website.last_content_hash
    has_changes = bool(previous_hash) and previous_hash != current_hash
    if has_changes:
        timestamp = now.strftime('%Y-%m-%d %H:%M:%S')
        alert = WebsiteAlert(
            website_id=website.id,
            alert_message=(
                f"Website '{website.name}' has been updated. "
                f"Changes detected at {timestamp} UTC."
            ),
        )
        db.add(alert)

    website.last_content_hash = current_hash
    try:
        db.commit()
    except Exception:
        # Leave the session usable for the caller (e.g. the next website in
        # check_all_active_websites) before propagating the error.
        db.rollback()
        raise
    return has_changes


def check_all_active_websites(db: Session):
    """Check every website matching ``Website.is_active`` that is due.

    Each due website is announced on stdout and then passed to
    ``check_website_for_updates``.
    """
    active_websites = db.query(Website).filter(Website.is_active).all()
    for website in active_websites:
        if not should_check_website(website):
            continue
        print(f"Checking website: {website.name} ({website.url})")
        check_website_for_updates(website, db)


def should_check_website(website: Website) -> bool:
    """Return whether *website* is due for another check.

    A website that has never been checked is always due. Otherwise it is due
    once at least ``check_interval_minutes`` minutes have elapsed since
    ``last_checked``.
    """
    if not website.last_checked:
        return True

    # NOTE(review): assumes last_checked is a naive UTC datetime and
    # check_interval_minutes is not None -- confirm against the model.
    elapsed_seconds = (datetime.utcnow() - website.last_checked).total_seconds()
    return elapsed_seconds >= website.check_interval_minutes * 60