42 lines
1.2 KiB
Python
42 lines
1.2 KiB
Python
import enum
|
|
|
|
from sqlalchemy import Column, String, Integer, DateTime, Enum, Text, JSON
|
|
from sqlalchemy.sql import func
|
|
|
|
from app.db.session import Base
|
|
|
|
|
|
class JobStatus(str, enum.Enum):
    """Lifecycle states a scrape job can be in.

    The ``str`` mixin makes every member a real string, so a member
    compares equal to its lowercase value (``JobStatus.PENDING == "pending"``).
    """

    # Declaration order is preserved when iterating over the enum.
    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    FAILED = "failed"
|
|
|
|
|
|
class ScrapeJob(Base):
    """
    Model for a web scraping job.

    Each row of the ``scrape_jobs`` table holds the target URL, the job's
    current ``JobStatus``, bookkeeping timestamps, optional request settings
    and the stored result or error.
    """

    __tablename__ = "scrape_jobs"

    # Surrogate primary key.
    id = Column(Integer, primary_key=True, index=True)
    # Target URL; indexed, limited to 2048 characters.
    url = Column(String(2048), nullable=False, index=True)
    # NOTE(review): per the SQLAlchemy docs, Enum(<PEP 435 class>) persists the
    # member *names* (e.g. "PENDING") unless ``values_callable`` is supplied --
    # confirm that is the intended storage format.
    status = Column(Enum(JobStatus), default=JobStatus.PENDING, nullable=False)

    # Both timestamps default to the SQL ``now()`` expression when the row is
    # inserted; ``updated_at`` is additionally refreshed on every UPDATE.
    created_at = Column(DateTime, default=func.now(), nullable=False)
    updated_at = Column(
        DateTime, default=func.now(), onupdate=func.now(), nullable=False
    )
    # No default here -- presumably set by the worker that runs the job.
    started_at = Column(DateTime, nullable=True)
    completed_at = Column(DateTime, nullable=True)

    # Presumably the CSS/XPath selector applied to the page -- TODO confirm.
    selector = Column(String(255), nullable=True)
    # Presumably the failure message when the job ends in FAILED -- TODO confirm.
    error = Column(Text, nullable=True)
    # JSON payload; its schema is not defined in this module.
    result = Column(JSON, nullable=True)
    # Optional request settings; the unit of ``timeout`` is not defined here.
    user_agent = Column(String(255), nullable=True)
    timeout = Column(Integer, nullable=True)

    def __repr__(self) -> str:
        """Return a short debugging representation of the job.

        Column defaults are applied at INSERT time, so ``status`` is ``None``
        on an instance that has not been flushed yet. The status is therefore
        resolved defensively instead of dereferencing ``.value`` directly,
        which used to raise ``AttributeError`` for such instances.
        """
        # Enum member -> its value; None or a raw string -> used as-is.
        status = getattr(self.status, "value", self.status)
        return f"<ScrapeJob(id={self.id}, url='{self.url}', status='{status}')>"
|