Add test data generation script for manga inventory database

This commit is contained in:
Automated Action 2025-05-31 13:25:14 +00:00
parent 152286eb92
commit 0839aeabc4
3 changed files with 268 additions and 1 deletions

View File

@ -8,4 +8,6 @@ python-dotenv>=1.0.0
python-multipart>=0.0.6
ruff>=0.1.5
email-validator>=2.1.0
loguru>=0.7.2
loguru>=0.7.2
faker>=20.0.0
pytz>=2023.3

32
scripts/README.md Normal file
View File

@ -0,0 +1,32 @@
# Utility Scripts
This directory contains utility scripts for the Manga Inventory API.
## Available Scripts
### generate_test_data.py
This script generates test data for the Manga Inventory database. It creates:
- 50 authors
- 50 publishers
- 50 genres (the 30 predefined genres plus generated "Custom Genre N" placeholders to reach 50)
- 50 manga books with proper relationships
- Manga-genre associations (each manga is assigned 1-5 genres)
#### Usage
```bash
# Optional: make the script executable (from the project root; not needed when running it via `python`)
chmod +x scripts/generate_test_data.py
# Run the script
python scripts/generate_test_data.py
```
#### Requirements
The script requires the following packages which are included in the project's requirements.txt:
- faker
- pytz
- sqlalchemy

233
scripts/generate_test_data.py Executable file
View File

@ -0,0 +1,233 @@
#!/usr/bin/env python
"""Script to generate test data for the manga inventory database."""
import datetime
import logging
import random
import sys
from pathlib import Path
import pytz
from faker import Faker
from sqlalchemy.exc import SQLAlchemyError
# Add the parent directory to the path so we can import the app
sys.path.insert(0, str(Path(__file__).parent.parent))
from app.db.session import SessionLocal
from app.models.manga import Author, Genre, Manga, MangaGenre, Publisher
# Configure logging
# INFO level so the progress messages logged by the create_* functions are shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Initialize Faker
# NOTE(review): no seed is set in this script, so output presumably differs per run.
fake = Faker()
# Constants
NUM_RECORDS = 50 # Number of records to generate for each table
# Candidate values for the manga `language` field; create_manga() picks one at random.
LANGUAGES = ["English", "Japanese", "French", "German", "Spanish", "Korean", "Chinese"]
# Predefined genres as (name, description) pairs, grouped by theme.
# create_genres() takes at most NUM_RECORDS of these and, when NUM_RECORDS is
# larger than this list, pads the result with generated "Custom Genre N" entries.
GENRE_LIST = [
# Action/Adventure
("Action", "Fast-paced stories focusing on physical challenges and conflicts"),
("Adventure", "Stories about exciting journeys and experiences"),
# Comedy
("Comedy", "Manga meant to provoke laughter and amusement"),
("Slice of Life", "Portrays mundane experiences in everyday life"),
# Drama
("Drama", "Focuses on realistic character development and emotional themes"),
("Tragedy", "Depicts suffering of characters and unhappy endings"),
# Fantasy
("Fantasy", "Involves magical or supernatural elements"),
("Isekai", "Protagonists transported to or reborn in another world"),
# Horror
("Horror", "Intended to frighten, scare, or disgust"),
("Supernatural", "Features supernatural elements like ghosts or yokai"),
# Romance
("Romance", "Focuses on romantic relationships"),
("Harem", "Protagonist surrounded by multiple romantic interests"),
("Reverse Harem", "Female protagonist surrounded by male romantic interests"),
# Sci-Fi
("Sci-Fi", "Based on scientific concepts, often set in the future"),
("Cyberpunk", "High-tech dystopian settings with lowlife characters"),
("Mecha", "Focuses on mechanical technology, robots, and piloted suits"),
# Sports
("Sports", "Centered around athletic competitions"),
("Martial Arts", "Focuses on traditional fighting techniques"),
# Demographic
("Shonen", "Aimed at teenage boys, often action-packed"),
("Shojo", "Aimed at teenage girls, often romantic"),
("Seinen", "Aimed at adult men, more mature themes"),
("Josei", "Aimed at adult women, more realistic"),
("Kodomo", "For children"),
# Others
("Mystery", "Focuses on solving puzzles or crimes"),
("Psychological", "Focuses on mental and psychological states"),
("Historical", "Set in a historical period"),
("Cooking", "Centered around food and cooking"),
("School Life", "Set primarily in a school environment"),
("Ecchi", "Contains mild sexual content"),
("Music", "Focused on music and musicians"),
]
def create_authors(db_session):
    """Insert NUM_RECORDS randomly generated authors and return them.

    Every author gets a Faker-generated name. A biography of at most 500
    characters is attached when a random draw exceeds 0.2; otherwise the
    biography is left as None. The whole batch is added to ``db_session``
    and committed in one go.
    """
    logger.info("Creating authors...")

    def _random_author():
        # Name first, then the biography draw, matching the field order.
        full_name = fake.name()
        bio = None
        if random.random() > 0.2:
            bio = fake.text(max_nb_chars=500)
        return Author(name=full_name, biography=bio)

    authors = [_random_author() for _ in range(NUM_RECORDS)]
    db_session.add_all(authors)
    db_session.commit()
    logger.info(f"Created {len(authors)} authors")
    return authors
def create_publishers(db_session):
    """Insert NUM_RECORDS randomly generated publishers and return them.

    Each publisher always has a company name. The website is filled in when
    a random draw exceeds 0.3 and the country when a separate draw exceeds
    0.2; otherwise those fields stay None. The batch is added to
    ``db_session`` and committed once.
    """
    logger.info("Creating publishers...")
    publishers = []
    for _ in range(NUM_RECORDS):
        # Start from the mandatory name and fill the optional fields in turn.
        fields = {"name": fake.company(), "website": None, "country": None}
        if random.random() > 0.3:
            fields["website"] = fake.url()
        if random.random() > 0.2:
            fields["country"] = fake.country()
        publishers.append(Publisher(**fields))
    db_session.add_all(publishers)
    db_session.commit()
    logger.info(f"Created {len(publishers)} publishers")
    return publishers
def create_genres(db_session):
    """Insert genre rows and return them.

    The first NUM_RECORDS entries of GENRE_LIST are used. When NUM_RECORDS is
    larger than GENRE_LIST, the remainder is filled with generated entries
    named "Custom Genre 1", "Custom Genre 2", ... whose descriptions are
    random sentences. All genres are added to ``db_session`` and committed
    together.
    """
    logger.info("Creating genres...")

    # Slicing copies, so the module-level GENRE_LIST is never modified.
    predefined = GENRE_LIST[:NUM_RECORDS]

    # Non-positive shortfall means the predefined list already suffices and
    # the range below is empty.
    shortfall = NUM_RECORDS - len(GENRE_LIST)
    generated = [
        (f"Custom Genre {number}", fake.sentence())
        for number in range(1, shortfall + 1)
    ]

    genres = [
        Genre(name=label, description=blurb)
        for label, blurb in predefined + generated
    ]
    db_session.add_all(genres)
    db_session.commit()
    logger.info(f"Created {len(genres)} genres")
    return genres
def create_manga(db_session, authors, publishers, genres):
    """Create sample manga and link each one to randomly chosen genres.

    NUM_RECORDS manga are built with randomized, partly-null fields, added to
    the session and committed. Afterwards every manga is associated with
    between 1 and 5 distinct genres (fewer if ``genres`` holds fewer than 5)
    through MangaGenre rows, which are committed separately. The manga are
    committed first because the association rows read ``manga.id``.

    Args:
        db_session: Session used to add and commit the new rows.
        authors: Author objects whose ``id`` is used as ``author_id``. May be
            empty, in which case every manga gets ``author_id=None``.
        publishers: Publisher objects whose ``id`` is used as
            ``publisher_id``. May be empty, in which case every manga gets
            ``publisher_id=None``.
        genres: Genre objects to sample from. May be empty, in which case no
            manga-genre associations are created.

    Returns:
        The list of created Manga objects.
    """
    logger.info("Creating manga...")
    mangas = []
    for i in range(NUM_RECORDS):
        # About 10% of manga are left without an author / publisher (nullable
        # fields). The truthiness guard keeps random.choice from raising
        # IndexError when the corresponding pool is empty.
        author = (
            random.choice(authors) if authors and random.random() > 0.1 else None
        )
        publisher = (
            random.choice(publishers)
            if publishers and random.random() > 0.1
            else None
        )

        # A volume number is only drawn when a total exists, so it can never
        # exceed total_volumes.
        total_volumes = random.randint(1, 30) if random.random() > 0.3 else None
        volume_number = None
        if total_volumes and random.random() > 0.2:
            volume_number = random.randint(1, total_volumes)

        # Timezone-aware publication date up to 30 * 365 days in the past.
        pub_date = None
        if random.random() > 0.2:
            days_back = random.randint(0, 365 * 30)
            pub_date = datetime.datetime.now(pytz.UTC) - datetime.timedelta(
                days=days_back
            )

        # ISBN-13-shaped string. Every group, including the final digit, is
        # random, so the check digit is not guaranteed to be valid.
        isbn = None
        if random.random() > 0.3:
            isbn = (
                f"978-{random.randint(0, 9)}-{random.randint(10000, 99999)}"
                f"-{random.randint(100, 999)}-{random.randint(0, 9)}"
            )

        manga = Manga(
            title=fake.sentence(nb_words=4)[:-1],  # Remove trailing period
            original_title=(
                fake.sentence(nb_words=4)[:-1] if random.random() > 0.6 else None
            ),
            isbn=isbn,
            description=fake.text(max_nb_chars=500) if random.random() > 0.2 else None,
            volume_number=volume_number,
            total_volumes=total_volumes,
            author_id=author.id if author else None,
            publisher_id=publisher.id if publisher else None,
            publication_date=pub_date,
            page_count=random.randint(100, 500) if random.random() > 0.2 else None,
            price=round(random.uniform(5.99, 29.99), 2) if random.random() > 0.2 else None,
            quantity=random.randint(0, 100),
            # NOTE(review): in_stock is drawn independently of quantity, so a
            # manga can be flagged in stock with quantity 0 - confirm intended.
            in_stock=random.random() > 0.1,  # 90% chance of being in stock
            rating=round(random.uniform(1, 10), 1) if random.random() > 0.3 else None,
            language=random.choice(LANGUAGES) if random.random() > 0.2 else None,
            cover_image_url=(
                f"https://example.com/covers/{i+1}.jpg"
                if random.random() > 0.3
                else None
            ),
        )
        mangas.append(manga)
    db_session.add_all(mangas)
    db_session.commit()
    logger.info(f"Created {len(mangas)} manga")

    # Associate manga with genres (many-to-many)
    logger.info("Creating manga-genre associations...")
    manga_genres = []
    # Cap at 5 genres per manga, or fewer when the pool itself is smaller.
    max_genres = min(5, len(genres))
    if max_genres == 0:
        # random.randint(1, 0) would raise ValueError; with no genres there is
        # simply nothing to associate.
        logger.warning("No genres available; skipping manga-genre associations")
    else:
        for manga in mangas:
            num_genres = random.randint(1, max_genres)
            # random.sample picks without replacement, so a manga never gets
            # the same genre twice.
            for genre in random.sample(genres, num_genres):
                manga_genres.append(
                    MangaGenre(manga_id=manga.id, genre_id=genre.id)
                )
    db_session.add_all(manga_genres)
    db_session.commit()
    logger.info(f"Created {len(manga_genres)} manga-genre associations")
    return mangas
def main():
    """Generate the full test data set, exiting with status 1 on failure.

    Opens a database session, then creates authors, publishers, genres and
    manga (with their genre associations) in that order. On any error the
    failure is logged with its traceback, the session is rolled back and the
    process exits with status 1. The session is always closed if it was
    opened.

    Raises:
        SystemExit: With code 1 when data generation fails.
    """
    # Bind the name before the try block so the except/finally clauses can
    # tell "session never opened" apart from "session needs cleanup" instead
    # of failing with UnboundLocalError and masking the original error.
    db_session = None
    try:
        db_session = SessionLocal()
        # NOTE: tables are not created here; the schema is expected to be
        # provided by migrations.
        authors = create_authors(db_session)
        publishers = create_publishers(db_session)
        genres = create_genres(db_session)
        create_manga(db_session, authors, publishers, genres)
        logger.info("Successfully generated test data!")
    except SQLAlchemyError as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception("Database error: %s", e)
        if db_session is not None:
            db_session.rollback()
        sys.exit(1)
    except Exception as e:
        logger.exception("Error: %s", e)
        if db_session is not None:
            db_session.rollback()
        sys.exit(1)
    finally:
        if db_session is not None:
            db_session.close()


if __name__ == "__main__":
    main()