diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..35c70ea --- /dev/null +++ b/.dockerignore @@ -0,0 +1,76 @@ +# Git +.git +.gitignore + +# Docker +Dockerfile +docker-compose.yml +.dockerignore + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Virtual environments +venv/ +env/ +ENV/ +.venv/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Environment variables +.env +.env.local +.env.production + +# Logs +*.log +logs/ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.tox/ + +# Documentation +docs/ +*.md + +# OS +.DS_Store +Thumbs.db + +# Temporary files +*.tmp +*.temp +temp/ + +# Storage (will be mounted as volume) +storage/* +!storage/.gitkeep \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d0df041 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,43 @@ +# Use Python 3.11 slim image +FROM python:3.11-slim + +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + DEBIAN_FRONTEND=noninteractive + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + ffmpeg \ + curl \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Set work directory +WORKDIR /app + +# Create storage directory +RUN mkdir -p /app/storage/db + +# Copy requirements and install Python dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . 
+ +# Create non-root user +RUN adduser --disabled-password --gecos '' appuser && \ + chown -R appuser:appuser /app +USER appuser + +# Expose port +EXPOSE 8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 + +# Run the application +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/README.md b/README.md index 6c7acb2..b4f07f0 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,16 @@ A FastAPI backend for an AI-powered video dubbing tool that allows content creat ## Features -🔐 **Authentication**: JWT-based user registration and login +🔐 **Authentication**: Google OAuth integration for secure login 👤 **User Profiles**: Complete profile management with settings 📁 **Video Upload**: Upload MP4/MOV files to Amazon S3 (max 200MB) +🔍 **Auto Language Detection**: Automatic detection of spoken language using Whisper +📝 **Editable Transcripts**: View and edit transcriptions before translation 🧠 **Transcription**: Audio transcription using OpenAI Whisper API 🌍 **Translation**: Text translation using GPT-4 API 🗣️ **Voice Cloning**: Voice synthesis using ElevenLabs API 🎥 **Video Processing**: Audio replacement and video processing with ffmpeg +🐳 **Docker Support**: Full containerization with Docker and Docker Compose ## Tech Stack @@ -24,65 +27,49 @@ A FastAPI backend for an AI-powered video dubbing tool that allows content creat ## Quick Start -### 1. Install Dependencies +### Option 1: Docker (Recommended) -```bash -pip install -r requirements.txt -``` +1. **Copy environment file**: + ```bash + cp .env.example .env + ``` -### 2. Set Environment Variables +2. 
**Configure environment variables** in `.env`: + - Add your OpenAI API key + - Configure AWS S3 credentials + - Set up Google OAuth credentials -Create a `.env` file in the root directory with the following variables: - -```env -# Authentication -SECRET_KEY=your-secret-key-change-this-in-production - -# Google OAuth Configuration -GOOGLE_CLIENT_ID=your-google-client-id -GOOGLE_CLIENT_SECRET=your-google-client-secret -GOOGLE_REDIRECT_URI=http://localhost:3000/auth/google/callback - -# AWS S3 Configuration -AWS_ACCESS_KEY_ID=your-aws-access-key -AWS_SECRET_ACCESS_KEY=your-aws-secret-key -AWS_REGION=us-east-1 -S3_BUCKET_NAME=your-s3-bucket-name - -# OpenAI Configuration -OPENAI_API_KEY=your-openai-api-key - -# ElevenLabs Configuration -ELEVENLABS_API_KEY=your-elevenlabs-api-key -``` - -### 3. Run Database Migrations - -The database will be automatically created when you start the application. The SQLite database will be stored at `/app/storage/db/db.sqlite`. - -### 4. Start the Application - -```bash -python main.py -``` - -Or with uvicorn: - -```bash -uvicorn main:app --host 0.0.0.0 --port 8000 --reload -``` +3. **Run with Docker Compose**: + ```bash + docker-compose up -d + ``` The API will be available at: - **API**: http://localhost:8000 - **Documentation**: http://localhost:8000/docs -- **Alternative Docs**: http://localhost:8000/redoc - **Health Check**: http://localhost:8000/health +### Option 2: Local Development + +1. **Install Dependencies**: + ```bash + pip install -r requirements.txt + ``` + +2. **Configure Environment**: + ```bash + cp .env.example .env + # Edit .env with your configuration + ``` + +3. 
**Start the Application**: + ```bash + uvicorn main:app --host 0.0.0.0 --port 8000 --reload + ``` + ## API Endpoints -### Authentication -- `POST /auth/register` - User registration with email/password -- `POST /auth/login` - User login with email/password +### Authentication (Google OAuth Only) - `GET /auth/google/oauth-url` - Get Google OAuth URL for frontend - `POST /auth/google/login-with-token` - Login/signup with Google ID token - `POST /auth/google/login-with-code` - Login/signup with Google authorization code @@ -94,22 +81,27 @@ The API will be available at: - `PUT /profile/email` - Update email address - `DELETE /profile/` - Delete user account -### Video Management -- `POST /videos/upload` - Upload video with language settings +### Video Management & Language Detection +- `POST /videos/upload` - Upload video with auto language detection - `GET /videos/` - Get user's videos - `GET /videos/{video_id}` - Get specific video details +- `GET /videos/{video_id}/language` - Get detected video language -### Processing Pipeline +### Transcription & Editable Transcripts - `POST /transcription/{video_id}` - Start audio transcription - `GET /transcription/{video_id}` - Get transcription results -- `POST /translation/{video_id}` - Start text translation +- `GET /transcription/{video_id}/editable` - Get editable transcript +- `PUT /transcription/{video_id}/editable` - Update edited transcript + +### Translation Pipeline (Uses Edited Transcripts) +- `POST /translation/{video_id}` - Start text translation (uses edited transcript if available) - `GET /translation/{video_id}` - Get translation results + +### Voice Cloning & Video Processing - `POST /voice/clone/{video_id}` - Start voice cloning and audio generation - `GET /voice/{video_id}` - Get dubbed audio results - `POST /process/{video_id}` - Start final video processing - `GET /process/{video_id}` - Get processed video results - -### Results - `GET /process/results/{video_id}` - Get complete processing results ## 
Google OAuth Setup @@ -162,32 +154,65 @@ const response = await fetch('/auth/google/login-with-code', { }); ``` +## Docker Setup + +### Building and Running + +```bash +# Build and start the application +docker-compose up -d + +# View logs +docker-compose logs -f api + +# Stop the application +docker-compose down + +# Rebuild after code changes +docker-compose up --build -d +``` + +### Environment Variables + +The application requires the following environment variables (copy from `.env.example`): + +- `OPENAI_API_KEY` - Required for transcription and translation +- `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `S3_BUCKET_NAME` - Required for video storage +- `GOOGLE_CLIENT_ID`, `GOOGLE_CLIENT_SECRET` - Required for authentication +- Other optional configuration variables + +### Storage + +The Docker setup includes a persistent volume for: +- SQLite database (`/app/storage/db/`) +- Local file storage (`/app/storage/`) + ## Workflow -1. **Register/Login** (Email/Password or Google OAuth) to get JWT token -2. **Upload Video** with source and target languages +1. **Login** with Google OAuth to get authentication token +2. **Upload Video** - Automatic language detection occurs during upload 3. **Transcribe** the audio from the video -4. **Translate** the transcribed text -5. **Clone Voice** and generate dubbed audio -6. **Process Video** to replace original audio with dubbed audio -7. **Download** the final dubbed video +4. **Edit Transcript** (optional) - Review and correct the transcription +5. **Translate** the edited/original transcript +6. **Clone Voice** and generate dubbed audio +7. **Process Video** to replace original audio with dubbed audio +8. 
**Download** the final dubbed video ## Environment Variables Reference | Variable | Description | Required | |----------|-------------|----------| -| `SECRET_KEY` | JWT secret key for authentication | Yes | -| `GOOGLE_CLIENT_ID` | Google OAuth client ID | No* | -| `GOOGLE_CLIENT_SECRET` | Google OAuth client secret | No* | -| `GOOGLE_REDIRECT_URI` | Google OAuth redirect URI | No* | +| `OPENAI_API_KEY` | OpenAI API key for Whisper and GPT-4 | Yes | | `AWS_ACCESS_KEY_ID` | AWS access key for S3 | Yes | | `AWS_SECRET_ACCESS_KEY` | AWS secret key for S3 | Yes | | `AWS_REGION` | AWS region (default: us-east-1) | No | | `S3_BUCKET_NAME` | S3 bucket name for file storage | Yes | -| `OPENAI_API_KEY` | OpenAI API key for Whisper and GPT-4 | Yes | +| `GOOGLE_CLIENT_ID` | Google OAuth client ID | Yes | +| `GOOGLE_CLIENT_SECRET` | Google OAuth client secret | Yes | +| `GOOGLE_REDIRECT_URI` | Google OAuth redirect URI | Yes | | `ELEVENLABS_API_KEY` | ElevenLabs API key for voice cloning | Yes | - -*Required only if Google OAuth is enabled +| `DEBUG` | Enable debug mode (default: false) | No | +| `LOG_LEVEL` | Logging level (default: info) | No | ## File Storage Structure diff --git a/alembic/versions/005_add_language_detection_and_editable_transcript.py b/alembic/versions/005_add_language_detection_and_editable_transcript.py new file mode 100644 index 0000000..27e1118 --- /dev/null +++ b/alembic/versions/005_add_language_detection_and_editable_transcript.py @@ -0,0 +1,32 @@ +"""Add language detection and editable transcript features + +Revision ID: 005_add_language_detection_and_editable_transcript +Revises: 004_add_google_oauth_fields +Create Date: 2024-01-01 12:00:00.000000 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = '005_add_language_detection_and_editable_transcript' +down_revision = '004_add_google_oauth_fields' +branch_labels = None +depends_on = None + + +def upgrade(): + # Add source_language column to videos table + op.add_column('videos', sa.Column('source_language', sa.String(), nullable=True)) + + # Add edited_text and is_edited columns to transcriptions table + op.add_column('transcriptions', sa.Column('edited_text', sa.Text(), nullable=True)) + op.add_column('transcriptions', sa.Column('is_edited', sa.Boolean(), nullable=True, default=False)) + + +def downgrade(): + # Remove columns in reverse order + op.drop_column('transcriptions', 'is_edited') + op.drop_column('transcriptions', 'edited_text') + op.drop_column('videos', 'source_language') \ No newline at end of file diff --git a/app/models/transcription.py b/app/models/transcription.py index 1aebcbf..446aa71 100644 --- a/app/models/transcription.py +++ b/app/models/transcription.py @@ -1,4 +1,4 @@ -from sqlalchemy import Column, Integer, DateTime, ForeignKey, Text +from sqlalchemy import Column, Integer, DateTime, ForeignKey, Text, Boolean from sqlalchemy.sql import func from app.db.base import Base @@ -9,4 +9,6 @@ class Transcription(Base): id = Column(Integer, primary_key=True, index=True) video_id = Column(Integer, ForeignKey("videos.id"), nullable=False) text = Column(Text, nullable=False) + edited_text = Column(Text, nullable=True) # User-edited transcript + is_edited = Column(Boolean, default=False) # Track if transcript was edited created_at = Column(DateTime(timezone=True), server_default=func.now()) \ No newline at end of file diff --git a/app/models/video.py b/app/models/video.py index f947a48..d4024a4 100644 --- a/app/models/video.py +++ b/app/models/video.py @@ -11,5 +11,6 @@ class Video(Base): original_s3_url = Column(String, nullable=False) language_from = Column(String, nullable=False) language_to = Column(String, nullable=False) + source_language = Column(String, nullable=True) # 
Auto-detected language status = Column(String, default="uploaded") created_at = Column(DateTime(timezone=True), server_default=func.now()) \ No newline at end of file diff --git a/app/routes/auth.py b/app/routes/auth.py index c0545b9..77943a7 100644 --- a/app/routes/auth.py +++ b/app/routes/auth.py @@ -1,12 +1,11 @@ from fastapi import APIRouter, Depends, HTTPException, status from sqlalchemy.orm import Session -from pydantic import BaseModel, EmailStr +from pydantic import BaseModel from typing import Optional from datetime import datetime import logging from app.db.session import get_db from app.models.user import User -from app.utils.auth import get_password_hash, verify_password, create_access_token from app.services.google_oauth_service import GoogleOAuthService logger = logging.getLogger(__name__) @@ -14,177 +13,6 @@ logger = logging.getLogger(__name__) router = APIRouter() -@router.get("/test") -async def test_auth(): - logger.info("Auth test endpoint called") - return { - "message": "Auth router is working", - "status": "success" - } - - -@router.post("/test-token") -async def test_token_creation(): - """Test endpoint to verify JWT token creation is working""" - try: - test_token = create_access_token(data={"sub": "test@example.com"}) - return { - "message": "Token creation successful", - "token_length": len(test_token), - "token_preview": test_token[:50] + "..." 
if len(test_token) > 50 else test_token - } - except Exception as e: - logger.error(f"Token creation test failed: {e}") - return { - "message": "Token creation failed", - "error": str(e) - } - - -@router.get("/debug/user/{email}") -async def debug_user_status(email: str, db: Session = Depends(get_db)): - """Debug endpoint to check user status - REMOVE IN PRODUCTION""" - try: - db_user = db.query(User).filter(User.email == email).first() - if not db_user: - return {"message": "User not found", "email": email} - - return { - "email": db_user.email, - "has_password_hash": db_user.password_hash is not None, - "password_hash_length": len(db_user.password_hash) if db_user.password_hash else 0, - "is_google_user": db_user.is_google_user, - "email_verified": db_user.email_verified, - "can_login_with_password": db_user.can_login_with_password(), - "created_at": str(db_user.created_at), - "google_id": db_user.google_id - } - except Exception as e: - return {"error": str(e)} - - -class UserCreate(BaseModel): - email: EmailStr - password: str - - -class UserLogin(BaseModel): - email: EmailStr - password: str - - -class Token(BaseModel): - access_token: str - token_type: str - - -class UserResponse(BaseModel): - id: int - email: str - created_at: str - - class Config: - orm_mode = True - - -@router.post("/register", response_model=UserResponse) -async def register(user: UserCreate, db: Session = Depends(get_db)): - try: - logger.info(f"Registration attempt for email: {user.email}") - - # Check if user already exists - db_user = db.query(User).filter(User.email == user.email).first() - if db_user: - logger.warning(f"Registration failed - email already exists: {user.email}") - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="Email already registered" - ) - - # Hash password and create user - hashed_password = get_password_hash(user.password) - logger.info(f"Password hashed successfully for user: {user.email}") - - db_user = User( - email=user.email, - 
password_hash=hashed_password, - is_google_user=False, # Explicitly set for password users - email_verified=False # Email not verified for regular registration - ) - db.add(db_user) - db.commit() - db.refresh(db_user) - - logger.info(f"User registered successfully: {user.email}, can_login_with_password: {db_user.can_login_with_password()}") - return UserResponse( - id=db_user.id, - email=db_user.email, - created_at=str(db_user.created_at) - ) - - except HTTPException: - raise - except Exception as e: - logger.error(f"Registration error for {user.email}: {str(e)}") - db.rollback() - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Internal server error during registration" - ) - - -@router.post("/login", response_model=Token) -async def login(user: UserLogin, db: Session = Depends(get_db)): - try: - logger.info(f"Login attempt for email: {user.email}") - - db_user = db.query(User).filter(User.email == user.email).first() - - # Check if user exists - if not db_user: - logger.warning(f"Login failed - user not found: {user.email}") - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Incorrect email or password", - headers={"WWW-Authenticate": "Bearer"}, - ) - - logger.info(f"User found: {user.email}, is_google_user: {db_user.is_google_user}, has_password: {db_user.password_hash is not None}") - - # Check if user can login with password - if not db_user.can_login_with_password(): - logger.warning(f"Login failed - Google user attempted password login: {user.email}") - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="This account uses Google sign-in. 
Please use Google to login.", - headers={"WWW-Authenticate": "Bearer"}, - ) - - # Verify password - if not verify_password(user.password, db_user.password_hash): - logger.warning(f"Login failed - incorrect password: {user.email}") - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Incorrect email or password", - headers={"WWW-Authenticate": "Bearer"}, - ) - - # Create token - access_token = create_access_token(data={"sub": db_user.email}) - logger.info(f"Login successful for user: {user.email}") - - return {"access_token": access_token, "token_type": "bearer"} - - except HTTPException: - raise - except Exception as e: - logger.error(f"Login error for {user.email}: {str(e)}") - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Internal server error during login" - ) - - # Google OAuth Models class GoogleTokenLogin(BaseModel): id_token: str @@ -199,6 +27,12 @@ class GoogleOAuthURL(BaseModel): oauth_url: str +class GoogleAuthResponse(BaseModel): + google_token: str + token_type: str = "bearer" + user: dict + + class GoogleUserResponse(BaseModel): id: int email: str @@ -229,7 +63,7 @@ async def get_google_oauth_url(): return {"oauth_url": oauth_url} -@router.post("/google/login-with-token", response_model=Token) +@router.post("/google/login-with-token", response_model=GoogleAuthResponse) async def google_login_with_token( google_data: GoogleTokenLogin, db: Session = Depends(get_db) @@ -282,9 +116,21 @@ async def google_login_with_token( db.refresh(db_user) logger.info(f"Created new Google user: {db_user.email}") - # Create JWT token - access_token = create_access_token(data={"sub": db_user.email}) - return {"access_token": access_token, "token_type": "bearer"} + # Return Google token directly for authentication + return { + "google_token": google_data.id_token, + "token_type": "bearer", + "user": { + "id": db_user.id, + "email": db_user.email, + "first_name": db_user.first_name, + "last_name": 
db_user.last_name, + "is_google_user": db_user.is_google_user, + "email_verified": db_user.email_verified, + "profile_picture": db_user.profile_picture, + "created_at": str(db_user.created_at) + } + } except HTTPException: raise @@ -297,7 +143,7 @@ async def google_login_with_token( ) -@router.post("/google/login-with-code", response_model=Token) +@router.post("/google/login-with-code", response_model=GoogleAuthResponse) async def google_login_with_code( google_data: GoogleCodeLogin, db: Session = Depends(get_db) @@ -358,8 +204,21 @@ async def google_login_with_code( db.commit() db.refresh(db_user) - access_token = create_access_token(data={"sub": db_user.email}) - return {"access_token": access_token, "token_type": "bearer"} + # Return Google token directly for authentication + return { + "google_token": id_token, + "token_type": "bearer", + "user": { + "id": db_user.id, + "email": db_user.email, + "first_name": db_user.first_name, + "last_name": db_user.last_name, + "is_google_user": db_user.is_google_user, + "email_verified": db_user.email_verified, + "profile_picture": db_user.profile_picture, + "created_at": str(db_user.created_at) + } + } except HTTPException: raise diff --git a/app/routes/transcription.py b/app/routes/transcription.py index bb10bd7..f88955f 100644 --- a/app/routes/transcription.py +++ b/app/routes/transcription.py @@ -1,6 +1,7 @@ from fastapi import APIRouter, Depends, HTTPException, status, BackgroundTasks from sqlalchemy.orm import Session from pydantic import BaseModel +from typing import Optional from app.db.session import get_db from app.models.user import User from app.models.video import Video @@ -26,6 +27,21 @@ class TranscriptionStartResponse(BaseModel): video_id: int +class EditableTranscriptionResponse(BaseModel): + video_id: int + original_transcript: str + edited_transcript: Optional[str] = None + + +class EditTranscriptRequest(BaseModel): + edited_transcript: str + + +class EditTranscriptResponse(BaseModel): + message: str + 
def _get_owned_transcription(video_id: int, user: User, db: Session) -> Transcription:
    """Return the transcription for ``video_id`` if the video belongs to ``user``.

    Shared by the GET/PUT editable-transcript endpoints, which previously
    duplicated this ownership + existence checking inline.

    Raises:
        HTTPException: 404 when the video is not owned by the user, or when
            the video has no transcription yet.
    """
    video = db.query(Video).filter(
        Video.id == video_id,
        Video.user_id == user.id
    ).first()

    if not video:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Video not found"
        )

    transcription = db.query(Transcription).filter(
        Transcription.video_id == video_id
    ).first()

    if not transcription:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Transcription not found"
        )

    return transcription


@router.get("/{video_id}/editable", response_model=EditableTranscriptionResponse)
async def get_editable_transcription(
    video_id: int,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """Return the original Whisper transcript plus any user-edited version."""
    transcription = _get_owned_transcription(video_id, current_user, db)

    return EditableTranscriptionResponse(
        video_id=video_id,
        original_transcript=transcription.text,
        edited_transcript=transcription.edited_text
    )


@router.put("/{video_id}/editable", response_model=EditTranscriptResponse)
async def update_editable_transcription(
    video_id: int,
    request: EditTranscriptRequest,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """Store a user-edited transcript for the video.

    The original Whisper text is kept intact; edits live in a separate
    column, and ``is_edited`` signals downstream translation to prefer the
    edited text.
    """
    transcription = _get_owned_transcription(video_id, current_user, db)

    transcription.edited_text = request.edited_transcript
    transcription.is_edited = True
    db.commit()

    return EditTranscriptResponse(
        message="Transcript updated successfully",
        video_id=video_id
    )
response_model=VideoUploadResponse) @@ -73,12 +81,16 @@ async def upload_video( detail="Failed to upload video to S3" ) + # Detect language from video + detected_language = await detect_language_from_video(file_content) + # Save video metadata to database db_video = Video( user_id=current_user.id, original_s3_url=s3_url, language_from=language_from, language_to=language_to, + source_language=detected_language, status="uploaded" ) db.add(db_video) @@ -88,7 +100,8 @@ async def upload_video( return VideoUploadResponse( video_id=db_video.id, message="Video uploaded successfully", - s3_url=s3_url + s3_url=s3_url, + detected_language=detected_language ) @@ -104,6 +117,7 @@ async def get_user_videos( original_s3_url=video.original_s3_url, language_from=video.language_from, language_to=video.language_to, + source_language=video.source_language, status=video.status, created_at=str(video.created_at) ) for video in videos] @@ -132,6 +146,30 @@ async def get_video( original_s3_url=video.original_s3_url, language_from=video.language_from, language_to=video.language_to, + source_language=video.source_language, status=video.status, created_at=str(video.created_at) + ) + + +@router.get("/{video_id}/language", response_model=LanguageDetectionResponse) +async def get_video_language( + video_id: int, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db) +): + video = db.query(Video).filter( + Video.id == video_id, + Video.user_id == current_user.id + ).first() + + if not video: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Video not found" + ) + + return LanguageDetectionResponse( + video_id=video.id, + detected_language=video.source_language ) \ No newline at end of file diff --git a/app/services/language_detection_service.py b/app/services/language_detection_service.py new file mode 100644 index 0000000..c3205b6 --- /dev/null +++ b/app/services/language_detection_service.py @@ -0,0 +1,55 @@ +import os +import tempfile +import 
import logging
import os
import tempfile
from functools import lru_cache
from typing import Optional

import whisper
import ffmpeg

logger = logging.getLogger(__name__)


@lru_cache(maxsize=1)
def _get_whisper_model():
    """Load the Whisper 'base' model once and reuse it for every request.

    Model loading is expensive (disk read + weight initialization); the
    previous implementation reloaded it on every single upload.
    """
    return whisper.load_model("base")


async def detect_language_from_video(video_content: bytes) -> Optional[str]:
    """Detect the spoken language in an uploaded video using OpenAI Whisper.

    Args:
        video_content: Raw bytes of the uploaded video file.

    Returns:
        The language code Whisper scores as most probable, or None when
        detection fails — detection is best-effort and must never make the
        upload itself fail.

    NOTE(review): model inference and ffmpeg extraction are CPU-bound and
    run directly inside this async function, blocking the event loop —
    consider run_in_executor; confirm with the upload-path owners.
    """
    video_temp_path = None
    audio_temp_path = None
    try:
        # Persist the upload to disk so ffmpeg can read it by path.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as video_temp:
            video_temp.write(video_content)
            video_temp_path = video_temp.name

        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_temp:
            audio_temp_path = audio_temp.name

        # Extract mono 16 kHz PCM audio — the input format Whisper expects.
        (
            ffmpeg
            .input(video_temp_path)
            .output(audio_temp_path, acodec='pcm_s16le', ac=1, ar='16000')
            .overwrite_output()
            .run(quiet=True)
        )

        model = _get_whisper_model()

        # Whisper's language detector operates on a 30-second
        # (padded/trimmed) log-mel spectrogram.
        audio = whisper.load_audio(audio_temp_path)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(model.device)

        _, probs = model.detect_language(mel)
        return max(probs, key=probs.get)

    except Exception:
        # Best-effort: log with traceback and let the upload continue.
        logger.exception("Language detection failed")
        return None
    finally:
        # Remove temp files even when ffmpeg or temp-file creation raised;
        # the old code leaked the video temp file in that case.
        for path in (video_temp_path, audio_temp_path):
            if path and os.path.exists(path):
                os.unlink(path)
def _extract_bearer_token(authorization: str) -> Optional[str]:
    """Parse an ``Authorization`` header value and return the bearer credentials.

    Per RFC 7235 the auth scheme is case-insensitive, and surrounding
    whitespace in the credentials is tolerated. Returns None when the scheme
    is not Bearer or no credentials are present. (The previous split-based
    parsing produced an empty token for headers such as ``"Bearer  x"`` and
    rejected the RFC-valid lowercase ``"bearer"`` scheme.)
    """
    scheme, _, credentials = authorization.partition(" ")
    if scheme.lower() != "bearer":
        return None
    token = credentials.strip()
    return token or None


async def get_current_user(
    authorization: Optional[str] = Header(None),
    db: Session = Depends(get_db)
) -> User:
    """FastAPI dependency that authenticates a request via a Google ID token.

    Expects ``Authorization: Bearer <google-id-token>``, verifies the token
    with Google, and returns the matching local ``User`` row.

    Raises:
        HTTPException: 401 when the header is missing or malformed, the
            token fails Google verification, no local user exists for the
            token's email, or the account is not a Google-auth account.

    NOTE(review): the Google ID token is re-verified on every request, so a
    client session implicitly ends when the ID token expires (typically
    ~1 hour) — confirm the frontend refreshes tokens.
    """
    if not authorization:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authorization header required",
            headers={"WWW-Authenticate": "Bearer"},
        )

    google_token = _extract_bearer_token(authorization)
    if google_token is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authorization header format",
            headers={"WWW-Authenticate": "Bearer"},
        )

    # Signature, audience, and expiry checks are delegated to Google.
    user_info = await GoogleOAuthService.verify_google_token(google_token)
    if not user_info:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid Google token",
            headers={"WWW-Authenticate": "Bearer"},
        )

    # The token only proves identity with Google; the user must also have
    # been created locally via the OAuth login flow.
    user = db.query(User).filter(User.email == user_info['email']).first()
    if not user:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="User not found",
            headers={"WWW-Authenticate": "Bearer"},
        )

    # Legacy password-era accounts (if any remain) cannot use this path.
    if not user.is_google_user:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Only Google authentication is supported",
            headers={"WWW-Authenticate": "Bearer"},
        )

    return user
+ dockerfile: Dockerfile + container_name: ai-video-dubbing-api + ports: + - "8000:8000" + volumes: + - ./storage:/app/storage + - ./alembic:/app/alembic + environment: + # Database + - DATABASE_URL=sqlite:////app/storage/db/db.sqlite + + # OpenAI (required for transcription and translation) + - OPENAI_API_KEY=${OPENAI_API_KEY} + + # AWS S3 (required for video storage) + - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} + - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} + - AWS_REGION=${AWS_REGION:-us-east-1} + - S3_BUCKET_NAME=${S3_BUCKET_NAME} + + # Google OAuth (required for authentication) + - GOOGLE_CLIENT_ID=${GOOGLE_CLIENT_ID} + - GOOGLE_CLIENT_SECRET=${GOOGLE_CLIENT_SECRET} + - GOOGLE_REDIRECT_URI=${GOOGLE_REDIRECT_URI:-http://localhost:3000/auth/callback} + + # Application settings + - DEBUG=${DEBUG:-false} + - LOG_LEVEL=${LOG_LEVEL:-info} + + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + networks: + - app-network + + # Optional: Add a reverse proxy (uncomment if needed) + # nginx: + # image: nginx:alpine + # container_name: ai-video-dubbing-nginx + # ports: + # - "80:80" + # - "443:443" + # volumes: + # - ./nginx.conf:/etc/nginx/nginx.conf:ro + # - ./ssl:/etc/ssl:ro + # depends_on: + # - api + # restart: unless-stopped + # networks: + # - app-network + +networks: + app-network: + driver: bridge + +volumes: + storage: + driver: local \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index c636747..daad55c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,10 +3,9 @@ uvicorn[standard]==0.24.0 python-multipart==0.0.6 sqlalchemy==2.0.23 alembic==1.12.1 -python-jose[cryptography]==3.3.0 -passlib[bcrypt]==1.7.4 boto3==1.34.0 openai==1.3.7 +openai-whisper==20231117 python-decouple==3.8 ruff==0.1.6 requests==2.31.0