
- Auto language detection using OpenAI Whisper during video upload
- Editable transcript interface for reviewing/correcting transcriptions
- Updated translation pipeline to use edited transcripts when available
- Migrated from JWT to Google OAuth-only authentication for better security
- Added complete Docker containerization with docker-compose.yml
- Updated database schema with language detection and transcript editing fields
- Enhanced API documentation and workflow in README
- Added comprehensive environment variable configuration
🤖 Generated with BackendIM
Co-Authored-By: Claude <noreply@anthropic.com>
55 lines
1.8 KiB
Python
55 lines
1.8 KiB
Python
import asyncio
import functools
import logging
import os
import tempfile
from typing import Optional

import ffmpeg
import whisper
|
|
|
|
|
|
_logger = logging.getLogger(__name__)


@functools.lru_cache(maxsize=1)
def _load_whisper_model(name: str = "base"):
    """Load the named Whisper model once and reuse it on later calls."""
    return whisper.load_model(name)


def _detect_language_sync(video_content: bytes) -> str:
    """Extract the audio track with ffmpeg and return Whisper's top language.

    This is the blocking part of the detection; it is meant to be run off
    the event loop. Any failure propagates to the caller as an exception.
    """
    # One private scratch directory holds both files, so everything is
    # removed on every exit path, including a failed write of the video.
    with tempfile.TemporaryDirectory() as workdir:
        video_path = os.path.join(workdir, "input.mp4")
        audio_path = os.path.join(workdir, "audio.wav")

        with open(video_path, "wb") as video_file:
            video_file.write(video_content)

        # Extract audio from video as mono 16 kHz 16-bit PCM WAV.
        (
            ffmpeg
            .input(video_path)
            .output(audio_path, acodec="pcm_s16le", ac=1, ar="16000")
            .overwrite_output()
            .run(quiet=True)
        )

        # The base model is used for language detection; cached after the
        # first call so uploads do not pay the load cost every time.
        model = _load_whisper_model("base")

        # Same preprocessing as before: load, pad/trim, log-mel spectrogram
        # moved onto the model's device.
        audio = whisper.load_audio(audio_path)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(model.device)

        _, probs = model.detect_language(mel)
        return max(probs, key=probs.get)


async def detect_language_from_video(video_content: bytes) -> Optional[str]:
    """Detect the spoken language of a video using OpenAI Whisper.

    The heavy work (ffmpeg subprocess, model load, inference) runs in the
    event loop's default executor so this coroutine does not block other
    tasks while it waits.

    Args:
        video_content: Raw bytes of the uploaded video file.

    Returns:
        The language key with the highest probability reported by Whisper,
        or ``None`` if the input is empty or detection fails for any reason.
        Detection is best-effort: errors are logged, never raised, so a
        failed detection does not fail the upload.
    """
    if not video_content:
        # Nothing to analyse; skip spawning ffmpeg for an empty payload.
        return None

    try:
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, _detect_language_sync, video_content
        )
    except Exception as exc:
        # Log error but don't fail the upload.
        _logger.warning("Language detection failed: %s", exc, exc_info=True)
        return None