
- Updated OpenAI package from 1.3.7 to 1.51.0 for latest API compatibility
- Added PyTorch and torchaudio dependencies for Whisper model support
- Fixed OpenAI API calls to use the new AsyncOpenAI client format (sketched below)
- Updated transcription service to use client.audio.transcriptions.create()
- Updated translation service to use client.chat.completions.create()
- Added proper logging to language detection service
- Added environment variable loading with python-dotenv in main.py
- Fixed import order to comply with linting standards
🤖 Generated with BackendIM
Co-Authored-By: Claude <noreply@anthropic.com>
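
The OpenAI and dotenv changes listed above are not visible in the file shown below, so here is a minimal sketch of what the new-style calls look like. It assumes an OPENAI_API_KEY in a local .env file; the function names (transcribe_audio, translate_text) and the model names are illustrative placeholders, not the actual service code from this commit:

import os

from dotenv import load_dotenv
from openai import AsyncOpenAI

# Load OPENAI_API_KEY (and any other settings) from .env via python-dotenv
load_dotenv()

client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))


async def transcribe_audio(audio_path: str) -> str:
    # New-style transcription call on the async client
    with open(audio_path, "rb") as audio_file:
        result = await client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    return result.text


async def translate_text(text: str, target_language: str) -> str:
    # New-style chat completion call used for translation
    response = await client.chat.completions.create(
        model="gpt-4o-mini",  # placeholder model name
        messages=[
            {"role": "system", "content": f"Translate the user's text into {target_language}."},
            {"role": "user", "content": text},
        ],
    )
    return response.choices[0].message.content
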
58 lines
1.9 KiB
Python
import os
import tempfile
import whisper
import ffmpeg
from typing import Optional
import logging

logger = logging.getLogger(__name__)


async def detect_language_from_video(video_content: bytes) -> Optional[str]:
    """
    Detect language from video using OpenAI Whisper
    """
    try:
        # Create temporary files
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as video_temp:
            video_temp.write(video_content)
            video_temp_path = video_temp.name

        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_temp:
            audio_temp_path = audio_temp.name

        try:
            # Extract audio from video using ffmpeg
            (
                ffmpeg
                .input(video_temp_path)
                .output(audio_temp_path, acodec='pcm_s16le', ac=1, ar='16000')
                .overwrite_output()
                .run(quiet=True)
            )

            # Load Whisper model (using base model for language detection)
            model = whisper.load_model("base")

            # Prepare audio for language detection
            audio = whisper.load_audio(audio_temp_path)
            audio = whisper.pad_or_trim(audio)
            mel = whisper.log_mel_spectrogram(audio).to(model.device)

            # Detect language
            _, probs = model.detect_language(mel)
            detected_language = max(probs, key=probs.get)

            return detected_language

        finally:
            # Clean up temporary files
            if os.path.exists(video_temp_path):
                os.unlink(video_temp_path)
            if os.path.exists(audio_temp_path):
                os.unlink(audio_temp_path)

    except Exception as e:
        # Log error but don't fail the upload
        logger.error(f"Language detection failed: {e}")
        return None
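
A quick way to exercise this helper outside the service, assuming a local sample.mp4 and that the file above is importable as language_detection (a hypothetical module name), might be:

import asyncio

from language_detection import detect_language_from_video  # hypothetical module name


async def main() -> None:
    with open("sample.mp4", "rb") as f:
        video_bytes = f.read()
    language = await detect_language_from_video(video_bytes)
    print(language or "Language detection failed")


asyncio.run(main())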