"""Media utilities: extract video thumbnails and durations using ffmpeg/ffprobe. WARNING: These functions download files and run subprocess commands. They MUST only be called from Celery tasks, NEVER from HTTP request handlers. Calling from gunicorn (especially with gevent workers) will block the worker pool. """ import logging import subprocess import tempfile import os import requests from django.core.files.uploadedfile import SimpleUploadedFile logger = logging.getLogger(__name__) MAX_DOWNLOAD_SIZE = 100 * 1024 * 1024 # 100MB safety limit def download_to_temp(url: str, suffix: str) -> str: """Download a URL to a temporary file. Returns the temp file path. Only accepts http/https URLs to prevent SSRF. """ if not url.startswith(('http://', 'https://')): raise ValueError(f'Invalid URL scheme: {url[:30]}') resp = requests.get(url, timeout=30, stream=True) resp.raise_for_status() tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False) downloaded = 0 try: for chunk in resp.iter_content(8192): downloaded += len(chunk) if downloaded > MAX_DOWNLOAD_SIZE: tmp.close() os.unlink(tmp.name) raise ValueError(f'File too large: {downloaded} bytes') tmp.write(chunk) tmp.close() except Exception: tmp.close() if os.path.exists(tmp.name): os.unlink(tmp.name) raise return tmp.name def _get_duration_ffprobe(file_path: str) -> float: """Get media duration in seconds using ffprobe.""" try: result = subprocess.run( ['ffprobe', '-v', 'quiet', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path], capture_output=True, text=True, timeout=15, ) return float(result.stdout.strip()) except Exception as e: logger.warning('ffprobe duration failed: %s', e) return 0 def _extract_first_frame(video_path: str, output_path: str) -> bool: """Extract the first frame of a video as JPEG using ffmpeg.""" try: subprocess.run( ['ffmpeg', '-y', '-i', video_path, '-vframes', '1', '-f', 'image2', '-q:v', '2', output_path], capture_output=True, timeout=15, ) return os.path.exists(output_path) and os.path.getsize(output_path) > 0 except Exception as e: logger.warning('ffmpeg frame extraction failed: %s', e) return False def extract_video_info_from_file(video_path: str) -> tuple: """Extract first frame thumbnail + duration from a local video file. Returns (thumbnail_file: SimpleUploadedFile | None, duration: float). Does NOT delete the input file — caller is responsible for cleanup. """ tmp_thumb = None try: duration = _get_duration_ffprobe(video_path) tmp_thumb = video_path + '_thumb.jpg' if _extract_first_frame(video_path, tmp_thumb): with open(tmp_thumb, 'rb') as f: thumb_file = SimpleUploadedFile( 'thumbnail.jpg', f.read(), content_type='image/jpeg' ) return thumb_file, duration return None, duration except Exception as e: logger.warning('extract_video_info_from_file failed: %s', e) return None, 0 finally: if tmp_thumb and os.path.exists(tmp_thumb): os.unlink(tmp_thumb) def extract_video_info(video_url: str) -> tuple: """Extract first frame thumbnail + duration from a video URL. Returns (thumbnail_file: SimpleUploadedFile | None, duration: float). NOTE: This function downloads the full video. For large files, call from Celery tasks only — never from HTTP request handlers. """ tmp_video = None try: suffix = '.mp4' if '.mov' in video_url.lower(): suffix = '.mov' tmp_video = download_to_temp(video_url, suffix) return extract_video_info_from_file(tmp_video) except Exception as e: logger.warning('extract_video_info failed for %s: %s', video_url, e) return None, 0 finally: if tmp_video and os.path.exists(tmp_video): os.unlink(tmp_video) def get_audio_duration(audio_url: str) -> float: """Get audio duration in seconds from a URL.""" tmp_audio = None try: suffix = '.wav' if '.wav' in audio_url.lower() else '.mp3' tmp_audio = download_to_temp(audio_url, suffix) return _get_duration_ffprobe(tmp_audio) except Exception as e: logger.warning('get_audio_duration failed for %s: %s', audio_url, e) return 0 finally: if tmp_audio and os.path.exists(tmp_audio): os.unlink(tmp_audio)