video-shuoshan/backend/utils/media_utils.py
seaislee1209 9a6d95a69d
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 3m13s
fix: v0.18.0 商业级加固 — 并发安全、流式上传、错误反馈、类型修复
- TOS 流式上传 upload_from_file_path(避免大文件 OOM)
- 视频生成完成后下载一次复用(TOS 上传 + 首帧提取)
- 并发安全:group thumbnail 用 select_for_update 原子更新
- 跨团队校验:_resolve_asset_group_all 加 group__team 过滤
- 异常信息脱敏:文件上传失败不再泄露内部异常
- SSRF 防护:download_to_temp 校验 URL scheme
- poll lock 终态释放:cache.delete 在 record.save 后调用
- duration=null 语义区分:ffprobe 失败存 None 非 0
- 前端 duration 未知 toast 警告:素材时长未确定时提示用户
- 搜索 API 失败 toast:素材搜索失败时反馈用户
- 视频保存降级标记:临时 URL 降级时设 error_message
- TypeScript 类型修复:AssetItem/AssetSearchResult.duration 改为 number|null
- rebuildMentionSpans 补完 assetId/assetType/assetName/duration 属性
- paste DOMPurify 白名单补完新 data attributes
- resolved_url NameError 修复:非素材库视频/音频引用用 url
- process_asset_media group 删除保护
- download_to_temp 改为 public API
- 清理前端死代码

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-04 18:49:08 +08:00

135 lines
4.7 KiB
Python

"""Media utilities: extract video thumbnails and durations using ffmpeg/ffprobe.
WARNING: These functions download files and run subprocess commands.
They MUST only be called from Celery tasks, NEVER from HTTP request handlers.
Calling from gunicorn (especially with gevent workers) will block the worker pool.
"""
import logging
import subprocess
import tempfile
import os
import requests
from django.core.files.uploadedfile import SimpleUploadedFile
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Byte cap enforced by download_to_temp while it streams a response body to disk.
MAX_DOWNLOAD_SIZE = 100 * 1024 * 1024  # 100MB safety limit
def download_to_temp(url: str, suffix: str) -> str:
    """Download a URL to a temporary file and return the temp file path.

    Only ``http://`` and ``https://`` URLs are accepted; anything else is
    rejected before a request is made. The body is streamed to disk in
    8 KiB chunks and the download is aborted once more than
    ``MAX_DOWNLOAD_SIZE`` bytes have been received.

    NOTE(review): the scheme check alone does not stop requests to internal
    hosts (or redirects to them) — confirm callers only pass trusted URLs.

    Args:
        url: Source URL; must start with ``http://`` or ``https://``.
        suffix: Suffix for the temp file name, e.g. ``'.mp4'``.

    Returns:
        Path of the downloaded file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        ValueError: If the scheme is not http/https, or the body exceeds
            ``MAX_DOWNLOAD_SIZE``.
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx response (raised by ``raise_for_status``).
    """
    if not url.startswith(('http://', 'https://')):
        raise ValueError(f'Invalid URL scheme: {url[:30]}')

    # With stream=True the connection stays checked out until the body is
    # fully read or the response is closed. Using the response as a context
    # manager releases it on every exit path (HTTP error, size limit hit,
    # write failure), not only after a complete download.
    with requests.get(url, timeout=30, stream=True) as resp:
        resp.raise_for_status()
        tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
        try:
            # Closing the handle does not delete the file (delete=False).
            with tmp:
                downloaded = 0
                for chunk in resp.iter_content(8192):
                    downloaded += len(chunk)
                    if downloaded > MAX_DOWNLOAD_SIZE:
                        raise ValueError(f'File too large: {downloaded} bytes')
                    tmp.write(chunk)
        except BaseException:
            # Nothing else removes a partial file, so clean it up here and
            # let the original error propagate.
            if os.path.exists(tmp.name):
                os.unlink(tmp.name)
            raise
    return tmp.name
def _get_duration_ffprobe(file_path: str) -> float:
    """Return the media duration in seconds as reported by ffprobe.

    Runs ``ffprobe`` on ``file_path`` with a 15-second timeout and parses the
    ``format=duration`` entry from its stdout.

    Args:
        file_path: Path of a local media file.

    Returns:
        Duration in seconds, or ``0.0`` when ffprobe cannot be run, times
        out, or prints nothing parseable. Failures are logged, not raised.
    """
    try:
        result = subprocess.run(
            ['ffprobe', '-v', 'quiet', '-show_entries', 'format=duration',
             '-of', 'default=noprint_wrappers=1:nokey=1', file_path],
            capture_output=True, text=True, timeout=15,
        )
        output = result.stdout.strip()
        if not output:
            # "-v quiet" suppresses ffprobe's own diagnostics, so the exit
            # code is the only hint available when nothing was printed.
            logger.warning(
                'ffprobe duration failed: no output (exit code %s)',
                result.returncode,
            )
            return 0.0
        return float(output)
    except Exception as e:
        logger.warning('ffprobe duration failed: %s', e)
        # Keep the failure value a float so it matches the annotation.
        return 0.0
def _extract_first_frame(video_path: str, output_path: str) -> bool:
    """Extract the first frame of a video as a JPEG using ffmpeg.

    Args:
        video_path: Path of the local video file to read.
        output_path: Where to write the JPEG; ffmpeg is run with ``-y`` so an
            existing file at this path is overwritten.

    Returns:
        True only if ffmpeg exited with status 0 and ``output_path`` is a
        non-empty file afterwards. False on any failure; failures are logged,
        not raised.
    """
    try:
        result = subprocess.run(
            ['ffmpeg', '-y', '-i', video_path, '-vframes', '1',
             '-f', 'image2', '-q:v', '2', output_path],
            capture_output=True, timeout=15,
        )
        if result.returncode != 0:
            # Without this check a non-empty file already present at
            # output_path would be reported as a freshly extracted frame
            # even though ffmpeg failed.
            logger.warning(
                'ffmpeg frame extraction failed: exit code %s',
                result.returncode,
            )
            return False
        return os.path.exists(output_path) and os.path.getsize(output_path) > 0
    except Exception as e:
        logger.warning('ffmpeg frame extraction failed: %s', e)
        return False
def extract_video_info_from_file(video_path: str) -> tuple:
    """Extract a first-frame thumbnail and the duration from a local video.

    The thumbnail is rendered into a private temp file rather than next to
    ``video_path``, so concurrent calls on the same video cannot clobber each
    other's output and no existing sibling file of the video is overwritten.

    Args:
        video_path: Path of a local video file. It is NOT deleted here —
            the caller is responsible for cleanup.

    Returns:
        ``(thumbnail_file, duration)``. ``thumbnail_file`` is a
        ``SimpleUploadedFile`` named ``thumbnail.jpg``, or ``None`` if no
        frame could be extracted. ``duration`` is the value returned by
        ``_get_duration_ffprobe`` (zero when it could not be determined).
        Failures are logged, not raised.
    """
    duration = 0
    tmp_thumb = None
    try:
        duration = _get_duration_ffprobe(video_path)

        # mkstemp yields a unique path; only the path is needed because
        # ffmpeg (run with -y) writes the file itself, so close the fd.
        fd, tmp_thumb = tempfile.mkstemp(suffix='.jpg')
        os.close(fd)

        if not _extract_first_frame(video_path, tmp_thumb):
            return None, duration

        with open(tmp_thumb, 'rb') as f:
            thumb_file = SimpleUploadedFile(
                'thumbnail.jpg', f.read(), content_type='image/jpeg'
            )
        return thumb_file, duration
    except Exception as e:
        logger.warning('extract_video_info_from_file failed: %s', e)
        # Keep a duration that was already measured even if the thumbnail
        # step is what failed.
        return None, duration
    finally:
        if tmp_thumb and os.path.exists(tmp_thumb):
            os.unlink(tmp_thumb)
def extract_video_info(video_url: str) -> tuple:
    """Download a video and return its first-frame thumbnail and duration.

    The video is fetched in full to a temp file, handed to
    ``extract_video_info_from_file``, and the temp file is removed before
    returning. Because of the full download, call this from Celery tasks
    only — never from HTTP request handlers.

    Returns:
        ``(thumbnail_file: SimpleUploadedFile | None, duration: float)``;
        ``(None, 0)`` if anything fails (the failure is logged).
    """
    local_copy = None
    try:
        # ".mov" anywhere in the URL selects a .mov suffix; otherwise .mp4.
        extension = '.mov' if '.mov' in video_url.lower() else '.mp4'
        local_copy = download_to_temp(video_url, extension)
        info = extract_video_info_from_file(local_copy)
        return info
    except Exception as exc:
        logger.warning('extract_video_info failed for %s: %s', video_url, exc)
        return None, 0
    finally:
        # Remove the downloaded copy if one was created and still exists.
        needs_cleanup = bool(local_copy) and os.path.exists(local_copy)
        if needs_cleanup:
            os.unlink(local_copy)
def get_audio_duration(audio_url: str) -> float:
    """Download an audio file and return its duration in seconds.

    The file is fetched to a temp path, measured with
    ``_get_duration_ffprobe``, and the temp file is removed before returning.
    Returns 0 if the download or probing fails (the failure is logged).
    """
    local_copy = None
    try:
        # ".wav" anywhere in the URL selects a .wav suffix; otherwise .mp3.
        if '.wav' in audio_url.lower():
            extension = '.wav'
        else:
            extension = '.mp3'
        local_copy = download_to_temp(audio_url, extension)
        seconds = _get_duration_ffprobe(local_copy)
        return seconds
    except Exception as exc:
        logger.warning('get_audio_duration failed for %s: %s', audio_url, exc)
        return 0
    finally:
        # Remove the downloaded copy if one was created and still exists.
        needs_cleanup = bool(local_copy) and os.path.exists(local_copy)
        if needs_cleanup:
            os.unlink(local_copy)