rtc_backend/apps/stories/services/opus_converter.py
repair-agent 134ccb70f3
All checks were successful
Build and Deploy Backend / build-and-deploy (push) Successful in 5m41s
fix 音频并发优化
2026-03-03 17:21:46 +08:00

73 lines
2.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
MP3 → Opus 预转码服务
将 MP3 音频转为 Opus 帧列表JSON + base64供 hw_service_go 直接下载播放,
跳过实时 ffmpeg 转码,大幅降低首帧延迟和 CPU 消耗。
Opus 参数与 hw_service_go 保持一致16kHz, 单声道, 60ms/帧
"""
import base64
import json
import logging
import subprocess
import opuslib
logger = logging.getLogger(__name__)

# Audio format shared by the ffmpeg decode step and the Opus encoder.
SAMPLE_RATE = 16000       # Hz
CHANNELS = 1              # mono
FRAME_DURATION_MS = 60    # duration of one Opus frame

# Samples per channel in one frame: 16000 * 60 / 1000 = 960.
FRAME_SIZE = SAMPLE_RATE * FRAME_DURATION_MS // 1000

# Bytes of interleaved s16le PCM in one frame (2 bytes per 16-bit sample).
# Derived from CHANNELS so the value stays correct if the channel count changes.
BYTES_PER_FRAME = FRAME_SIZE * CHANNELS * 2
def convert_mp3_to_opus_json(mp3_bytes: bytes) -> str:
    """Transcode MP3 audio into a JSON document of base64-encoded Opus frames.

    Pipeline: MP3 bytes -> ffmpeg (raw s16le PCM at SAMPLE_RATE / CHANNELS)
    -> opuslib (one Opus packet per FRAME_DURATION_MS of audio).

    A trailing partial frame is zero-padded (silence) up to a full frame so
    the end of the clip is encoded instead of being dropped.

    Args:
        mp3_bytes: The complete MP3 data to transcode.

    Returns:
        A compact JSON string with the keys ``sample_rate``, ``channels``,
        ``frame_duration_ms`` and ``frames``; ``frames`` is the list of
        base64-encoded Opus packets in playback order.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status, or if the
            decoded PCM is shorter than one frame.
        subprocess.TimeoutExpired: If ffmpeg does not finish within 120 s.
    """
    # 1. ffmpeg: MP3 -> PCM (signed 16-bit little-endian), streamed through
    #    stdin/stdout so no temporary files are needed.
    proc = subprocess.run(
        [
            'ffmpeg', '-nostdin', '-loglevel', 'error',
            '-i', 'pipe:0',
            '-ar', str(SAMPLE_RATE),
            '-ac', str(CHANNELS),
            '-f', 's16le',
            'pipe:1',
        ],
        input=mp3_bytes,
        capture_output=True,
        timeout=120,
    )
    if proc.returncode != 0:
        stderr = proc.stderr.decode(errors='replace')
        raise RuntimeError(f'ffmpeg 转码失败: {stderr}')

    pcm = proc.stdout
    if len(pcm) < BYTES_PER_FRAME:
        raise RuntimeError(f'PCM 数据过短: {len(pcm)} bytes')

    # The encoder is handed exactly FRAME_SIZE samples per call, so pad the
    # last partial frame with silence rather than discarding it.
    tail = len(pcm) % BYTES_PER_FRAME
    if tail:
        pcm += bytes(BYTES_PER_FRAME - tail)

    # 2. Opus encoding, frame by frame. The encoder keeps state between
    #    calls, so frames must be encoded in order with a single instance.
    encoder = opuslib.Encoder(SAMPLE_RATE, CHANNELS, 'audio')
    frames = [
        base64.b64encode(
            encoder.encode(pcm[offset:offset + BYTES_PER_FRAME], FRAME_SIZE)
        ).decode('ascii')
        for offset in range(0, len(pcm), BYTES_PER_FRAME)
    ]

    logger.info(
        'Opus 预转码完成: %d 帧, %.1fs 音频',
        len(frames),
        len(frames) * FRAME_DURATION_MS / 1000,
    )

    # Compact separators keep the payload as small as possible.
    return json.dumps({
        'sample_rate': SAMPLE_RATE,
        'channels': CHANNELS,
        'frame_duration_ms': FRAME_DURATION_MS,
        'frames': frames,
    }, separators=(',', ':'))