Pendant_Rtc_Toy/scripts/p3_tools/convert_audio_to_p3 copy.py
Rdzleo 93f0e19d1d 初始化项目:精灵吊坠 RTC 语音助手 + VEML7700 石头同频匹配
ESP32-S3 吊坠设备固件,集成火山引擎 RTC 语音助手、蓝牙配网、
VEML7700 环境光传感器驱动及石头同频匹配交友功能。

VEML7700 驱动:
- 基于 ESP-IDF i2c_master API 实现,复用项目 I2cDevice 基类
- 支持 ALS + White 双通道、自动量程、Vishay 非线性校正
- 3 次采样取中位数过滤偶发异常

石头同频匹配算法(双维度):
- 维度1:光谱比值 ALS/White(石头固有光学特征,不随光照强度变化)
- 维度2:亮度等级(5级对数划分,排除极端环境差异)
- 比值阈值 15%,实测同石头姿势变化波动 1.6%~9.6%,安全余量充足

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-01 11:43:57 +08:00

71 lines
3.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# convert audio files to protocol v3 stream
import librosa
import opuslib
import struct
import sys
import tqdm
import numpy as np
import argparse
import pyloudnorm as pyln
def encode_audio_to_opus(input_file, output_file, target_lufs=None):
    """Convert an audio file into a stream of length-prefixed stereo Opus packets.

    The input is decoded with librosa, duplicated to two channels when it is
    mono, optionally loudness-normalized, resampled to 16 kHz, converted to
    16-bit PCM and encoded in 60 ms Opus frames. Each packet is written as a
    4-byte header (two zero bytes followed by the big-endian 16-bit payload
    length) and then the Opus payload.

    Args:
        input_file: Path of the audio file to read.
        output_file: Path of the packet stream to write (overwritten).
        target_lufs: Target integrated loudness in LUFS, or None to skip
            loudness normalization.

    Raises:
        ValueError: Propagated from pyloudnorm when loudness normalization is
            enabled and the input is too short to be measured.
    """
    # sr=None keeps the native sample rate; mono=False keeps every channel,
    # giving shape (samples,) for mono or (channels, samples) otherwise.
    audio, sample_rate = librosa.load(input_file, sr=None, mono=False, dtype=np.float32)

    # Duplicate a mono signal into both channels to get a stereo layout.
    if audio.ndim == 1:
        audio = np.stack([audio, audio], axis=0)

    if target_lufs is not None:
        print("Note: Automatic loudness adjustment is enabled, which may cause", file=sys.stderr)
        print(" audio distortion. If the input audio has already been ", file=sys.stderr)
        print(" loudness-adjusted or if the input audio is TTS audio, ", file=sys.stderr)
        print(" please use the `-d` parameter to disable loudness adjustment.", file=sys.stderr)
        meter = pyln.Meter(sample_rate)
        # pyloudnorm works on (samples, channels); the array here is
        # (channels, samples), so measure and scale a transposed view.
        samples_first = audio.T
        current_loudness = meter.integrated_loudness(samples_first)
        if np.isfinite(current_loudness):
            audio = pyln.normalize.loudness(samples_first, current_loudness, target_lufs).T
            print(f"Adjusted loudness: {current_loudness:.1f} LUFS -> {target_lufs} LUFS")
        else:
            # Silent input measures as -inf LUFS; applying the resulting
            # infinite gain would turn every sample into NaN.
            print("Warning: input loudness is not measurable (silent audio?); "
                  "skipping loudness adjustment.", file=sys.stderr)

    # Convert sample rate to 16000Hz if necessary (resampled along the last axis).
    target_sample_rate = 16000
    if sample_rate != target_sample_rate:
        audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=target_sample_rate)
        sample_rate = target_sample_rate

    # Loudness gain and resampling can push samples outside [-1, 1]; clip
    # first so the int16 conversion saturates instead of wrapping around.
    audio = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)

    # Initialize a 2-channel Opus encoder.
    encoder = opuslib.Encoder(sample_rate, 2, opuslib.APPLICATION_AUDIO)

    duration = 60  # 60ms per frame
    frame_size = int(sample_rate * duration / 1000)  # samples per channel per frame

    # Zero-pad the tail up to a whole frame so the end of the audio is
    # encoded rather than silently dropped.
    remainder = audio.shape[1] % frame_size
    if remainder:
        audio = np.pad(audio, ((0, 0), (0, frame_size - remainder)))

    # Encode and save
    with open(output_file, 'wb') as f:
        for start in tqdm.tqdm(range(0, audio.shape[1], frame_size)):
            # Take the first two channels of this frame and interleave them
            # as L1, R1, L2, R2, ... (row-major bytes of a (frame, 2) array).
            interleaved = audio[:2, start:start + frame_size].T
            opus_data = encoder.encode(interleaved.tobytes(), frame_size=frame_size)
            packet = struct.pack('>BBH', 0, 0, len(opus_data)) + opus_data
            f.write(packet)
if __name__ == "__main__":
    # Command-line entry point: read the options and run the conversion.
    cli = argparse.ArgumentParser(
        description='Convert audio to Opus with loudness normalization',
    )
    cli.add_argument('input_file', help='Input audio file')
    cli.add_argument('output_file', help='Output .opus file')
    cli.add_argument(
        '-l', '--lufs',
        type=float,
        default=-16.0,
        help='Target loudness in LUFS (default: -16)',
    )
    cli.add_argument(
        '-d', '--disable-loudnorm',
        action='store_true',
        help='Disable loudness normalization',
    )
    options = cli.parse_args()

    # -d takes precedence over -l: a target of None makes the encoder skip
    # loudness normalization entirely.
    if options.disable_loudnorm:
        loudness_target = None
    else:
        loudness_target = options.lufs

    encode_audio_to_opus(options.input_file, options.output_file, loudness_target)