219 lines
7.5 KiB
Python
219 lines
7.5 KiB
Python
import requests
|
||
import json
|
||
from .BaseAudioService import BaseAudioService
|
||
import oss2
|
||
import uuid
|
||
import logging
|
||
import datetime
|
||
|
||
import http.client
|
||
from django.conf import settings
|
||
|
||
from aliyunsdkcore.client import AcsClient
|
||
from aliyunsdkcore.request import CommonRequest
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
class AliyunAudioService(BaseAudioService):
    """Audio service that talks to Aliyun NLS (speech) and Aliyun OSS (storage).

    Three remote calls are made by this class:

    * ``CreateToken`` through ``AcsClient`` to obtain an NLS access token,
    * the NLS gateway ``/stream/v1/tts`` endpoint for text-to-speech,
    * the NLS gateway ``/stream/v1/asr`` endpoint for speech recognition.

    Synthesized audio can additionally be uploaded to an OSS bucket with
    ``oss2``. All endpoints are fixed to the ``cn-shanghai`` region.
    """

    # Endpoints and protocol constants used by the methods below.
    _REGION = "cn-shanghai"
    _TOKEN_DOMAIN = 'nls-meta.cn-shanghai.aliyuncs.com'
    _TOKEN_API_VERSION = '2019-02-28'
    _GATEWAY_HOST = 'nls-gateway-cn-shanghai.aliyuncs.com'
    _TTS_PATH = '/stream/v1/tts'
    _ASR_PATH = '/stream/v1/asr'
    # Content-Type the TTS endpoint must answer with for the body to be audio.
    _AUDIO_CONTENT_TYPE = 'audio/mpeg'
    # Value of the ``status`` field that recognize_speech treats as success.
    _ASR_SUCCESS_STATUS = 20000000

    def __init__(self, config):
        """Store the configuration and create the Aliyun SDK client.

        Args:
            config: Mapping read with ``config[key]``. The keys ``api_key``,
                ``api_secret``, ``oss_key_id`` and ``oss_key_secret`` are read
                here; ``app_id``, ``oss_endpoint``, ``oss_bucket``,
                ``oss_audio_base_dir`` and ``oss_host`` are read by the other
                methods when they run.
        """
        super().__init__(config['api_key'], config['api_secret'])
        self.config = config
        self.oss_key_id = config['oss_key_id']
        self.oss_key_secret = config['oss_key_secret']
        # Create the AcsClient instance used for the CreateToken call.
        self.client = AcsClient(
            self.config['api_key'],
            self.config['api_secret'],
            self._REGION,
        )

    def get_token(self):
        """Request an NLS access token via the ``CreateToken`` action.

        Returns:
            The token id string, or ``None`` when the request fails or the
            response does not contain ``Token.Id``. Failures are logged, not
            raised.
        """
        # Build the request and set its parameters.
        request = CommonRequest()
        request.set_method('POST')
        request.set_domain(self._TOKEN_DOMAIN)
        request.set_version(self._TOKEN_API_VERSION)
        request.set_action_name('CreateToken')

        try:
            response = self.client.do_action_with_exception(request)
            payload = json.loads(response)
        except Exception:
            # Best-effort by design: callers receive None instead of an error.
            logger.exception('Failed to obtain Aliyun NLS token')
            return None

        token_info = payload.get('Token') if isinstance(payload, dict) else None
        if not isinstance(token_info, dict) or 'Id' not in token_info:
            logger.error('CreateToken response does not contain a token id')
            return None

        # The token itself is a credential, so only its expiry is logged.
        logger.debug('Obtained NLS token, expireTime = %s',
                     token_info.get('ExpireTime'))
        return token_info['Id']

    def _request_tts(self, text):
        """POST ``text`` to the TTS endpoint and return the raw answer.

        Returns:
            A ``(content_type, body)`` tuple: the response ``Content-Type``
            header (or ``None`` if absent) and the response body as bytes.
        """
        # HTTPS body. A None token is serialized as null and left for the
        # service to reject.
        body = json.dumps({
            'appkey': self.config['app_id'],
            'token': self.get_token(),
            'text': text,
            'format': 'mp3',
            'sample_rate': 16000,
        })
        url = 'https://' + self._GATEWAY_HOST + self._TTS_PATH
        http_headers = {'Content-Type': 'application/json'}

        conn = http.client.HTTPSConnection(self._GATEWAY_HOST)
        try:
            conn.request(method='POST', url=url, body=body, headers=http_headers)
            # Handle the response returned by the server.
            response = conn.getresponse()
            logger.debug('TTS response status and reason: %s %s',
                         response.status, response.reason)
            return response.getheader('Content-Type'), response.read()
        finally:
            # Close the connection even when the request raises.
            conn.close()

    def synthesize_speech(self, text, language='en'):
        """Convert ``text`` to MP3 speech, upload it to OSS and return its URL.

        Args:
            text: The text to convert to speech.
            language: Accepted for interface compatibility; not used here.

        Returns:
            ``"<oss_host>/<oss_audio_base_dir>/<YYYYMMDD>/<uuid>.mp3"`` on
            success, or ``None`` when the TTS endpoint does not return
            ``audio/mpeg`` content.
        """
        content_type, audio = self._request_tts(text)
        if content_type != self._AUDIO_CONTENT_TYPE:
            logger.error('The POST request failed: %s', audio)
            return None

        auth = oss2.Auth(self.config['oss_key_id'], self.config['oss_key_secret'])
        bucket = oss2.Bucket(auth, self.config['oss_endpoint'],
                             self.config['oss_bucket'])
        # Use today's date as a sub-directory and a random UUID as file name.
        today = datetime.datetime.now().strftime('%Y%m%d')
        mp3_name = f"{uuid.uuid4()}.mp3"
        oss_key = f"{self.config['oss_audio_base_dir']}/{today}/{mp3_name}"
        bucket.put_object(oss_key, audio)
        return '{}/{}'.format(self.config['oss_host'], oss_key)

    def synthesize_speech_raw(self, text, language='en'):
        """Generate speech from text and return the raw audio data directly,
        without storing it or creating a URL.

        Args:
            text (str): The text to convert to speech.
            language (str, optional): Accepted for interface compatibility;
                not used here. Defaults to 'en'.

        Returns:
            bytes: Raw MP3 audio data, or ``None`` when the TTS endpoint does
            not return ``audio/mpeg`` content.
        """
        logger.info('Synthesizing speech with raw data return')
        content_type, body_data = self._request_tts(text)
        if content_type != self._AUDIO_CONTENT_TYPE:
            logger.error('Failed to generate raw audio data: %s', body_data)
            return None
        logger.info('Successfully generated raw audio data')
        return body_data

    def recognize_speech(self, audio_data, language='en') -> str:
        """Send ``audio_data`` to the ASR endpoint and return the transcript.

        Only ``appkey`` is sent as a query parameter; no format, sample rate
        or other recognition options are passed.

        Args:
            audio_data: Bytes-like audio payload; sent unchanged as the
                request body with ``application/octet-stream`` content type.
            language: Accepted for interface compatibility; not used here.

        Returns:
            The ``result`` field of the response when its ``status`` equals
            20000000; ``'识别失败'`` when no token could be obtained or the
            service reports a failure; ``'识别失败2'`` when the response is
            not valid JSON.
        """
        app_key = self.config['app_id']
        token = self.get_token()
        if token is None:
            # http.client cannot send a None header value; fail the same way
            # a rejected request would.
            logger.error('Recognizer failed: no NLS token available')
            return '识别失败'

        # RESTful request address including the query string.
        request = ('https://' + self._GATEWAY_HOST + self._ASR_PATH
                   + '?appkey=' + app_key)
        # HTTPS request headers.
        http_headers = {
            'X-NLS-Token': token,
            'Content-type': 'application/octet-stream',
            'Content-Length': len(audio_data),
        }

        conn = http.client.HTTPSConnection(self._GATEWAY_HOST)
        try:
            conn.request(method='POST', url=request, body=audio_data,
                         headers=http_headers)
            raw_body = conn.getresponse().read()
        finally:
            # Release the connection on every path, including errors.
            conn.close()

        try:
            body = json.loads(raw_body)
        except ValueError:
            logger.error('The response is not json format string')
            return '识别失败2'

        logger.debug('Recognize response is: %s', body)
        succeeded = (
            isinstance(body, dict)
            and body.get('status') == self._ASR_SUCCESS_STATUS
            and 'result' in body
        )
        if not succeeded:
            logger.error('Recognizer failed!')
            return '识别失败'

        result = body['result']
        logger.debug('Recognize result: %s', result)
        return result
|
||
|
||
|
||
|
||
if __name__ == '__main__':
    # Manual smoke test: synthesize one sentence and upload it to OSS.
    # AliyunAudioService requires a config mapping, so it is assembled from
    # environment variables named ALIYUN_<KEY> (e.g. ALIYUN_API_KEY).
    # NOTE(review): the environment variable names are a convention chosen
    # for this smoke test; swap in the project's real configuration source
    # if one exists.
    # NOTE(review): this module uses a relative import, so it presumably has
    # to be started with ``python -m <package>.<module>`` - confirm.
    import os

    config_keys = (
        'api_key',
        'api_secret',
        'app_id',
        'oss_key_id',
        'oss_key_secret',
        'oss_endpoint',
        'oss_bucket',
        'oss_audio_base_dir',
        'oss_host',
    )
    missing = [
        'ALIYUN_' + key.upper()
        for key in config_keys
        if 'ALIYUN_' + key.upper() not in os.environ
    ]
    if missing:
        raise SystemExit(
            'Missing environment variables: ' + ', '.join(missing)
        )

    audio_ser = AliyunAudioService(
        {key: os.environ['ALIYUN_' + key.upper()] for key in config_keys}
    )
    audio_ser.synthesize_speech('你好,你是谁啊')
|