import datetime
import http.client
import json
import logging
import uuid

import oss2
import requests
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest
from django.conf import settings

from .BaseAudioService import BaseAudioService

logger = logging.getLogger(__name__)

# Endpoints of the Aliyun NLS services used by this module.
_NLS_META_DOMAIN = 'nls-meta.cn-shanghai.aliyuncs.com'
_NLS_GATEWAY_HOST = 'nls-gateway-cn-shanghai.aliyuncs.com'
_TTS_URL = 'https://' + _NLS_GATEWAY_HOST + '/stream/v1/tts'
_ASR_URL = 'https://' + _NLS_GATEWAY_HOST + '/stream/v1/asr'

# Content type the TTS endpoint must answer with for the body to be audio.
_TTS_AUDIO_CONTENT_TYPE = 'audio/mpeg'
# Value of the ASR response's "status" field that is treated as success.
_ASR_SUCCESS_STATUS = 20000000

# Strings returned by recognize_speech() on failure (kept for callers).
_RECOGNIZE_FAILED = '识别失败'
_RECOGNIZE_BAD_RESPONSE = '识别失败2'


class AliyunAudioService(BaseAudioService):
    """Audio service backed by Aliyun NLS (TTS/ASR) and Aliyun OSS storage.

    The ``config`` mapping passed to the constructor is read for these keys:
    ``api_key``, ``api_secret``, ``app_id``, ``oss_key_id``,
    ``oss_key_secret``, ``oss_endpoint``, ``oss_bucket``,
    ``oss_audio_base_dir`` and ``oss_host``.
    """

    def __init__(self, config):
        """Store the configuration and create the Aliyun API client.

        Args:
            config: Mapping holding the credentials and OSS settings listed
                in the class docstring.

        Raises:
            KeyError: If ``api_key``, ``api_secret``, ``oss_key_id`` or
                ``oss_key_secret`` is missing from ``config``.
        """
        super().__init__(config['api_key'], config['api_secret'])
        self.config = config
        self.oss_key_id = config['oss_key_id']
        self.oss_key_secret = config['oss_key_secret']
        # Client used to call the CreateToken action.
        self.client = AcsClient(
            self.config['api_key'],
            self.config['api_secret'],
            "cn-shanghai",
        )

    def get_token(self):
        """Request a new NLS access token via the ``CreateToken`` action.

        Returns:
            The token id string, or ``None`` when the call fails or the
            response does not contain ``Token.Id``.
        """
        request = CommonRequest()
        request.set_method('POST')
        request.set_domain(_NLS_META_DOMAIN)
        request.set_version('2019-02-28')
        request.set_action_name('CreateToken')

        try:
            response = self.client.do_action_with_exception(request)
            payload = json.loads(response)
        except Exception:
            # Best-effort boundary: callers treat ``None`` as "no token".
            logger.exception('Failed to create an NLS access token')
            return None

        token_info = payload.get('Token') if isinstance(payload, dict) else None
        if not isinstance(token_info, dict) or 'Id' not in token_info:
            logger.error('CreateToken response does not contain a token id')
            return None

        # The token itself is a credential and is deliberately never logged.
        logger.debug('Obtained NLS token, expireTime = %s',
                     token_info.get('ExpireTime'))
        return token_info['Id']

    def _request_tts(self, text):
        """POST ``text`` to the TTS endpoint and return the raw response.

        Args:
            text: The text to convert to speech.

        Returns:
            A ``(content_type, body)`` tuple with the response's
            ``Content-Type`` header and its body as bytes, or ``None`` when
            no access token could be obtained.

        Raises:
            OSError, http.client.HTTPException: If the HTTPS request fails.
        """
        token = self.get_token()
        if not token:
            logger.error('Cannot synthesize speech: no NLS access token')
            return None

        # json.dumps escapes non-ASCII characters by default, so the str body
        # is safe for http.client, which encodes str bodies as ISO-8859-1.
        payload = json.dumps({
            'appkey': self.config['app_id'],
            'token': token,
            'text': text,
            'format': 'mp3',
            'sample_rate': 16000,
        })
        headers = {'Content-Type': 'application/json'}

        conn = http.client.HTTPSConnection(_NLS_GATEWAY_HOST)
        try:
            conn.request(method='POST', url=_TTS_URL, body=payload,
                         headers=headers)
            response = conn.getresponse()
            logger.debug('TTS response status: %s %s',
                         response.status, response.reason)
            return response.getheader('Content-Type'), response.read()
        finally:
            # Always release the connection, including on request errors.
            conn.close()

    def synthesize_speech(self, text, language='en'):
        """Generate speech from text, upload it to OSS and return its URL.

        The audio is stored under
        ``<oss_audio_base_dir>/<YYYYMMDD>/<random uuid>.mp3``.

        Args:
            text (str): The text to convert to speech.
            language (str, optional): Language code. Currently unused.
                Defaults to 'en'.

        Returns:
            str | None: ``<oss_host>/<object key>`` of the uploaded file, or
            ``None`` when no token was available or the service did not
            return audio data.
        """
        tts_result = self._request_tts(text)
        if tts_result is None:
            return None

        content_type, audio = tts_result
        if content_type != _TTS_AUDIO_CONTENT_TYPE:
            logger.error('The POST request failed: %s', audio)
            return None

        auth = oss2.Auth(self.config['oss_key_id'],
                         self.config['oss_key_secret'])
        bucket = oss2.Bucket(auth, self.config['oss_endpoint'],
                             self.config['oss_bucket'])

        # Use today's date as a sub-directory and a random file name.
        today = datetime.datetime.now().strftime('%Y%m%d')
        mp3_name = f"{uuid.uuid4()}.mp3"
        oss_key = f"{self.config['oss_audio_base_dir']}/{today}/{mp3_name}"
        bucket.put_object(oss_key, audio)

        return '{}/{}'.format(self.config['oss_host'], oss_key)

    def synthesize_speech_raw(self, text, language='en'):
        """Generate speech from text and return the raw audio data directly,
        without storing it or creating a URL.

        Args:
            text (str): The text to convert to speech.
            language (str, optional): Language code. Currently unused.
                Defaults to 'en'.

        Returns:
            bytes | None: Raw audio data, or ``None`` when no token was
            available or the service did not return audio data.
        """
        logger.info('Synthesizing speech with raw data return')
        tts_result = self._request_tts(text)
        if tts_result is None:
            return None

        content_type, audio = tts_result
        if content_type != _TTS_AUDIO_CONTENT_TYPE:
            logger.error('Failed to generate raw audio data: %s', audio)
            return None

        logger.info('Successfully generated raw audio data')
        return audio

    def recognize_speech(self, audio_data, language='en') -> str:
        """Send audio to the ASR endpoint and return the recognized text.

        Args:
            audio_data (bytes): Audio content, sent as the request body.
            language (str, optional): Language code. Currently unused.
                Defaults to 'en'.

        Returns:
            str: The recognized text on success; ``'识别失败'`` when no token
            is available or the service reports a failure; ``'识别失败2'``
            when the response body is not valid JSON.

        Raises:
            OSError, http.client.HTTPException: If the HTTPS request fails.
        """
        app_key = self.config['app_id']
        token = self.get_token()
        if not token:
            # A ``None`` header value would make http.client raise TypeError.
            logger.error('Cannot recognize speech: no NLS access token')
            return _RECOGNIZE_FAILED

        request_url = _ASR_URL + '?appkey=' + app_key
        headers = {
            'X-NLS-Token': token,
            'Content-type': 'application/octet-stream',
            'Content-Length': len(audio_data),
        }

        conn = http.client.HTTPSConnection(_NLS_GATEWAY_HOST)
        try:
            conn.request(method='POST', url=request_url, body=audio_data,
                         headers=headers)
            raw_body = conn.getresponse().read()
        finally:
            # Always release the connection, whatever the outcome.
            conn.close()

        try:
            body = json.loads(raw_body)
        except ValueError:
            logger.error('The response is not json format string')
            return _RECOGNIZE_BAD_RESPONSE

        logger.debug('Recognize response is: %s', body)
        succeeded = (
            isinstance(body, dict)
            and body.get('status') == _ASR_SUCCESS_STATUS
            and 'result' in body
        )
        if not succeeded:
            logger.error('Recognizer failed!')
            return _RECOGNIZE_FAILED

        result = body['result']
        logger.info('Recognize result: %s', result)
        return result


if __name__ == '__main__':
    # NOTE(review): AliyunAudioService requires a ``config`` mapping, so this
    # call raises TypeError as written — supply a real config before use.
    audio_ser = AliyunAudioService()
    audio_ser.synthesize_speech('你好,你是谁啊')