# lty/qy_lty/aiapp/audio/AliyunAudioService.py

import requests
import json
from .BaseAudioService import BaseAudioService
import oss2
import uuid
import logging
import datetime

import http.client
from django.conf import settings

from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

class AliyunAudioService(BaseAudioService):
    """Speech synthesis (TTS) and recognition (ASR) through Aliyun NLS.

    ``config`` must provide every key this class reads: ``api_key``,
    ``api_secret``, ``app_id``, ``oss_key_id``, ``oss_key_secret``,
    ``oss_endpoint``, ``oss_bucket``, ``oss_audio_base_dir`` and
    ``oss_host``.
    """

    # Region and hosts shared by every request issued by this class.
    _REGION_ID = "cn-shanghai"
    _TOKEN_DOMAIN = 'nls-meta.cn-shanghai.aliyuncs.com'
    _GATEWAY_HOST = 'nls-gateway-cn-shanghai.aliyuncs.com'
    # ``status`` value of a recognition response that is treated as success.
    _ASR_SUCCESS_STATUS = 20000000

    def __init__(self, config):
        """Store ``config`` and create the AcsClient used to request tokens.

        Args:
            config: Mapping holding the credentials and OSS settings listed
                in the class docstring.

        Raises:
            KeyError: If ``api_key``, ``api_secret``, ``oss_key_id`` or
                ``oss_key_secret`` is missing from ``config``.
        """
        super().__init__(config['api_key'], config['api_secret'])
        self.config = config
        self.oss_key_id = config['oss_key_id']
        self.oss_key_secret = config['oss_key_secret']
        # Create the AcsClient instance.
        self.client = AcsClient(
            config['api_key'],
            config['api_secret'],
            self._REGION_ID,
        )

    def get_token(self):
        """Request a fresh NLS access token via the ``CreateToken`` action.

        A new token is requested on every call; nothing is cached.

        Returns:
            The token id string, or ``None`` when the request fails or the
            response does not contain ``Token.Id``. Failures are logged
            instead of raised (best-effort, as callers expect).
        """
        # Build the request and set its parameters.
        request = CommonRequest()
        request.set_method('POST')
        request.set_domain(self._TOKEN_DOMAIN)
        request.set_version('2019-02-28')
        request.set_action_name('CreateToken')

        try:
            response = self.client.do_action_with_exception(request)
            token_info = json.loads(response).get('Token') or {}
            token = token_info.get('Id')
            expire_time = token_info.get('ExpireTime')
        except Exception:
            # Deliberately broad: any SDK, network or parsing failure means
            # "no token" for the caller.
            logger.exception('Failed to create NLS token')
            return None

        if not token:
            logger.error('NLS CreateToken response did not contain a token id')
            return None

        # The token itself is a credential, so it is never written to logs.
        logger.debug('Obtained NLS token, expireTime = %s', expire_time)
        return token

    def _fetch_tts_audio(self, text):
        """POST ``text`` to the NLS TTS endpoint and return the MP3 bytes.

        Returns:
            The response body as ``bytes`` when the server answers with
            ``Content-Type: audio/mpeg``; ``None`` when no token could be
            obtained or the server returned anything else.
        """
        token = self.get_token()
        if token is None:
            logger.error('Cannot synthesize speech: no NLS token available')
            return None

        # Keep json.dumps' default ASCII escaping: http.client encodes a str
        # body as ISO-8859-1, which cannot represent raw non-Latin text.
        payload = json.dumps({
            'appkey': self.config['app_id'],
            'token': token,
            'text': text,
            'format': 'mp3',
            'sample_rate': 16000,
        })
        url = 'https://' + self._GATEWAY_HOST + '/stream/v1/tts'
        http_headers = {'Content-Type': 'application/json'}

        conn = http.client.HTTPSConnection(self._GATEWAY_HOST)
        try:
            conn.request(method='POST', url=url, body=payload, headers=http_headers)
            response = conn.getresponse()
            status, reason = response.status, response.reason
            content_type = response.getheader('Content-Type')
            data = response.read()
        finally:
            # Always release the socket, even when the request raises.
            conn.close()

        if content_type != 'audio/mpeg':
            logger.error('The POST request failed (%s %s): %r', status, reason, data)
            return None
        return data

    def synthesize_speech(self, text, language='en'):
        """Synthesize ``text`` to MP3, upload it to OSS and return its URL.

        Args:
            text (str): The text to convert to speech.
            language (str, optional): Currently unused. Defaults to 'en'.

        Returns:
            ``"<oss_host>/<oss_audio_base_dir>/<YYYYMMDD>/<uuid>.mp3"`` on
            success, or ``None`` when synthesis failed.
        """
        audio = self._fetch_tts_audio(text)
        if audio is None:
            return None

        auth = oss2.Auth(self.config['oss_key_id'], self.config['oss_key_secret'])
        bucket = oss2.Bucket(auth, self.config['oss_endpoint'], self.config['oss_bucket'])
        # Today's date is the sub-directory; a random UUID is the file name.
        today = datetime.datetime.now().strftime('%Y%m%d')
        mp3_name = f"{uuid.uuid4()}.mp3"
        oss_key = f"{self.config['oss_audio_base_dir']}/{today}/{mp3_name}"
        bucket.put_object(oss_key, audio)
        return '{}/{}'.format(self.config['oss_host'], oss_key)

    def synthesize_speech_raw(self, text, language='en'):
        """
        Generate speech from text and return the raw audio data directly
        without storing it or creating a URL

        Args:
            text (str): The text to convert to speech
            language (str, optional): Currently unused. Defaults to 'en'.

        Returns:
            bytes: Raw audio data, or None when synthesis failed
        """
        logger.info('Synthesizing speech with raw data return')
        audio = self._fetch_tts_audio(text)
        if audio is None:
            logger.error('Failed to generate raw audio data')
            return None
        logger.info('Successfully generated raw audio data')
        return audio

    def recognize_speech(self, audio_data, language='en') -> str:
        """Send ``audio_data`` to the NLS ASR endpoint and return the text.

        Args:
            audio_data (bytes): Audio payload, posted as-is as
                ``application/octet-stream``.
            language (str, optional): Currently unused. Defaults to 'en'.

        Returns:
            The recognized text on success; ``'识别失败'`` when no token is
            available or the service reports a failure; ``'识别失败2'`` when
            the response body is not valid JSON.
        """
        token = self.get_token()
        if token is None:
            logger.error('Cannot recognize speech: no NLS token available')
            return '识别失败'

        # Service URL; only the appkey is passed as a query parameter.
        request_url = (
            'https://' + self._GATEWAY_HOST + '/stream/v1/asr'
            + '?appkey=' + self.config['app_id']
        )
        # HTTPS request headers.
        http_headers = {
            'X-NLS-Token': token,
            'Content-type': 'application/octet-stream',
            'Content-Length': len(audio_data),
        }

        conn = http.client.HTTPSConnection(self._GATEWAY_HOST)
        try:
            conn.request(method='POST', url=request_url, body=audio_data, headers=http_headers)
            raw_body = conn.getresponse().read()
        finally:
            # Close on every path, not only the JSON-error one.
            conn.close()

        try:
            body = json.loads(raw_body)
        except ValueError:
            logger.error('The response is not json format string')
            return '识别失败2'

        succeeded = (
            isinstance(body, dict)
            and body.get('status') == self._ASR_SUCCESS_STATUS
            and 'result' in body
        )
        if succeeded:
            logger.debug('Recognize result: %s', body['result'])
            return body['result']

        logger.error('Recognizer failed! Response: %r', body)
        return '识别失败'


if __name__ == '__main__':
    # Manual smoke test: synthesize one sentence through the service.
    #
    # AliyunAudioService.__init__ requires a ``config`` mapping, so calling it
    # with no arguments raised TypeError. The config is read from a JSON file
    # whose path is the first command-line argument. Because this module uses
    # a relative import, run it with ``python -m <package path>``.
    import sys

    if len(sys.argv) != 2:
        sys.exit('usage: python -m <package>.AliyunAudioService <config.json>')

    with open(sys.argv[1], encoding='utf-8') as config_file:
        audio_ser = AliyunAudioService(json.load(config_file))
    audio_ser.synthesize_speech('你好，你是谁啊')