使用 Windows 小娜实现文本转语音

207 阅读2分钟

本文已参与「新人创作礼」活动,一起开启掘金创作之路。

python 下载地址

https://npm.taobao.org/mirrors/python/

下载、安装完成后,更新pip

# 更新pip3
python -m pip install -U pip
# 设置pip镜像源,加快下载安装速度
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple

安装所需依赖(将下列内容保存为 requirements.txt,然后执行 pip install -r requirements.txt)

certifi==2021.10.8
cffi==1.15.0
charset-normalizer==2.0.9
click==8.0.3
colorama==0.4.4
comtypes==1.1.10
Flask==2.0.2
Flask-Cors==3.0.10
gevent==21.8.0
greenlet==1.1.2
idna==3.3
importlib-metadata==4.8.2
itsdangerous==2.0.1
Jinja2==3.0.3
MarkupSafe==2.0.1
pycparser==2.21
pypiwin32==223
pyttsx3==2.90
pywin32==302
requests==2.26.0
six==1.16.0
typing_extensions==4.0.1
urllib3==1.26.7
uuid==1.30
Werkzeug==2.0.2
WSGIserver==1.3
zipp==3.6.0
zope.event==4.5.0
zope.interface==5.4.0

python代码

#!/usr/bin/env python
# -*- coding:utf-8 -*-
from pathlib import Path
from flask import Flask, jsonify, request, make_response
from gevent import pywsgi
from flask_cors import CORS
import requests
import sys
import os
import json
import uuid
from flask import logging
import configparser
from flask import jsonify, request, send_file, make_response, send_from_directory
import os
import uuid
import sys
import json
import platform
import requests
import configparser
import pyttsx3

# 创建蓝图
from flask import Blueprint
from flask import Blueprint, render_template, request

# Flask application with cross-origin requests enabled
app = Flask(__name__)
CORS(app)
app.config.update(
    # Keep non-ASCII (e.g. Chinese) characters readable in JSON responses
    JSON_AS_ASCII=False,
    # Turn on debug mode
    DEBUG=True,
)
# Logger bound to the Flask application
log = logging.create_logger(app)

# Blueprint that carries the NLP / TTS routes and error handlers
nlpServer = Blueprint('nlpServer', __name__)

# Directory in which generated audio files are written
file_floder = './file/'

if not os.path.exists(file_floder):
    os.makedirs(file_floder)

 
# URL of the Windows-hosted TTS service; hosts that cannot synthesize locally
# forward their /tts requests to it. Read from the [tts] section, option "url".
# NOTE(review): `cf` was referenced without ever being created, so importing the
# module raised NameError. The config file name below is an assumption — point
# it at the real configuration file.
cf = configparser.ConfigParser()
cf.read('config.ini', encoding='utf-8')
# fallback=None keeps the module importable when the file/section/option is absent
winTTSUrl = cf.get("tts", "url", fallback=None)
 


# Catch-all exception handler for the blueprint
@nlpServer.errorhandler(Exception)
def special_exception_handler(error):
    """Log the exception and answer with a generic JSON error and HTTP 500."""
    log.error(error)
    body = {"status": 500, "msg": "服务器错误"}
    return jsonify(body), 500


# Application-level (business) exception
class MyError(Exception):
    """Business error carrying the message and status code for the JSON reply.

    Args:
        msg: Text returned to the client in the ``msg`` field.
        status: Status code placed in the JSON body; defaults to 200.
        payload: Whether the HTTP status of the response should also be
            changed to ``status``; defaults to False.
    """

    def __init__(self, msg, status=None, payload=None):
        # Hand the message to Exception so str(error) and log lines are not empty.
        super().__init__(msg)
        self.msg = msg
        self.status = 200 if status is None else status
        self.payload = False if payload is None else payload


@nlpServer.errorhandler(MyError)
def MyErrorHandle(error):
    """Turn a MyError into a JSON reply.

    The body always carries the error's status and message. The HTTP status
    stays 200 unless the error's ``payload`` flag is set to something other
    than None/False, in which case the error's own status is used.
    """
    body = {"status": error.status, "msg": error.msg}
    keep_http_ok = error.payload is None or error.payload is False
    http_status = 200 if keep_http_ok else error.status
    return jsonify(body), http_status


@nlpServer.route('/nlp')  # register the view under /nlp
def index():
    """Return a fixed greeting; handy for checking that the blueprint is mounted."""
    greeting = 'hello nlp'
    return greeting


def _parse_optional_number(value, cast, name):
    """Convert an optional request parameter with *cast*; None passes through.

    Raises:
        MyError: HTTP 400 when the value cannot be converted.
    """
    if value is None:
        return None
    try:
        return cast(value)
    except (TypeError, ValueError, OverflowError):
        raise MyError("参数格式错误: " + name, status=400, payload=True) from None


# Text-to-speech endpoint
@nlpServer.route('/tts', methods=['POST'])
def tts():
    """Synthesize the posted text into an audio file and return it as a download.

    JSON body fields:
        text: Required, non-blank string to speak.
        rate: Optional multiplier in 0.0-2.0 applied to the engine's current rate.
        volume: Optional volume in 0.0-1.0 (defaults to 1).
        voice: Optional index into the engine's voices list (defaults to 0).
    Out-of-range optional values are ignored and the default is used.

    On Windows the audio is produced locally with pyttsx3. On every other
    platform the request is forwarded to the service at ``winTTSUrl`` and its
    answer is relayed.

    Raises:
        MyError: HTTP 400 for a missing/blank text or a malformed number;
            HTTP 500 when no forward URL is configured or no voice is installed.
    """
    # silent=True yields None instead of raising when the body is not JSON
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        body = {}
    text = body.get("text")
    rateValue = body.get("rate")
    volumeValue = body.get("volume")
    voiceValue = body.get("voice")

    # Parameter validation: "".isspace() is False, so test the stripped text instead
    if not isinstance(text, str) or not text.strip():
        log.error("请求参数不能为空!")
        raise MyError("请求参数不能为空", status=400, payload=True)

    rate_factor = _parse_optional_number(rateValue, float, "rate")
    volume = _parse_optional_number(volumeValue, float, "volume")
    voice_index = _parse_optional_number(voiceValue, int, "voice")

    # Engine initialisation: only Windows synthesizes locally. Every other
    # platform (not just Linux) leaves engine as None so it is always bound.
    system = platform.system().lower()
    if system == 'windows':
        log.info("windows platform")
        engine = pyttsx3.init()
    else:
        log.info("%s platform", system)
        engine = None

    # No local engine: forward the request to the Windows service
    if engine is None:
        if not winTTSUrl:
            raise MyError("未配置window tts服务地址", status=500, payload=True)
        log.info("linux platform forward windows url")
        rq_json = {"text": text, "rate": rateValue, "volume": volumeValue, "voice": voiceValue}
        headers = {"Content-Type": "application/json"}
        upstream = requests.post(winTTSUrl, headers=headers, json=rq_json, timeout=200)
        # Re-wrap the upstream answer, keeping its status code. Only the headers
        # that describe the file are copied: assigning a plain dict to
        # response.headers would replace the Headers object, and upstream
        # length/encoding headers need not match the re-wrapped body.
        response = make_response(upstream.content, upstream.status_code)
        for header in ("Content-Type", "Content-Disposition"):
            if header in upstream.headers:
                response.headers[header] = upstream.headers[header]
        return response

    # Speaking rate: scale the engine's current rate by the requested factor
    base_rate = engine.getProperty('rate')
    if rate_factor is not None and 0.0 <= rate_factor <= 2.0:
        log.info("rateValue:  " + str(rateValue))
        engine.setProperty('rate', base_rate * rate_factor)
    else:
        engine.setProperty('rate', base_rate)

    # Volume, valid range 0.0-1.0
    if volume is not None and 0.0 <= volume <= 1.0:
        log.info("volumeValue:    " + str(volumeValue))
        engine.setProperty('volume', volume)
    else:
        engine.setProperty('volume', 1)

    # Voice selection: the voices come from the operating system's speech packs
    voices = engine.getProperty('voices')
    if not voices:
        raise MyError("未初始化语音包", status=500, payload=True)
    # Bound the index by the voices actually installed, not a fixed constant
    if voice_index is None or not 0 <= voice_index < len(voices):
        voice_index = 0
    engine.setProperty('voice', voices[voice_index].id)

    # Queue the text for rendering into a uniquely named file.
    # NOTE(review): the file is named .mp3, but the audio format actually
    # written depends on the pyttsx3 driver — confirm.
    fileName = uuid.uuid4().hex + ".mp3"
    # Use one absolute directory for both writing and serving so the two agree
    # even when the working directory differs from the application root.
    audio_dir = os.path.abspath(file_floder)
    engine.save_to_file(text, os.path.join(audio_dir, fileName))
    log.info("%s   %s", text, fileName)

    # Block until synthesis has finished
    engine.runAndWait()
    engine.stop()

    response = make_response(send_from_directory(audio_dir, fileName))
    response.headers["Content-Disposition"] = "attachment; filename={}".format(fileName)
    return response


# List the speech voices installed on the host
@nlpServer.route('/ttsLang', methods=['POST', "GET"])
def ttsLang():
    """Return the voices reported by the pyttsx3 engine as JSON.

    The reply has the form ``{"status": 200, "msg": "成功", "data": [...]}``,
    where each entry holds the voice's ``index`` in the engine's voices list,
    its ``id`` and its ``name``.

    Raises:
        MyError: Status 500 when the host is not Windows, since no engine is
            initialised there.
    """
    responseData = {"status": 200, "msg": "成功", "data": []}

    # Only Windows initialises an engine. Any other platform (not just Linux)
    # is rejected here, so `engine` can never be referenced while unbound.
    system = platform.system().lower()
    if system != 'windows':
        log.info("%s platform", system)
        log.error("linux platform")
        raise MyError("未初始化语音包", status=500, payload=True)

    log.info("windows platform")
    engine = pyttsx3.init()

    # The voices come from the operating system's speech packs
    voices = engine.getProperty('voices') or []
    for i, voice in enumerate(voices):
        log.info(" voices:  " + str(voice))
        responseData["data"].append({"index": i, "id": voice.id, "name": voice.name})

    return jsonify(responseData), 200


# Attach the nlpServer blueprint (its routes and error handlers) to the application
app.register_blueprint(nlpServer)


@app.route('/')
def hello_world():
    """Return a fixed greeting for the application root URL."""
    greeting = 'Hello World!'
    return greeting


if __name__ == '__main__':
    log.info("WSGI http server start ")
    # Serve the app through gevent's WSGI server on all interfaces, port 5000
    # (Flask's built-in app.run server is the alternative and is not used here).
    listen_on = ('0.0.0.0', 5000)
    pywsgi.WSGIServer(listen_on, app).serve_forever()

启动

python ttsServer.py

测试

# 访问该地址可查看 Windows 的语音包;若没有或缺失,可使用高版本 Windows 10 或安装语音包
# 安装方法请自行百度
http://localhost:5000/ttsLang
# 若存在,结果可能如下
{
  "data": [
    {
      "id": "HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens\\TTS_MS_ZH-CN_HUIHUI_11.0", 
      "index": 0, 
      "name": "Microsoft Huihui Desktop - Chinese (Simplified)"
    }, 
    {
      "id": "HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens\\TTS_MS_EN-US_ZIRA_11.0", 
      "index": 1, 
      "name": "Microsoft Zira Desktop - English (United States)"
    }
  ], 
  "msg": "成功", 
  "status": 200
}

然后即可调用接口 http://localhost:5000/tts 进行测试(POST 请求,JSON 请求体,例如 {"text": "你好", "rate": 1.0, "volume": 1.0, "voice": 0}),各参数的含义请参考上面的代码。