语音识别与语音输出

374 阅读2分钟

前言

在写 JS30 的 20 - Speech Detection 和 23 - Speech Synthesis 时,发现它们分别是语音识别和语音输出的案例,看着挺有意思,放在一起总结一下

正文

语音识别

相关 API

SpeechRecognition

捋清思路

  1. 实例化对象
  2. 手动调用 start() 方法
  3. 再监听 result 事件拿到语音转换的文字内容
  4. 渲染
  5. 监听 end 事件,继续调用 start() 方法

冻手!

  • 先打印一下 result 事件拿到的事件对象是什么
// Fall back to the webkit-prefixed constructor when the unprefixed one is missing.
window.SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;

// Create the recognizer and set its recognition language to 'zh-CN'.
const recognition = new SpeechRecognition();
recognition.lang = 'zh-CN';

/**
 * Log the raw `result` event so its structure can be inspected in the console.
 * @param {Event} event - the event object delivered to the `result` listener
 */
function logResultEvent(event) {
    console.log(event);
}

recognition.addEventListener('result', logResultEvent);

// Recognition does not begin until start() is called explicitly.
recognition.start();

image.png

  • 里面的 results 是我们需要的转化后的结果,其中
    • confidence 为识别结果的置信度(取值 0~1,越大表示识别系统越确信结果正确)
    • transcript 即为识别结果
    • isFinal 指结果是否为最终的结果

源码

// Fall back to the webkit-prefixed constructor when the unprefixed one is missing.
window.SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;

const recognition = new SpeechRecognition();
// Ask for interim (not yet final) results so text can be shown while the user is still speaking.
recognition.interimResults = true;
recognition.lang = 'en-US';

// `p` is the paragraph currently being filled; it lives inside the `.words` container.
const words = document.querySelector('.words');
let p = document.createElement('p');
words.appendChild(p);

/**
 * Render the latest transcript into the current paragraph and open a new
 * paragraph once the first result is marked final.
 * @param {Event} e - `result` event; `e.results` holds the recognition results
 */
function handleResult(e) {
  // Take the first alternative of every result and glue the pieces together.
  const transcript = Array.from(e.results, (result) => result[0].transcript).join('');

  // Replace the listed words with an emoji before displaying.
  p.textContent = transcript.replace(/poop|poo|shit|dump/gi, '💩');

  if (e.results[0].isFinal) {
    p = document.createElement('p');
    words.appendChild(p);
  }
}

recognition.addEventListener('result', handleResult);

// Start listening again whenever recognition ends.
recognition.addEventListener('end', recognition.start);

recognition.start();

改进

  • 观察示例代码后发现他还对一些违禁词进行了替换
  • 设置 recognition.interimResults = true 的目的是告诉语音识别系统在识别过程中返回临时结果。即使语音识别系统尚未确认最终结果,也可以在中间步骤获取识别到的文本,从而实时显示部分识别结果

只能说还是🐂

语音输出

相关 API

SpeechSynthesisUtterance

捋清思路

因为要求的页面是这样的😬😬

image.png 因此还要看音色、速率以及语调

音色

  • 这个需要手动监听 voiceschanged 事件,才会拿到数据
  • 为什么得手动监听,看官方文档也没看懂。。。感觉它说的挺抽象的。。
  • 查了一下之后,应该是因为获取所有声音是需要联网去异步加载的,因此直接调用 getVoices 是空数组, 详情请见
const synth = window.speechSynthesis;

// Log the voice list once it is available. Per the surrounding article, calling
// getVoices() directly returns an empty array, so wait for `voiceschanged`.
// A regular function (not an arrow) is used so `this` is the event target (synth).
synth.addEventListener("voiceschanged", function(e) {
    console.log(this.getVoices())
});

image.png

速率 语调

  • 实例化,主要是操作得到的这个对象完成对输出语音一些性质的更改
/**
 * Create a fresh SpeechSynthesisUtterance and log it, followed by its
 * `pitch` and `rate` values, so the defaults can be inspected in the console.
 */
function A() {
      const utterThis = new SpeechSynthesisUtterance();
      console.log(utterThis);

      const { pitch, rate } = utterThis;
      console.log(pitch, rate);
}

A();

image.png

万事俱备,冻手!

首先我们需要在全局注册一个实例化对象,方便我们在每个函数里面调用修改

获取需要的音色

const synth = window.speechSynthesis;

/**
 * Refresh the shared `voices` list and rebuild the voice dropdown with one
 * <option> per voice whose `lang` contains 'en'.
 *
 * Must be invoked with `this` bound to an object exposing getVoices()
 * (it is registered below as a listener on `synth`).
 */
function getVoice() {
    voices = this.getVoices();

    const optionTags = [];
    for (const voice of voices) {
        if (!voice.lang.includes('en')) {
            continue;
        }
        optionTags.push(`<option value="${voice.name}">${voice.name} (${voice.lang})</option>`);
    }
    voicesDropdown.innerHTML = optionTags.join('');
}

synth.addEventListener("voiceschanged", getVoice);

设置音色

/**
 * Point the shared utterance `msg` at the voice whose name equals `value`.
 * When no voice matches, `msg.voice` is assigned `undefined`.
 * @param {string} value - voice name to look up in `voices`
 */
function setVoice(value) {
    const hasRequestedName = ({ name }) => name === value;
    msg.voice = voices.find(hasRequestedName);
}

// Apply the newly selected voice whenever the dropdown selection changes.
voicesDropdown.addEventListener('change', function () {
    setVoice(voicesDropdown.value);
});

image.png

设置速率 & 音调

/**
 * Copy a control's value onto the shared utterance `msg`.
 * @param {string} name - property of `msg` to write
 * @param {*} value - value to store under that property
 */
function setValue(name, value) {
    msg[name] = value;
}

// Every control writes its own name/value pair to `msg` when it changes.
for (const option of options) {
    option.addEventListener('change', function () {
        setValue(option.name, option.value);
    });
}

播放与取消

  • 会用到 cancel 和 speak 两个方法
/**
 * Cancel speech synthesis and, optionally, speak the shared utterance `msg`.
 * @param {boolean} [startOver=true] - when true, speak `msg` after cancelling;
 *   when false, only cancel.
 */
function toggleStart(startOver = true) {
    // Always cancel first so a new click does not stack on top of earlier speech.
    speechSynthesis.cancel();
    if (startOver) {
      speechSynthesis.speak(msg);
    }
}
// Wrap in arrows so the click Event is never passed in as `startOver`.
// (The original referenced an undefined `toggle` here instead of `toggleStart`.)
speakButton.addEventListener('click', () => toggleStart());
cancelButton.addEventListener('click', () => toggleStart(false));

暂停与继续

  • 用到 pause 和 resume 两个方法
/**
 * Resume or pause speech synthesis.
 * @param {boolean} [continueOver=true] - when true, resume speech;
 *   when false, pause it.
 */
function toggleContinue(continueOver = true) {
    if (continueOver) {
      // resume() takes no arguments; pausing first is unnecessary.
      speechSynthesis.resume();
    } else {
      speechSynthesis.pause();
    }
  }
// Wrap in arrows so the click Event is never passed in as `continueOver`.
continueButton.addEventListener('click', () => toggleContinue());
stopButton.addEventListener('click', () => toggleContinue(false));

后面把这两个方法整合了一下

完成

源码

// Shared state: the single utterance every handler mutates, the cached voice
// list, and the speech synthesis controller.
const msg = new SpeechSynthesisUtterance();
let voices = [];
const synth = window.speechSynthesis;

// Form controls.
const voicesDropdown = document.querySelector('[name="voice"]');
const options = document.querySelectorAll('[type="range"], [name="text"]');

// Buttons, looked up by id.
const speakButton = document.getElementById('speak');
const cancelButton = document.getElementById('cancel');
const continueButton = document.getElementById('continue');
const stopButton = document.getElementById('stop');

// Seed the utterance with whatever text the text control currently holds.
msg.text = document.querySelector('[name="text"]').value;

/**
 * Refresh the shared `voices` list and rebuild the voice dropdown with one
 * <option> per voice whose `lang` contains 'en'.
 *
 * Must be invoked with `this` bound to an object exposing getVoices().
 */
function getVoice() {
  voices = this.getVoices();

  const optionTags = [];
  for (const voice of voices) {
    if (!voice.lang.includes('en')) {
      continue;
    }
    optionTags.push(`<option value="${voice.name}">${voice.name} (${voice.lang})</option>`);
  }
  voicesDropdown.innerHTML = optionTags.join('');
}
/**
 * Point the shared utterance `msg` at the voice whose name equals `value`.
 * When no voice matches, `msg.voice` is assigned `undefined`.
 * @param {string} value - voice name to look up in `voices`
 */
function setVoice(value) {
  const hasRequestedName = ({ name }) => name === value;
  msg.voice = voices.find(hasRequestedName);
}
/**
 * Copy a control's value onto the shared utterance `msg`.
 * @param {string} name - property of `msg` to write
 * @param {*} value - value to store under that property
 */
function setValue(name, value) {
  msg[name] = value;
}
/**
 * Drive playback of the shared utterance `msg`.
 *
 * @param {string} mode - 'start' handles speak/cancel, 'continue' handles
 *   resume/pause. Any other value does nothing.
 * @param {boolean} [flag=true] - for 'start': true cancels then speaks `msg`,
 *   false only cancels. For 'continue': true resumes, false pauses.
 */
function toggle(mode, flag = true) {
  if (mode === 'start') {
    // Always cancel first so a new click does not stack on top of earlier speech.
    speechSynthesis.cancel();
    if (flag) {
      speechSynthesis.speak(msg);
    }
  } else if (mode === 'continue') {
    if (flag) {
      // resume() takes no arguments; pausing first is unnecessary.
      speechSynthesis.resume();
    } else {
      speechSynthesis.pause();
    }
  }
}


// Rebuild the voice dropdown whenever the voice list changes.
synth.addEventListener("voiceschanged", getVoice);

// Selecting a voice applies it to the shared utterance.
voicesDropdown.addEventListener('change', () => setVoice(voicesDropdown.value));

// Every control writes its own name/value pair to the utterance on change.
for (const option of options) {
  option.addEventListener('change', () => setValue(option.name, option.value));
}

// Each button maps to one (mode, flag) combination understood by toggle().
const buttonActions = [
  [speakButton, 'start', true],
  [cancelButton, 'start', false],
  [continueButton, 'continue', true],
  [stopButton, 'continue', false],
];
for (const [button, mode, flag] of buttonActions) {
  button.addEventListener('click', () => toggle(mode, flag));
}

结语

不知道还能保持这样的创作激情多久嘿嘿