使用 Tampermonkey 结合沉浸式翻译和 GPT-SoVITS-Inference

89 阅读1分钟

使用 Tampermonkey 结合沉浸式翻译和 GPT-SoVITS-Inference

当我们还在使用neospeech的时候,别人已经用起了pytorch
我需要实现一个这样的功能

1.首先我们要获取每次选中的元素。这里需要结合沉浸式翻译来实现;如果只想看原文,自行对元素进行配置即可。沉浸式翻译有一套很成熟的选择器自定义配置,所以在阅读文章的时候非常合适。

/**
 * Selects a paragraph on the page, copies its text (minus the injected
 * translation, CJK characters and newlines) to the clipboard and, once the
 * copy has succeeded, hands the same text to playAudioStream.
 *
 * @param {Element} element - Paragraph element to select, copy and read aloud.
 */
let handleParmCopy = (element) => {
  // Work on a deep copy so that blanking the translation nodes never alters the page.
  const detachedCopy = element.cloneNode(true);
  const translationNodes = detachedCopy.querySelectorAll('[data-immersive-translate-translation-element-mark="1"]');
  for (const translationNode of translationNodes) {
    translationNode.textContent = '';
  }

  // Replace the current selection with the contents of the live paragraph.
  const paragraphRange = document.createRange();
  paragraphRange.selectNodeContents(element);
  const pageSelection = window.getSelection();
  pageSelection.removeAllRanges();
  pageSelection.addRange(paragraphRange);

  // Drop CJK ideographs (U+4E00-U+9FA5) and line breaks from the remaining text.
  const spokenText = detachedCopy.textContent.replace(/[\u4e00-\u9fa5\n]/g, '');

  const onCopied = () => {
    playAudioStream(spokenText);
  };
  const onCopyFailed = (err) => {
    console.log(err, 'err');
  };
  // Playback only starts after the clipboard write has resolved.
  navigator.clipboard.writeText(spokenText).then(onCopied).catch(onCopyFailed);
};



// Arrow-key paragraph navigation: ArrowUp / ArrowDown move to the previous /
// next paragraph, scroll it into view and pass it to handleParmCopy.
// The listener is installed after a 2 s delay — presumably to give the
// translation extension time to tag the paragraphs; TODO confirm.
setTimeout(() => {
  // Queried once: paragraphs tagged after this point are not part of the list.
  const paragraphs = document.querySelectorAll('[data-immersive-translate-paragraph="1"]');
  // NOTE(review): navigation starts at index 0, so the first ArrowDown lands on
  // the second paragraph; kept as in the original.
  let currentIndex = 0;

  // Scrolls the paragraph at `index` to the centre of the viewport and processes it.
  const focusParagraph = (index) => {
    const element = paragraphs[index];
    element.scrollIntoView({ behavior: 'smooth', block: 'center' });
    handleParmCopy(element);
  };

  document.addEventListener('keydown', (event) => {
    if (event.key === 'ArrowUp' && currentIndex > 0) {
      currentIndex -= 1;
      focusParagraph(currentIndex);
    } else if (event.key === 'ArrowDown' && currentIndex < paragraphs.length - 1) {
      // Stop on the last paragraph: stepping to `paragraphs.length` would yield
      // undefined and make scrollIntoView throw.
      currentIndex += 1;
      focusParagraph(currentIndex);
    }
  });
}, 2000);

2.自动让浏览器发音。因为接口返回的是流式数据,而且我们需要考虑用户上下左右乱按的情况;节流没必要,只需防止内存溢出:使用 AbortController 和 close 来关闭上一次的请求和音频。

// Shared audio context used by playAudioStream for decoding and playback; falls
// back to the webkit-prefixed constructor when window.AudioContext is missing.
// playAudioStream closes it and assigns a fresh one on every call.
let audioContext = new (window.AudioContext || window.webkitAudioContext)();
// Create an AbortController instance to manage cancellation of the TTS request;
// playAudioStream aborts it and assigns a fresh one on every call.
let controller = new AbortController();


/**
 * Requests speech for `text` from the local TTS endpoint and plays the result.
 *
 * Every call first cancels the previous call: the module-level `controller`
 * is aborted and the module-level `audioContext` is closed, then both are
 * replaced with fresh instances owned by this call.
 *
 * @param {string} text - Text to synthesise; it is sent wrapped as `( text )`.
 * @returns {Promise<void>} Settles once playback has started, the call has been
 *   superseded by a newer one, or a failure has been logged. Never rejects for
 *   request or decoding failures.
 */
async function playAudioStream(text) {
  // Cancel whatever the previous call still has in flight.
  controller.abort();
  // close() returns a promise; swallow its rejection so a failed close never
  // surfaces as an unhandled rejection.
  Promise.resolve(audioContext?.close()).catch(() => {});

  // Keep call-local references: the module-level variables may be replaced by a
  // newer call while this one is still awaiting the network or the decoder.
  const currentController = new AbortController();
  const currentContext = new (window.AudioContext || window.webkitAudioContext)();
  controller = currentController;
  audioContext = currentContext;
  const { signal } = currentController;

  const headers = new Headers();
  headers.append("Content-Type", "application/json");
  const payload = {
    character: "CruiseMissile",
    text: `( ${text} )`,
    stream: true,
    top_k: 5,
    top_p: 0.8,
    temperature: 0.8,
    batch_size: 100,
    text_split_method: 'cut4',
    text_language: 'en',
    prompt_language: 'en',
    save_temp: true,
  };

  try {
    const res = await fetch('http://127.0.0.1:5000/tts', {
      method: "POST",
      headers,
      body: JSON.stringify(payload),
      redirect: "follow",
      signal,
    });
    if (!res.ok) {
      throw new Error('Network response was not ok');
    }
    // The whole response body is buffered before decoding.
    const arrayBuffer = await res.arrayBuffer();
    const buffer = await currentContext.decodeAudioData(arrayBuffer);
    if (signal.aborted) {
      // A newer call took over while decoding; do not start stale audio.
      return;
    }
    const source = currentContext.createBufferSource();
    source.buffer = buffer;
    source.connect(currentContext.destination);
    source.start();
  } catch (err) {
    if (signal.aborted) {
      // Superseded on purpose (aborted fetch / closed context): not an error.
      return;
    }
    console.error(`下载错误:${err.message}`);
  }
}

3.完整代码如下。如果需要搭建 GPT-SoVITS-Inference,请自行查看:
www.yuque.com/xter/zibxlp
www.yuque.com/baicaigongc…

// Shared audio context used by playAudioStream for decoding and playback; falls
// back to the webkit-prefixed constructor when window.AudioContext is missing.
// playAudioStream closes it and assigns a fresh one on every call.
let audioContext = new (window.AudioContext || window.webkitAudioContext)();
// Create an AbortController instance to manage cancellation of the TTS request;
// playAudioStream aborts it and assigns a fresh one on every call.
let controller = new AbortController();


/**
 * Requests speech for `text` from the local TTS endpoint and plays the result.
 *
 * Every call first cancels the previous call: the module-level `controller`
 * is aborted and the module-level `audioContext` is closed, then both are
 * replaced with fresh instances owned by this call.
 *
 * @param {string} text - Text to synthesise; it is sent wrapped as `( text )`.
 * @returns {Promise<void>} Settles once playback has started, the call has been
 *   superseded by a newer one, or a failure has been logged. Never rejects for
 *   request or decoding failures.
 */
async function playAudioStream(text) {
  // Cancel whatever the previous call still has in flight.
  controller.abort();
  // close() returns a promise; swallow its rejection so a failed close never
  // surfaces as an unhandled rejection.
  Promise.resolve(audioContext?.close()).catch(() => {});

  // Keep call-local references: the module-level variables may be replaced by a
  // newer call while this one is still awaiting the network or the decoder.
  const currentController = new AbortController();
  const currentContext = new (window.AudioContext || window.webkitAudioContext)();
  controller = currentController;
  audioContext = currentContext;
  const { signal } = currentController;

  const headers = new Headers();
  headers.append("Content-Type", "application/json");
  const payload = {
    character: "CruiseMissile",
    text: `( ${text} )`,
    stream: true,
    top_k: 5,
    top_p: 0.8,
    temperature: 0.8,
    batch_size: 100,
    text_split_method: 'cut4',
    text_language: 'en',
    prompt_language: 'en',
    save_temp: true,
  };

  try {
    const res = await fetch('http://127.0.0.1:5000/tts', {
      method: "POST",
      headers,
      body: JSON.stringify(payload),
      redirect: "follow",
      signal,
    });
    if (!res.ok) {
      throw new Error('Network response was not ok');
    }
    // The whole response body is buffered before decoding.
    const arrayBuffer = await res.arrayBuffer();
    const buffer = await currentContext.decodeAudioData(arrayBuffer);
    if (signal.aborted) {
      // A newer call took over while decoding; do not start stale audio.
      return;
    }
    const source = currentContext.createBufferSource();
    source.buffer = buffer;
    source.connect(currentContext.destination);
    source.start();
  } catch (err) {
    if (signal.aborted) {
      // Superseded on purpose (aborted fetch / closed context): not an error.
      return;
    }
    console.error(`下载错误:${err.message}`);
  }
}




/**
 * Selects a paragraph on the page, copies its text (minus the injected
 * translation, CJK characters and newlines) to the clipboard and, once the
 * copy has succeeded, hands the same text to playAudioStream.
 *
 * @param {Element} element - Paragraph element to select, copy and read aloud.
 */
let handleParmCopy = (element) => {
  // Work on a deep copy so that blanking the translation nodes never alters the page.
  const detachedCopy = element.cloneNode(true);
  const translationNodes = detachedCopy.querySelectorAll('[data-immersive-translate-translation-element-mark="1"]');
  for (const translationNode of translationNodes) {
    translationNode.textContent = '';
  }

  // Replace the current selection with the contents of the live paragraph.
  const paragraphRange = document.createRange();
  paragraphRange.selectNodeContents(element);
  const pageSelection = window.getSelection();
  pageSelection.removeAllRanges();
  pageSelection.addRange(paragraphRange);

  // Drop CJK ideographs (U+4E00-U+9FA5) and line breaks from the remaining text.
  const spokenText = detachedCopy.textContent.replace(/[\u4e00-\u9fa5\n]/g, '');

  const onCopied = () => {
    playAudioStream(spokenText);
  };
  const onCopyFailed = (err) => {
    console.log(err, 'err');
  };
  // Playback only starts after the clipboard write has resolved.
  navigator.clipboard.writeText(spokenText).then(onCopied).catch(onCopyFailed);
};



// Arrow-key paragraph navigation: ArrowUp / ArrowDown move to the previous /
// next paragraph, scroll it into view and pass it to handleParmCopy.
// The listener is installed after a 2 s delay — presumably to give the
// translation extension time to tag the paragraphs; TODO confirm.
setTimeout(() => {
  // Queried once: paragraphs tagged after this point are not part of the list.
  const paragraphs = document.querySelectorAll('[data-immersive-translate-paragraph="1"]');
  // NOTE(review): navigation starts at index 0, so the first ArrowDown lands on
  // the second paragraph; kept as in the original.
  let currentIndex = 0;

  // Scrolls the paragraph at `index` to the centre of the viewport and processes it.
  const focusParagraph = (index) => {
    const element = paragraphs[index];
    element.scrollIntoView({ behavior: 'smooth', block: 'center' });
    handleParmCopy(element);
  };

  document.addEventListener('keydown', (event) => {
    if (event.key === 'ArrowUp' && currentIndex > 0) {
      currentIndex -= 1;
      focusParagraph(currentIndex);
    } else if (event.key === 'ArrowDown' && currentIndex < paragraphs.length - 1) {
      // Stop on the last paragraph: stepping to `paragraphs.length` would yield
      // undefined and make scrollIntoView throw.
      currentIndex += 1;
      focusParagraph(currentIndex);
    }
  });
}, 2000);