前端音频降噪-RNNoise

53 阅读2分钟

在前端使用WebRTC的场景中,偶尔会遇到用户反馈噪声大、效果不如原生应用、背景杂音都被麦克风收进去等情况。而前端的API中只有开启降噪、开启回声消除等少数几个参数可以设置,且默认都是开启的,让人感觉无从下手。

RNNoise

RNNoise 的核心目标是解决传统噪声抑制算法依赖人工调参、难以适应复杂环境的问题。它结合了信号处理(DSP)和深度学习(RNN),通过递归神经网络动态调整噪声抑制参数,适用于语音通信、音频编辑等场景,支持全频段(如48 kHz)实时处理,且无需GPU即可运行在低功耗设备上(如树莓派)。

效果

image.png 输入是一段有杂音的声音,通过波形图,对比降噪前和降噪后的效果。

代码

可以直接从Gitee获取源代码。

<html>
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  <title>音频降噪</title>
</head>
<body>
  <!-- URL of the audio file to play; read by startPlay() via its id. -->
  <input id="input_audio" value="audio.wav" /><br/>
  <!-- When checked, startPlay() routes the audio through an RNNoiseNode. -->
  <input type="checkbox" id="checkbox_noise" />开启降噪
  <button onclick="startPlay()">开始</button>
  <br/>
  降噪前:<br/>
  <!-- Waveform of the signal before noise suppression. -->
  <canvas id="audio_wave_before" width="1000" height="200" ></canvas>
  <br/>
  降噪后:<br/>
  <!-- Waveform of the signal after noise suppression (only drawn when denoising is enabled). -->
  <canvas id="audio_wave_after" width="1000" height="200" ></canvas>
  <!-- Loaded before the inline script; presumably defines the RNNoiseNode global used there (confirm against the runtime file). -->
  <script src="./rnnoise-runtime.js"></script>
  <script>
    // Shared playback state, created lazily by startPlay() on the first click.

    // Buffer source node of the current playback. The identifier is a
    // misspelling of "audioSource"; it is kept because startPlay() refers to it.
    let audioSrouce;
    // The single AudioContext used by the page.
    let audioContext;
    // Analyser fed directly by the source (signal before noise suppression).
    let analyser_before;
    // Analyser fed by the RNNoise node (signal after noise suppression).
    let analyser_after;

    // Canvas and 2D context for the "before" waveform.
    const audio_wave_before = document.getElementById("audio_wave_before");
    const wave_before_context = audio_wave_before.getContext("2d");

    // Canvas and 2D context for the "after" waveform.
    const audio_wave_after = document.getElementById("audio_wave_after");
    const wave_after_context = audio_wave_after.getContext("2d");

    // Text field holding the URL of the audio file to play.
    const input_audio = document.getElementById("input_audio");
    // Set to true when playback ends; the draw loop in drawWaveForm() exits once it sees true.
    let playEnd = false;
    
    // Id of the most recent startPlay() call; lets an older, slower load
    // notice that a newer click has superseded it.
    let playRequestId = 0;
    // RNNoise node wired into the graph by the previous run, remembered so it
    // can be detached before the graph is rebuilt.
    let activeRnnoiseNode = null;

    /**
     * Click handler for the start button.
     *
     * Fetches and decodes the audio file named in the #input_audio field,
     * then plays it through `analyser_before` and, when #checkbox_noise is
     * checked, through an RNNoiseNode and `analyser_after`. Waveform drawing is
     * started for every analyser that is part of the graph.
     *
     * Any previous playback is stopped and the node graph is rebuilt from
     * scratch, so the function can be called repeatedly. Failures (network,
     * HTTP status, decoding, worklet registration) are logged to the console.
     *
     * @returns {void}
     */
    function startPlay () {
      const url = input_audio.value;
      if (!url) {
        return;
      }

      if (!audioContext) {
        audioContext = new AudioContext({ sampleRate: 48000 });
        analyser_before = audioContext.createAnalyser();
        analyser_after = audioContext.createAnalyser();
      }

      const requestId = ++playRequestId;
      let decodedBuffer = null;
      let useDenoise = false;

      fetch(url)
        .then((res) => {
          if (!res.ok) {
            throw new Error(`Failed to load "${url}": HTTP ${res.status}`);
          }
          return res.arrayBuffer();
        })
        .then((arrayBuffer) => audioContext.decodeAudioData(arrayBuffer))
        .then((audioBuffer) => {
          decodedBuffer = audioBuffer;
          useDenoise = document.getElementById("checkbox_noise").checked;
          return useDenoise ? RNNoiseNode.register(audioContext) : undefined;
        })
        .then(() => {
          // A newer click started another load while this one was in flight.
          if (requestId !== playRequestId) {
            return;
          }

          // Tear down the previous playback. Its handler is detached first:
          // the "ended" event fires asynchronously after stop() and must not
          // flip playEnd for the playback that is about to start.
          if (audioSrouce) {
            audioSrouce.onended = null;
            audioSrouce.stop();
            audioSrouce.disconnect();
          }
          // Drop every connection left over from the previous run; otherwise
          // the raw and the denoised paths both end up reaching the speakers.
          analyser_before.disconnect();
          analyser_after.disconnect();
          if (activeRnnoiseNode) {
            activeRnnoiseNode.disconnect();
            activeRnnoiseNode = null;
          }

          const source = audioContext.createBufferSource();
          source.buffer = decodedBuffer;
          source.onended = () => {
            // Only the current source may end the drawing.
            if (audioSrouce === source) {
              playEnd = true;
            }
          };
          source.connect(analyser_before);

          if (useDenoise) {
            const rnnoise = new RNNoiseNode(audioContext);
            analyser_before.connect(rnnoise);
            rnnoise.connect(analyser_after);
            analyser_after.connect(audioContext.destination);
            activeRnnoiseNode = rnnoise;
          } else {
            analyser_before.connect(audioContext.destination);
          }

          source.start();
          // Published only after start() succeeded, so the teardown above can
          // always call stop() on it safely.
          audioSrouce = source;

          // A previous playback that ran to its end left playEnd at true.
          playEnd = false;
          drawWaveForm(analyser_before, wave_before_context, audio_wave_before.width, 200);
          if (useDenoise) {
            drawWaveForm(analyser_after, wave_after_context, audio_wave_after.width, 200);
          }
        })
        .catch((err) => {
          console.error("startPlay failed:", err);
        });
    }

    // Maps a 2D context to the token of the draw loop that currently owns it,
    // so starting a new loop on a canvas retires the loop that was there before.
    const activeWaveLoops = new WeakMap();

    /**
     * Starts a requestAnimationFrame loop that plots a scrolling peak-level
     * waveform: each frame reads the analyser's time-domain bytes, takes the
     * largest deviation from the 128 zero level and draws it as one vertical
     * bar, centred vertically, at the next x position. When the bar position
     * reaches `width` the canvas is cleared and drawing restarts at x = 0.
     *
     * The loop exits when the global `playEnd` is true or when another call
     * to drawWaveForm() has been made for the same `ctx`.
     *
     * @param {AnalyserNode} analyser - Source of the time-domain samples.
     * @param {CanvasRenderingContext2D} ctx - Context to draw on.
     * @param {number} width - Drawable width in pixels.
     * @param {number} height - Drawable height in pixels; bars are centred at height / 2.
     * @returns {void}
     */
    function drawWaveForm (analyser, ctx, width, height) {
      const bufferLength = analyser.frequencyBinCount;
      const dataArray = new Uint8Array(bufferLength);
      const midY = height / 2;

      const loopToken = {};
      activeWaveLoops.set(ctx, loopToken);

      clearBackground(ctx, width, height);
      ctx.lineWidth = 1;
      ctx.strokeStyle = 'blue';
      let x = 0;

      function draw () {
        if (playEnd || activeWaveLoops.get(ctx) !== loopToken) {
          return;
        }
        requestAnimationFrame(draw);
        analyser.getByteTimeDomainData(dataArray);

        // Samples are unsigned bytes with silence at 128, so the level is the
        // distance from 128, not the raw byte value.
        let peak = 0;
        for (let i = 0; i < bufferLength; ++i) {
          peak = Math.max(peak, Math.abs(dataArray[i] - 128));
        }
        const amplitude = peak / 128;

        ctx.beginPath();
        ctx.moveTo(x, midY - amplitude * midY);
        ctx.lineTo(x, midY + amplitude * midY);
        ctx.stroke();
        x += 1;

        // Wrap as soon as the next column would fall outside the canvas.
        if (x >= width) {
          clearBackground(ctx, width, height);
          x = 0;
        }
      }
      draw();
    }

    /**
     * Paints the rectangle (0, 0)-(width, height) of the given 2D context
     * solid white, erasing whatever was drawn there.
     *
     * @param {CanvasRenderingContext2D} ctx - Context to paint on; its fillStyle is left set to white.
     * @param {number} width - Width of the area to clear.
     * @param {number} height - Height of the area to clear.
     * @returns {void}
     */
    function clearBackground (ctx, width, height) {
      const white = 'rgb(255, 255, 255)';
      const origin = [0, 0];

      ctx.fillStyle = white;
      ctx.fillRect(...origin, width, height);
    }
  </script>
</body>
</html>

总结

上面只是简单地使用了RNNoise,并将音频采样率设置为48000。可以多了解一下RNNoise的用法,相信降噪效果还能有进一步提升。

其他

如果你也是专注前端多媒体或者对前端多媒体感兴趣,可以关注

qrcode.jpg