树莓派实现检测声音输入完毕

394 阅读2分钟

树莓派检测声音输入完毕

在做智能机器人的时候,遇到一个问题:唤醒设备后进入录音模式,如何在声音停止时自动结束录音?这里使用 Python 实现。

开发环境

  • 树莓派 4b
  • usb 免驱麦克风
import pyaudio
import wave
import re
import os
import json
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from tts import get_text_from_audio,text_to_audio

# Check whether a string mentions "USB PnP Sound Device"
def contains_usb_pnp_sound_device(input_string):
    """Return True if *input_string* contains "USB PnP Sound Device".

    The phrase is matched case-insensitively and must sit on word
    boundaries, so it is not found when glued to neighbouring word
    characters.
    """
    found = re.search(r'\bUSB PnP Sound Device\b', input_string, flags=re.IGNORECASE)
    return found is not None
# Look up the USB PnP Sound Device among the PyAudio devices
def search_usb_sound_device():
    """Return the PyAudio device index of the USB PnP Sound Device.

    Every device name is printed while scanning. If several devices match,
    the index of the last match is returned; if none match, 0 is returned.
    """
    p = pyaudio.PyAudio()
    dev_index = 0
    try:
        for ii in range(p.get_device_count()):
            # Query the device once and reuse the name for logging and matching.
            name = p.get_device_info_by_index(ii).get('name')
            print(name)
            if contains_usb_pnp_sound_device(name):
                print(f"找到设备了:{ii}")
                dev_index = ii
    finally:
        # Release the PyAudio instance even if enumeration raises.
        p.terminate()
    return dev_index
    
def energy_based_vad(stream, threshold=0.01, duration_threshold=1.0, rate=44100, chunk_size=1024):
    """Collect audio chunks from *stream* until the utterance is judged over.

    Each chunk is decoded as int16 samples and its energy (mean of the
    squared sample values) is compared with *threshold*. Recording stops at
    the first chunk whose energy is not above the threshold once at least
    *duration_threshold* seconds have passed since the first loud chunk.
    That final quiet chunk is not included in the result.

    Args:
        stream: Object whose ``read(chunk_size)`` returns raw int16 bytes.
        threshold: Energy level above which a chunk counts as speech. The
            energy is on the raw int16 scale, so the default is exceeded by
            almost any non-zero signal; pass a tuned value.
        duration_threshold: Minimum time in seconds between the first loud
            chunk and the quiet chunk that ends the recording.
        rate: Sample rate in Hz, used to convert chunk counts to seconds.
        chunk_size: Number of samples requested per read.

    Returns:
        The list of raw byte chunks read before recording stopped. Reading
        also stops on KeyboardInterrupt or when the stream returns no data;
        the chunks gathered so far are returned in both cases.
    """
    frames = []
    # Position in seconds of the first loud chunk; None until speech is seen.
    start_time = None

    try:
        while True:
            data = stream.read(chunk_size)
            audio_array = np.frombuffer(data, dtype=np.int16)
            if audio_array.size == 0:
                # An empty read has no energy to measure (it would divide by
                # zero) and would otherwise keep the loop spinning forever.
                break

            energy = np.sum(audio_array.astype(np.float32) ** 2) / len(audio_array)
            is_speech = energy > threshold
            elapsed = len(frames) * chunk_size / rate

            if is_speech:
                if start_time is None:
                    start_time = elapsed
            elif start_time is not None and elapsed - start_time >= duration_threshold:
                # NOTE(review): this measures time since speech onset, not the
                # length of the trailing silence, so a single quiet chunk after
                # duration_threshold ends the recording - confirm intended.
                break
            frames.append(data)
    except KeyboardInterrupt:
        # Ctrl+C ends the recording early; keep what was captured.
        pass

    return frames
# Record one utterance from the USB microphone and transcribe it
def record_audio(wave_out_path):
    """Record until the speech ends, save it as a WAV file and transcribe it.

    The microphone is located with ``search_usb_sound_device`` and read via
    ``energy_based_vad``. The captured audio is written to *wave_out_path* as
    16-bit mono at 44.1 kHz, then passed to ``get_text_from_audio``, whose
    output is parsed as JSON.

    Args:
        wave_out_path: Path of the WAV file to write.

    Returns:
        The value stored under ``"Result"`` in the parsed transcription
        response, or None if recording or saving the audio failed (the error
        is printed and no transcription is attempted, so a stale file at
        *wave_out_path* is never transcribed).

    Raises:
        Errors from opening the input stream or from the transcription step
        propagate to the caller.
    """
    dev_index = search_usb_sound_device()
    sample_format = pyaudio.paInt16  # 16-bit resolution
    chans = 1  # 1 channel
    samp_rate = 44100  # 44.1kHz sampling rate
    chunk = 4096  # 2^12 samples for buffer

    # Energy threshold and timing used for end-of-speech detection.
    threshold_value = 30000
    duration_threshold_value = 1.3

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            format=sample_format,
            rate=samp_rate,
            channels=chans,
            input_device_index=dev_index,
            input=True,
            frames_per_buffer=chunk,
        )
        try:
            print("开始录音,按 Ctrl+C 结束录音")
            audio_frames = energy_based_vad(
                stream,
                threshold=threshold_value,
                duration_threshold=duration_threshold_value,
                rate=samp_rate,
            )
            print("录音结束")
            # The context manager closes the WAV file even if writing fails.
            with wave.open(wave_out_path, 'wb') as wf:
                wf.setnchannels(chans)
                wf.setsampwidth(audio.get_sample_size(sample_format))
                wf.setframerate(samp_rate)
                print("* recording")
                wf.writeframes(b''.join(audio_frames))
                print("* done recording")
        except Exception as e:
            # Ctrl+C is already handled inside energy_based_vad, so anything
            # arriving here is a real failure; report it as such.
            print(f"录音失败: {e}")
            return None
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        # Always release the PortAudio resources held by this instance.
        audio.terminate()

    word = json.loads(get_text_from_audio(wave_out_path))
    return word.get("Result")

直接调用 record_audio 方法即可,调用时需要传入录音文件的保存路径(文件名),方法会返回语音识别的结果。