树莓派实现检测声音输入完毕：在做智能机器人的时候，遇到一个问题——当唤醒设备后进入录音模式，如何在声音停止后自动结束录音。

树莓派检测声音输入完毕

在做智能机器人的时候，遇到一个问题：当唤醒设备后，进入录音模式，如何在声音停止后自动结束录音。这里使用 Python 实现。

开发环境

树莓派 4b
usb 免驱麦克风

import pyaudio
import wave
import re
import os
import json
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from tts import get_text_from_audio,text_to_audio

# Check whether a string mentions "USB PnP Sound Device".
def contains_usb_pnp_sound_device(input_string):
    """Return True if *input_string* contains "USB PnP Sound Device".

    The search is case-insensitive and anchored on word boundaries, so the
    phrase does not match when it is glued to adjacent word characters.
    """
    found = re.search(
        r'\bUSB PnP Sound Device\b',
        input_string,
        flags=re.IGNORECASE,
    )
    return found is not None
# Find the "USB PnP Sound Device" among the devices PyAudio reports.
def search_usb_sound_device():
    """Return the PyAudio device index of the USB PnP Sound Device.

    The name of every device is printed while scanning. When several devices
    match, the index of the last match is returned; when none matches, the
    result is 0.
    """
    p = pyaudio.PyAudio()
    dev_index = 0
    try:
        for ii in range(p.get_device_count()):
            # Query each device once and reuse the name for both the log
            # line and the match.
            name = p.get_device_info_by_index(ii).get('name')
            print(name)
            if contains_usb_pnp_sound_device(name):
                print(f"找到设备了:{ii}")
                dev_index = ii
    finally:
        # Always release the PyAudio instance, even if a device query raises.
        p.terminate()
    return dev_index
    
def energy_based_vad(stream, threshold=0.01, duration_threshold=1.0, rate=44100, chunk_size=1024):
    """Collect audio chunks from *stream* until an energy-based stop rule fires.

    A chunk's energy is the mean of its squared int16 samples, and the chunk
    counts as speech when that energy exceeds ``threshold``. Reading stops at
    the first non-speech chunk that arrives at least ``duration_threshold``
    seconds after the first speech chunk, when the stream returns no data, or
    when Ctrl+C is pressed.

    Args:
        stream: Object whose ``read(chunk_size)`` returns raw bytes; the bytes
            are interpreted as int16 samples.
        threshold: Energy above which a chunk is treated as speech. It is
            compared against the energy of raw (un-normalised) int16 samples.
        duration_threshold: Minimum time in seconds between the first speech
            chunk and a non-speech chunk for that chunk to end the recording.
        rate: Sample rate in Hz, used only to turn chunk counts into seconds.
        chunk_size: Value passed to ``stream.read``; also used as the number
            of frames per chunk when computing elapsed time.

    Returns:
        The raw chunks read, in order. The non-speech chunk that triggers the
        stop is not included.
    """
    frames = []
    start_time = None  # seconds at which the first speech chunk was seen

    try:
        while True:
            data = stream.read(chunk_size)
            if not data:
                # An exhausted stream yields empty reads; without this guard
                # the energy would be 0/0 and the loop would never end.
                break

            samples = np.frombuffer(data, dtype=np.int16)
            # Widen to float before squaring so int16 values cannot overflow.
            energy = np.sum(samples.astype(np.float32) ** 2) / len(samples)
            is_speech = energy > threshold

            # Position of this chunk, derived from the chunks kept so far.
            elapsed = len(frames) * chunk_size / rate
            if is_speech:
                if start_time is None:
                    start_time = elapsed
            elif start_time is not None and elapsed - start_time >= duration_threshold:
                break

            frames.append(data)
    except KeyboardInterrupt:
        # Ctrl+C ends the recording; whatever was captured so far is returned.
        pass

    return frames
# Record from the USB microphone, save a WAV file and transcribe it.
def record_audio(wave_out_path):
    """Record speech to *wave_out_path* and return its recognised text.

    The USB microphone is opened as a mono, 16-bit, 44.1 kHz input stream and
    chunks are collected by ``energy_based_vad``. The captured audio is written
    to ``wave_out_path`` as a WAV file, which is then passed to
    ``get_text_from_audio``.

    Args:
        wave_out_path: Path of the WAV file to write and transcribe.

    Returns:
        The ``"Result"`` entry of the JSON document returned by
        ``get_text_from_audio`` (``None`` if the key is absent).
        NOTE(review): ``get_text_from_audio`` comes from the project's ``tts``
        module; it is assumed to return JSON text - confirm.
    """
    dev_index = search_usb_sound_device()
    form_1 = pyaudio.paInt16  # 16-bit resolution
    chans = 1  # mono
    samp_rate = 44100  # 44.1 kHz sampling rate
    chunk = 4096  # frames per buffer for the input stream
    # Energy threshold and duration threshold handed to the detector.
    threshold_value = 30000
    duration_threshold_value = 1.3

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            format=form_1,
            rate=samp_rate,
            channels=chans,
            input_device_index=dev_index,
            input=True,
            frames_per_buffer=chunk,
        )
        try:
            print("开始录音，按 Ctrl+C 结束录音")
            audio_frames = energy_based_vad(
                stream,
                threshold=threshold_value,
                duration_threshold=duration_threshold_value,
                rate=samp_rate,
            )
            print("录音结束")
            # The context manager closes the file even if a write fails.
            with wave.open(wave_out_path, 'wb') as wf:
                wf.setnchannels(chans)
                wf.setsampwidth(audio.get_sample_size(form_1))
                wf.setframerate(samp_rate)
                print("* recording")
                wf.writeframes(b''.join(audio_frames))
                print("* done recording")
        except Exception as e:
            # Best effort, as before: report the failure and still attempt the
            # transcription below. Ctrl+C never reaches this handler
            # (KeyboardInterrupt is not an Exception), so the message names
            # the real error instead of blaming a user interrupt.
            print(f"录音失败: {e!r}")
        finally:
            # Close the input stream on success and on failure alike.
            stream.stop_stream()
            stream.close()
    finally:
        # Release the PyAudio instance, which was previously never terminated.
        audio.terminate()

    word = json.loads(get_text_from_audio(wave_out_path))
    return word.get("Result")

直接调用 record_audio 方法即可：需要传入录音文件的保存路径（文件名），方法返回语音识别结果。