树莓派检测声音输入完毕
在做智能机器人的时候,遇到一个问题:唤醒设备并进入录音模式后,如何在声音停止时自动结束录音。这里使用 Python 实现。
开发环境
- 树莓派 4b
- usb 免驱麦克风
import pyaudio
import wave
import re
import os
import json
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from tts import get_text_from_audio,text_to_audio
# Compiled once at import time: case-insensitive, whole-phrase match for the
# device name this file searches for.
_USB_PNP_SOUND_DEVICE_RE = re.compile(r'\bUSB PnP Sound Device\b', re.IGNORECASE)


def contains_usb_pnp_sound_device(input_string):
    """Return True if *input_string* contains "USB PnP Sound Device".

    The match is case-insensitive and anchored on word boundaries, so
    "usb pnp sound device (hw:1,0)" matches while "USB PnP Sound Devices"
    does not.

    Args:
        input_string: Device name to inspect. Anything that is not a ``str``
            (for example ``None`` when a device reports no name) is treated
            as "no match" instead of raising ``TypeError``.

    Returns:
        bool: True when the phrase is present, False otherwise.
    """
    if not isinstance(input_string, str):
        return False
    return _USB_PNP_SOUND_DEVICE_RE.search(input_string) is not None
def search_usb_sound_device():
    """Return the PyAudio device index of the "USB PnP Sound Device".

    Every device name is printed while scanning. If several devices match,
    the index of the last match is returned; if none match, 0 is returned.

    Returns:
        int: Index usable as ``input_device_index`` when opening a stream.
    """
    p = pyaudio.PyAudio()
    dev_index = 0
    try:
        for ii in range(p.get_device_count()):
            # Look the device up once and reuse the name for both the log
            # line and the match.
            name = p.get_device_info_by_index(ii).get('name')
            print(name)
            # Skip devices without a name so the matcher never sees None.
            if name and contains_usb_pnp_sound_device(name):
                print(f"找到设备了:{ii}")
                dev_index = ii
    finally:
        # Release the PyAudio instance even if enumeration fails part-way.
        p.terminate()
    return dev_index
def energy_based_vad(stream, threshold=0.01, duration_threshold=1.0, rate=44100, chunk_size=1024):
    """Collect audio chunks from *stream* until the utterance is judged over.

    Each chunk is interpreted as 16-bit integer samples and its energy is the
    mean of the squared raw sample values, so *threshold* is expressed in
    squared-sample units. A chunk counts as speech when its energy exceeds
    *threshold*.

    Recording stops at the first non-speech chunk that arrives at least
    *duration_threshold* seconds after speech was first detected; that chunk
    is not included in the result. Recording also stops when the stream
    returns no data or when Ctrl+C is pressed.

    Args:
        stream: Object whose ``read(chunk_size)`` returns raw audio bytes.
        threshold: Energy above which a chunk is treated as speech.
        duration_threshold: Minimum time in seconds between the first speech
            chunk and the silent chunk that ends the recording.
        rate: Value used to convert chunk counts to seconds.
        chunk_size: Argument passed to ``stream.read`` and used, together
            with *rate*, to compute elapsed time.

    Returns:
        list: The raw chunks read before the stop condition, in order.
    """
    frames = []
    start_time = None  # elapsed time (s) at which speech was first detected
    try:
        while True:
            data = stream.read(chunk_size)
            if not data:
                # Exhausted stream: stop instead of dividing by zero below
                # and looping forever on empty reads.
                break
            audio_array = np.frombuffer(data, dtype=np.int16)
            # Widen before squaring so int16 values cannot overflow.
            energy = np.sum(audio_array.astype(np.float32) ** 2) / len(audio_array)
            is_speech = energy > threshold
            # Elapsed time is derived from the chunks kept so far, not from
            # a wall clock.
            elapsed = len(frames) * chunk_size / rate
            if start_time is None:
                if is_speech:
                    start_time = elapsed
            elif not is_speech and elapsed - start_time >= duration_threshold:
                break
            frames.append(data)
    except KeyboardInterrupt:
        # Ctrl+C ends the recording early; keep what was captured.
        pass
    return frames
def record_audio(wave_out_path):
    """Record one utterance to *wave_out_path* and return its transcription.

    Opens the USB sound device as a mono, 16-bit, 44.1 kHz input stream,
    records with ``energy_based_vad`` until it reports the utterance is over,
    and writes the captured chunks to a WAV file. The file is then passed to
    ``get_text_from_audio`` (imported from ``tts``; presumably speech-to-text,
    confirm against that module), whose output is parsed as JSON.

    Errors raised while recording or writing the file are reported on stdout
    and transcription is still attempted, as before. Errors from opening the
    stream or from transcription propagate to the caller.

    Args:
        wave_out_path: Path of the WAV file to write and then transcribe.

    Returns:
        The value stored under the ``"Result"`` key of the parsed response,
        or None when that key is absent.
    """
    dev_index = search_usb_sound_device()
    sample_format = pyaudio.paInt16  # 16-bit resolution
    channels = 1                     # mono
    sample_rate = 44100              # 44.1 kHz sampling rate
    frames_per_buffer = 4096         # 2^12 samples for the stream buffer
    # Energy threshold and duration threshold handed to the detector.
    threshold_value = 30000
    duration_threshold_value = 1.3

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            format=sample_format,
            rate=sample_rate,
            channels=channels,
            input_device_index=dev_index,
            input=True,
            frames_per_buffer=frames_per_buffer,
        )
        try:
            try:
                print("开始录音,按 Ctrl+C 结束录音")
                # chunk_size is left at the detector's default, as before.
                audio_frames = energy_based_vad(
                    stream,
                    threshold=threshold_value,
                    duration_threshold=duration_threshold_value,
                    rate=sample_rate,
                )
                print("录音结束")
                # The context manager closes the file even if writing fails.
                with wave.open(wave_out_path, 'wb') as wf:
                    wf.setnchannels(channels)
                    wf.setsampwidth(audio.get_sample_size(sample_format))
                    wf.setframerate(sample_rate)
                    print("* recording")
                    wf.writeframes(b''.join(audio_frames))
                    print("* done recording")
            finally:
                # Always release the input stream, on success or failure.
                stream.stop_stream()
                stream.close()
        except Exception as exc:
            # Best effort: report the failure and still try to transcribe.
            print("录音被用户中断")
            print(f"错误详情: {exc!r}")
    finally:
        # Release the PyAudio instance, which was previously never freed.
        audio.terminate()

    # Kept outside any `finally` so the return cannot swallow
    # KeyboardInterrupt or SystemExit.
    word = json.loads(get_text_from_audio(wave_out_path))
    return word.get("Result")
直接调用 record_audio 方法即可:参数为录音文件的保存路径,返回值为识别结果中的 "Result" 字段。