人工智能交互Day 4
1、 API
1. 什么是API?
应用编程接口(API)是一组用于构建和集成应用软件的定义和协议。
2. API的原理和机制
通过API,您无需了解实现原理,也能将您的产品或服务与其他产品或服务互通。这样可以简化应用的开发,节省时间和成本。在您开发新的工具和产品,或管理现有工具和产品时,强大灵活的API可以帮助您简化设计、管理和使用,并带来更多创新机遇。
3. API运用的示例
OpenAI、百度AIP
2. HTTP响应
1. 什么是HTTP响应?
当服务器收到浏览器的请求后,会发送响应消息给浏览器。 一个完整的响应消息主要包括响应首行、响应头信息、空行和响应正文。
2. 常见的HTTP响应状态码
信息响应(100-199)
成功响应(200-299)
重定向消息(300-399)
客户端错误响应(400-499)
服务端错误响应(500-599)
3. 设计智能语音交互机器人
步骤:
1.设计拾音程序
eg:
import wave
import pyaudio
def record_audio(filename, duration=5, rate=44100, channels=1, chunk=8192):
    """Record audio from the default microphone and save it as a WAV file.

    Args:
        filename: Path of the output WAV file.
        duration: Recording length in seconds (default 5, matching the
            original hard-coded value).
        rate: Sample rate in Hz.
        channels: Number of input channels (1 = mono).
        chunk: Frames read per buffer.
    """
    mic = pyaudio.PyAudio()
    # Query the sample width before terminate() so the handle state is valid.
    sample_width = mic.get_sample_size(pyaudio.paInt16)
    try:
        stream = mic.open(
            format=pyaudio.paInt16,
            channels=channels,
            rate=rate,
            input=True,
            frames_per_buffer=chunk)
        print('recording...')
        frames = []
        # Ceil division so the FULL duration is captured; the original
        # int(44100 / 8192 * 5) truncated and recorded slightly short.
        total_frames = int(rate * duration)
        for _ in range((total_frames + chunk - 1) // chunk):
            frames.append(stream.read(chunk))
        stream.stop_stream()
        stream.close()
    finally:
        # Always release the PortAudio handle, even if a read fails.
        mic.terminate()
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))
2.STT程序
import wave
import json
from vosk import Model, KaldiRecognizer
def recognized(audio_file="chatting.wav", model_path="vosk-model-small-cn-0.22"):
    """Transcribe a WAV file to text with an offline Vosk model.

    Args:
        audio_file: Path to the WAV file to transcribe (default matches the
            file written by the recording step).
        model_path: Directory containing the Vosk model.

    Returns:
        The recognized text; partial results are joined with single spaces
        (the original accumulated a stray leading space — fixed here).
    """
    model = Model(model_path)
    wf = wave.open(audio_file, "rb")
    try:
        recognizer = KaldiRecognizer(model, wf.getframerate())
        pieces = []
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            # AcceptWaveform returns True when a full utterance is ready.
            if recognizer.AcceptWaveform(data):
                pieces.append(json.loads(recognizer.Result())['text'])
        # Flush whatever audio remains after the last full utterance.
        pieces.append(json.loads(recognizer.FinalResult())['text'])
    finally:
        # Close the file even if recognition raises.
        wf.close()
    result = " ".join(pieces)
    print(result)
    return result
3. 合成语音程序
# generate_initial_voice.py
from aip import AipSpeech
import os
from config import APP_ID, API_KEY, SECRET_KEY # 从配置文件导入API密钥
# Initialize the Baidu AipSpeech TTS client with credentials from config.py.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
def synthesize_initial_voice(text, output_file='yuyinjieguo_voice.mp3'):
    """Synthesize `text` to speech via Baidu TTS, save it, and play it.

    Args:
        text: Text to synthesize (language code 'zh').
        output_file: Path of the MP3 file to write.

    Returns:
        True when synthesis succeeded and playback was attempted,
        False when the API returned an error dict.
    """
    import subprocess  # local import: only playback needs it
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,  # volume
        'spd': 5,  # speed
        'pit': 5,  # pitch
        'per': 4   # voice persona id
    })
    # Baidu returns raw MP3 bytes on success and an error dict on failure.
    if isinstance(result, dict):
        print("语音合成失败,错误信息:", result)
        return False
    with open(output_file, 'wb') as f:
        f.write(result)
    print(f"语音合成成功,文件已保存为 {output_file}")
    # Argument-list form avoids shell interpolation of the file name; the
    # original os.system f-string broke on paths with spaces or quotes.
    subprocess.run(["mpg321", output_file], check=False)
    return True
4. 调用OpenAI接口
import wave
import pyaudio
import requests
import json
import logging
# gpt-4o
def send_request_4(kw):
    """Send a chat request to a GPT-4o-compatible endpoint and return the reply.

    Args:
        kw: The recognized user utterance to send as the chat message.

    Returns:
        The model's reply text on success, otherwise an error string
        (callers treat the return value as plain text either way).
    """
    import os  # local import: only used for the env-var lookup below
    # SECURITY: a hard-coded API key in source code is a credential leak.
    # Prefer the OPENAI_API_KEY environment variable; the literal fallback
    # should be rotated and removed.
    api_key = os.environ.get(
        'OPENAI_API_KEY',
        'sk-KQlutD1KV9bDNT5q1dAc203cB3714dAcBb754616303eF621')
    try:
        api_url = 'https://api.apiyi.com/v1/chat/completions'
        # Request headers, including the API key.
        headers = {
            'Authorization': f'Bearer {api_key}',
            'Content-Type': 'application/json'
        }
        # The spoken query is the user's turn, not a system prompt.
        payload = {
            'model': "gpt-4o",
            'messages': [{"role": "user", "content": kw}]
        }
        # `json=` serializes the payload; a timeout keeps the robot from
        # hanging forever when the endpoint is unreachable.
        response = requests.post(api_url, headers=headers, json=payload,
                                 timeout=30)
        if response.status_code == 200:
            # Extract the assistant message from the first choice.
            data = response.json()
            res = data['choices'][0]['message']['content']
            print(res)
            return res
        else:
            return f'Error: Received status code {response.status_code}'
    except Exception as e:
        # Log with the traceback instead of burying the error at INFO level.
        logging.exception(e)
        return 'An error occurred while sending the request'
5.模拟机器人表情程序
import tkinter as tk
from tkinter import Label
import cv2
from PIL import Image, ImageTk
def play_video():
    """Loop-play roboteyes.mp4 inside the module-level Tk label `lbl_video`."""
    cap = cv2.VideoCapture('roboteyes.mp4')

    def update_frame():
        ret, frame = cap.read()
        if not ret:
            # End of stream: rewind so the animation loops seamlessly.
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            ret, frame = cap.read()
            if not ret:
                # Video missing/unreadable: retry later instead of crashing
                # on cv2.resize(None) — the original had no guard here.
                lbl_video.after(100, update_frame)
                return
        # Fit the frame to the current window size.
        frame = cv2.resize(frame, (root.winfo_width(), root.winfo_height()))
        # OpenCV frames are BGR; Tk needs RGB via Pillow.
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        imgtk = ImageTk.PhotoImage(image=image)
        # Keep a reference on the label so the image is not garbage-collected.
        lbl_video.imgtk = imgtk
        lbl_video.config(image=imgtk)
        # Schedule the next frame update in 10 ms.
        lbl_video.after(10, update_frame)

    update_frame()
# Build the fullscreen Tk window that hosts the video label.
root = tk.Tk()
root.attributes("-fullscreen", True)
# Explicitly size the window to the screen as a fallback for window
# managers that ignore the fullscreen attribute.
root.geometry(f"{root.winfo_screenwidth()}x{root.winfo_screenheight()}")
root.update_idletasks()  # force layout so winfo_width/height are valid

# Label that displays each decoded video frame.
lbl_video = Label(root)
lbl_video.pack(fill=tk.BOTH, expand=True)

# Start the frame-update loop.
play_video()

# Escape closes the window and ends the program.
root.bind("<Escape>", lambda e: root.destroy())
root.mainloop()
6.启动程序
import os
import threading
import tkinter as tk
import cv2
from PIL import Image, ImageTk
from demo1 import record_audio
from demo2 import recognized
from openai_instruction import send_request_4
from voice_generator import synthesize_initial_voice
# Audio file constants: pre-generated MP3 prompts played via mpg321.
INITIAL_VOICE = 'init.mp3'  # greeting played at startup
EXIT_VOICE = 'exit.mp3'  # farewell played before exiting
# 全屏视频播放函数
def play_video():
    """Create a fullscreen Tk window and loop-play roboteyes.mp4 in it.

    Runs its own Tk main loop, so it blocks the calling thread until the
    window is closed; closing the window hard-exits the whole process.
    """
    cap = cv2.VideoCapture('roboteyes.mp4')

    def update_frame():
        ret, frame = cap.read()
        if not ret:
            # End of stream: rewind so the eyes animation loops forever.
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            ret, frame = cap.read()
            if not ret:
                # Video missing/unreadable: retry later instead of crashing
                # on cv2.resize(None) — the original had no guard here.
                lbl_video.after(100, update_frame)
                return
        # Fit the frame to the current window size.
        frame = cv2.resize(frame, (root.winfo_width(), root.winfo_height()))
        # OpenCV frames are BGR; Tk needs RGB via Pillow.
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        imgtk = ImageTk.PhotoImage(image=image)
        # Keep a reference on the label so the image is not garbage-collected.
        lbl_video.imgtk = imgtk
        lbl_video.config(image=imgtk)
        # Schedule the next frame update in 10 ms.
        lbl_video.after(10, update_frame)

    root = tk.Tk()
    root.attributes("-fullscreen", True)
    lbl_video = tk.Label(root)
    lbl_video.pack(fill=tk.BOTH, expand=True)
    root.update_idletasks()  # force layout so winfo_width/height are valid

    def on_closing():
        # Hard-exit so the robot thread dies together with the window.
        os._exit(0)
    root.protocol("WM_DELETE_WINDOW", on_closing)

    update_frame()
    root.mainloop()
# 机器人程序逻辑
def robot_logic():
    """Main interaction loop: listen, recognize, answer, until told to quit."""
    print('我是智能交互机器人小智,有什么可以帮到您的?')
    os.system(f"mpg321 {INITIAL_VOICE}")  # play the greeting prompt
    while True:
        record_audio('chatting.wav')  # capture one utterance from the mic
        heard = recognized()  # offline speech-to-text of that utterance
        if '小智' in heard or '猪头' in heard:
            # Wake word detected: ask the LLM and speak its answer aloud.
            synthesize_initial_voice(send_request_4(heard))
        elif '退出' in heard or '拜拜' in heard:
            # Exit phrase: play the farewell and leave the loop.
            os.system(f"mpg321 {EXIT_VOICE}")
            break
    os._exit(0)  # also tears down the video window
# Tkinter is not thread-safe and must run in the MAIN thread; the original
# started the GUI in a worker thread, which crashes or hangs on many
# platforms. Run the robot loop in a daemon thread and keep Tk here instead.
robot_thread = threading.Thread(target=robot_logic, daemon=True)
robot_thread.start()
# Blocks in the Tk main loop; robot_logic() calls os._exit(0) on the exit
# phrase, which also tears this window down, so no join is needed.
play_video()