人工智能交互Day 4
1、 API
1. 什么是API?
应用编程接口(API)是一组用于构建和集成应用软件的定义和协议。
2. API的原理和机制
通过API,您无需了解实现原理,也能将您的产品或服务与其他产品或服务互通。这样可以简化应用的开发,节省时间和成本。在您开发新的工具和产品,或管理现有工具和产品时,强大灵活的API可以帮助您简化设计、管理和使用,并带来更多创新机遇。
3. API运用的示例
OpenAI、百度AIP
2. HTTP响应
1. 什么是HTTP响应?
当服务器收到浏览器的请求后,会发送响应消息给浏览器。 一个完整的响应消息主要包括响应首行、响应头信息、空行和响应正文。
2. 常见的HTTP响应状态码
信息响应(100-199)
成功响应(200-299)
重定向消息(300-399)
客户端错误响应(400-499)
服务端错误响应(500-599)
3. 设计智能语音交互机器人
步骤:
1.设计拾音程序
eg:
import wave
import pyaudio
def record_audio(filename, duration=5, rate=44100, channels=1, chunk=8192):
    """Record audio from the default microphone and save it as a WAV file.

    Args:
        filename: Path of the output WAV file.
        duration: Recording length in seconds (default 5, matching the
            original hard-coded value).
        rate: Sample rate in Hz.
        channels: Number of input channels (1 = mono).
        chunk: Frames read per buffer.
    """
    mic = pyaudio.PyAudio()
    # Query the sample width before terminate() so the handle state is valid.
    sample_width = mic.get_sample_size(pyaudio.paInt16)
    try:
        stream = mic.open(
            format=pyaudio.paInt16,
            channels=channels,
            rate=rate,
            input=True,
            frames_per_buffer=chunk)
        print('recording...')
        frames = []
        # Ceil division so the FULL duration is captured; the original
        # int(44100 / 8192 * 5) truncated and recorded slightly short.
        total_frames = int(rate * duration)
        for _ in range((total_frames + chunk - 1) // chunk):
            frames.append(stream.read(chunk))
        stream.stop_stream()
        stream.close()
    finally:
        # Always release the PortAudio handle, even if a read fails.
        mic.terminate()
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))
2.STT程序
import wave
import json
from vosk import Model, KaldiRecognizer
def recognized(audio_file="chatting.wav", model_path="vosk-model-small-cn-0.22"):
    """Transcribe a WAV file to text with an offline Vosk model.

    Args:
        audio_file: Path to the WAV file to transcribe (default matches the
            file written by the recording step).
        model_path: Directory containing the Vosk model.

    Returns:
        The recognized text; partial results are joined with single spaces
        (the original accumulated a stray leading space — fixed here).
    """
    model = Model(model_path)
    wf = wave.open(audio_file, "rb")
    try:
        recognizer = KaldiRecognizer(model, wf.getframerate())
        pieces = []
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            # AcceptWaveform returns True when a full utterance is ready.
            if recognizer.AcceptWaveform(data):
                pieces.append(json.loads(recognizer.Result())['text'])
        # Flush whatever audio remains after the last full utterance.
        pieces.append(json.loads(recognizer.FinalResult())['text'])
    finally:
        # Close the file even if recognition raises.
        wf.close()
    result = " ".join(pieces)
    print(result)
    return result
3. 合成语音程序
# generate_initial_voice.py
from aip import AipSpeech
import os
from config import APP_ID, API_KEY, SECRET_KEY # 从配置文件导入API密钥
# Initialize the Baidu AipSpeech TTS client with credentials from config.py.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
def synthesize_initial_voice(text, output_file='yuyinjieguo_voice.mp3'):
    """Synthesize `text` to speech via Baidu TTS, save it, and play it.

    Args:
        text: Text to synthesize (language code 'zh').
        output_file: Path of the MP3 file to write.

    Returns:
        True when synthesis succeeded and playback was attempted,
        False when the API returned an error dict.
    """
    import subprocess  # local import: only playback needs it
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,  # volume
        'spd': 5,  # speed
        'pit': 5,  # pitch
        'per': 4   # voice persona id
    })
    # Baidu returns raw MP3 bytes on success and an error dict on failure.
    if isinstance(result, dict):
        print("语音合成失败,错误信息:", result)
        return False
    with open(output_file, 'wb') as f:
        f.write(result)
    print(f"语音合成成功,文件已保存为 {output_file}")
    # Argument-list form avoids shell interpolation of the file name; the
    # original os.system f-string broke on paths with spaces or quotes.
    subprocess.run(["mpg321", output_file], check=False)
    return True
4. 调用OpenAI接口
import wave
import pyaudio
import requests
import json
import logging
# gpt-4o
def send_request_4(kw):
    """Send a chat request to a GPT-4o-compatible endpoint and return the reply.

    Args:
        kw: The recognized user utterance to send as the chat message.

    Returns:
        The model's reply text on success, otherwise an error string
        (callers treat the return value as plain text either way).
    """
    import os  # local import: only used for the env-var lookup below
    # SECURITY: a hard-coded API key in source code is a credential leak.
    # Prefer the OPENAI_API_KEY environment variable; the literal fallback
    # should be rotated and removed.
    api_key = os.environ.get(
        'OPENAI_API_KEY',
        'sk-KQlutD1KV9bDNT5q1dAc203cB3714dAcBb754616303eF621')
    try:
        api_url = 'https://api.apiyi.com/v1/chat/completions'
        # Request headers, including the API key.
        headers = {
            'Authorization': f'Bearer {api_key}',
            'Content-Type': 'application/json'
        }
        # The spoken query is the user's turn, not a system prompt.
        payload = {
            'model': "gpt-4o",
            'messages': [{"role": "user", "content": kw}]
        }
        # `json=` serializes the payload; a timeout keeps the robot from
        # hanging forever when the endpoint is unreachable.
        response = requests.post(api_url, headers=headers, json=payload,
                                 timeout=30)
        if response.status_code == 200:
            # Extract the assistant message from the first choice.
            data = response.json()
            res = data['choices'][0]['message']['content']
            print(res)
            return res
        else:
            return f'Error: Received status code {response.status_code}'
    except Exception as e:
        # Log with the traceback instead of burying the error at INFO level.
        logging.exception(e)
        return 'An error occurred while sending the request'
5.模拟机器人表情程序
import tkinter as tk
from tkinter import Label
import cv2
from PIL import Image, ImageTk
def play_video():
    """Loop-play roboteyes.mp4 inside the module-level Tk label `lbl_video`."""
    cap = cv2.VideoCapture('roboteyes.mp4')

    def update_frame():
        ret, frame = cap.read()
        if not ret:
            # End of stream: rewind so the animation loops seamlessly.
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            ret, frame = cap.read()
            if not ret:
                # Video missing/unreadable: retry later instead of crashing
                # on cv2.resize(None) — the original had no guard here.
                lbl_video.after(100, update_frame)
                return
        # Fit the frame to the current window size.
        frame = cv2.resize(frame, (root.winfo_width(), root.winfo_height()))
        # OpenCV frames are BGR; Tk needs RGB via Pillow.
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        imgtk = ImageTk.PhotoImage(image=image)
        # Keep a reference on the label so the image is not garbage-collected.
        lbl_video.imgtk = imgtk
        lbl_video.config(image=imgtk)
        # Schedule the next frame update in 10 ms.
        lbl_video.after(10, update_frame)

    update_frame()
# Build the fullscreen Tk window that hosts the video label.
root = tk.Tk()
root.attributes("-fullscreen", True)
# Explicitly size the window to the screen as a fallback for window
# managers that ignore the fullscreen attribute.
root.geometry(f"{root.winfo_screenwidth()}x{root.winfo_screenheight()}")
root.update_idletasks()  # force layout so winfo_width/height are valid

# Label that displays each decoded video frame.
lbl_video = Label(root)
lbl_video.pack(fill=tk.BOTH, expand=True)

# Start the frame-update loop.
play_video()

# Escape closes the window and ends the program.
root.bind("<Escape>", lambda e: root.destroy())
root.mainloop()
6.启动程序
import os
import threading
import tkinter as tk
import cv2
from PIL import Image, ImageTk
from demo1 import record_audio
from demo2 import recognized
from openai_instruction import send_request_4
from voice_generator import synthesize_initial_voice
# Audio file constants: pre-generated MP3 prompts played via mpg321.
INITIAL_VOICE = 'init.mp3'  # greeting played at startup
EXIT_VOICE = 'exit.mp3'  # farewell played before exiting
# 全屏视频播放函数
def play_video():
    """Create a fullscreen Tk window and loop-play roboteyes.mp4 in it.

    Runs its own Tk main loop, so it blocks the calling thread until the
    window is closed; closing the window hard-exits the whole process.
    """
    cap = cv2.VideoCapture('roboteyes.mp4')

    def update_frame():
        ret, frame = cap.read()
        if not ret:
            # End of stream: rewind so the eyes animation loops forever.
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            ret, frame = cap.read()
            if not ret:
                # Video missing/unreadable: retry later instead of crashing
                # on cv2.resize(None) — the original had no guard here.
                lbl_video.after(100, update_frame)
                return
        # Fit the frame to the current window size.
        frame = cv2.resize(frame, (root.winfo_width(), root.winfo_height()))
        # OpenCV frames are BGR; Tk needs RGB via Pillow.
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        imgtk = ImageTk.PhotoImage(image=image)
        # Keep a reference on the label so the image is not garbage-collected.
        lbl_video.imgtk = imgtk
        lbl_video.config(image=imgtk)
        # Schedule the next frame update in 10 ms.
        lbl_video.after(10, update_frame)

    root = tk.Tk()
    root.attributes("-fullscreen", True)
    lbl_video = tk.Label(root)
    lbl_video.pack(fill=tk.BOTH, expand=True)
    root.update_idletasks()  # force layout so winfo_width/height are valid

    def on_closing():
        # Hard-exit so the robot thread dies together with the window.
        os._exit(0)
    root.protocol("WM_DELETE_WINDOW", on_closing)

    update_frame()
    root.mainloop()
# 机器人程序逻辑
def robot_logic():
    """Main interaction loop: listen, recognize, answer, until told to quit."""
    print('我是智能交互机器人小智,有什么可以帮到您的?')
    os.system(f"mpg321 {INITIAL_VOICE}")  # play the greeting prompt
    while True:
        record_audio('chatting.wav')  # capture one utterance from the mic
        heard = recognized()  # offline speech-to-text of that utterance
        if '小智' in heard or '猪头' in heard:
            # Wake word detected: ask the LLM and speak its answer aloud.
            synthesize_initial_voice(send_request_4(heard))
        elif '退出' in heard or '拜拜' in heard:
            # Exit phrase: play the farewell and leave the loop.
            os.system(f"mpg321 {EXIT_VOICE}")
            break
    os._exit(0)  # also tears down the video window
# Tkinter is not thread-safe and must run in the MAIN thread; the original
# started the GUI in a worker thread, which crashes or hangs on many
# platforms. Run the robot loop in a daemon thread and keep Tk here instead.
robot_thread = threading.Thread(target=robot_logic, daemon=True)
robot_thread.start()
# Blocks in the Tk main loop; robot_logic() calls os._exit(0) on the exit
# phrase, which also tears this window down, so no join is needed.
play_video()