LangChain项目实战
人脉工具
项目背景:易速鲜花电商网络自从创建以来,通过微信、抖音、小红书等自媒体宣传推广,短期内获得了广泛流量展示。目前,营销部门希望以此为契机,再接再厉,继续扩大品牌影响力。经过调研,发现很多用户会通过微博热搜推荐的新闻来购买鲜花赠送给明星、达人等,因此各部门一致认为应该联络相关微博大V,共同推广,带动品牌成长。
项目目标: 帮助市场营销部门的员工找到微博上适合做鲜花推广的大V,并给出具体的联络方案。
项目的技术实现细节
第一步: 通过LangChain的搜索工具,以模糊搜索的方式,帮助运营人员找到微博中有可能对相关鲜花推广感兴趣的大V(比如喜欢玫瑰花的大V),并返回UID。
第二步: 根据微博UID,通过爬虫工具拿到相关大V的微博公开信息,并以JSON格式返回大V的数据。
第三步: 通过LangChain调用LLM,通过LLM的总结整理以及生成功能,根据大V的个人信息,写一篇热情洋溢的介绍型文章,谋求与该大V的合作。
第四步: 把LangChain输出解析功能加入进来,让LLM生成可以嵌入提示模板的格式化数据结构。
第五步: 添加HTML、CSS,并用Flask创建一个App,在网络上部署及发布这个鲜花电商人脉工具,供市场营销部门的人员使用。
项目实现
爬取大V
使用findbigV.py作为主程序入口。
# Configure the SerpAPI key used by the search agent (must be set
# before the agent modules are imported/used).
import os

os.environ["SERPAPI_API_KEY"] = (
    "Your SERPAPI API KEY"
)

# Required libraries and project modules.
import re

from agents.weibo_agent import lookup_V
from tools.general_tool import remove_non_chinese_fields
from tools.scraping_tool import get_data

if __name__ == "__main__":
    # Step 1: ask the agent for the influencer's Weibo UID.
    uid_answer = lookup_V(flower_type="牡丹")
    # The agent's reply may contain extra text; keep the first digit run.
    uid = re.findall(r"\d+", uid_answer)[0]
    print("这位鲜花大V的微博ID是", uid)
    # Step 2: scrape the public profile for that UID.
    profile = get_data(uid)
    print(profile)
    # Step 3: prune fields carrying no Chinese text (mostly technical noise).
    remove_non_chinese_fields(profile)
    print(profile)
使用weibo_agent.py来实现代理搜寻
# 导入一个搜索UID的工具
from tools.search_tool import get_UID
# 导入所需的库
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
def lookup_V(flower_type: str) -> str:
    """Use a LangChain ReAct agent to find the Weibo UID of an
    influencer related to the given flower type.

    Args:
        flower_type: Flower name to search for (e.g. "玫瑰").

    Returns:
        The agent's final answer, expected to contain the numeric UID.
    """
    # temperature=0 so the agent reliably answers with just a UID.
    llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
    # Prompt instructing the agent to answer with a bare UID only.
    template = """given the {flower} I want you to get a related 微博 UID.
Your answer should contain only a UID.
The URL always starts with https://weibo.com/u/
for example, if https://weibo.com/u/1669879400 is her 微博, then 1669879400 is her UID
This is only the example don't give me this, but the actual UID"""
    prompt_template = PromptTemplate(
        input_variables=["flower"], template=template
    )
    # Single tool: a customized Google search that returns the Weibo URL.
    tools = [
        Tool(
            name="Crawl Google for 微博 page",
            func=get_UID,
            description="useful for when you need get the 微博 UID",
        )
    ]
    agent = initialize_agent(
        tools,
        llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
    )
    # BUG FIX: agent.run expects a plain string; format_prompt() returns
    # a PromptValue object, which would be stringified incorrectly when
    # interpolated into the agent's internal prompt. Use format() instead.
    ID = agent.run(prompt_template.format(flower=flower_type))
    return ID
导入各个工具
general_tool.py
import re


def contains_chinese(s):
    """Return True if *s* contains at least one common CJK ideograph."""
    return re.search("[\u4e00-\u9fa5]", s) is not None


def remove_non_chinese_fields(d):
    """Recursively prune scalar entries that contain no Chinese text.

    Mutates *d* (a dict or list) in place. Scalar values (str, int,
    float, bool) whose string form has no Chinese characters are
    removed; nested dicts/lists are processed recursively; any other
    value is left untouched.
    """
    if isinstance(d, dict):
        # Collect doomed keys first — deleting during iteration is unsafe.
        doomed = [
            k
            for k, v in d.items()
            if isinstance(v, (str, int, float, bool))
            and not contains_chinese(str(v))
        ]
        for k in doomed:
            del d[k]
        # Recurse into any surviving containers.
        for v in d.values():
            if isinstance(v, (dict, list)):
                remove_non_chinese_fields(v)
    elif isinstance(d, list):
        drop = []
        for idx, item in enumerate(d):
            if isinstance(item, (str, int, float, bool)):
                if not contains_chinese(str(item)):
                    drop.append(idx)
            else:
                remove_non_chinese_fields(item)
        # Pop from the tail so earlier indices stay valid.
        for idx in reversed(drop):
            d.pop(idx)
scraping_tool.py
# 导入所需的库
import json
import requests
import time
# Function that fetches a Weibo page.
def scrape_weibo(url: str):
    """Fetch *url* and return the raw response body as text.

    Sends a browser-like User-Agent and a Weibo Referer plus the user's
    session cookie so the request looks like a logged-in browser session.
    NOTE(review): a real logged-in Weibo cookie must be filled in below
    or the ajax endpoint presumably won't return profile data — verify.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36",
        "Referer": "https://weibo.com",
    }
    # Placeholder — replace with your own cookie string.
    cookies = {"cookie": """your cookie"""}
    response = requests.get(url, headers=headers, cookies=cookies)
    # 3-second pause to rate-limit successive calls (anti-scraping
    # mitigation); note it runs after the request, before returning.
    time.sleep(3)
    return response.text
# Build the profile-detail URL for a UID, then fetch and parse it.
def get_data(id):
    """Download and JSON-decode the public profile for Weibo user *id*."""
    profile_url = f"https://weibo.com/ajax/profile/detail?uid={id}"
    raw_body = scrape_weibo(profile_url)
    return json.loads(raw_body)
search_tool.py
# 导入SerpAPIWrapper
from langchain.utilities import SerpAPIWrapper
# Customized SerpAPIWrapper: _process_response is overridden so that,
# for plain organic results, the first result's *link* (URL) is returned
# instead of its snippet text — the agent needs the Weibo page URL to
# extract a UID from it.
class CustomSerpAPIWrapper(SerpAPIWrapper):
    def __init__(self):
        # No extra state; defer entirely to the parent initializer.
        super(CustomSerpAPIWrapper, self).__init__()

    @staticmethod
    def _process_response(res: dict) -> str:
        """Process response from SerpAPI.

        Walks the SerpAPI result dict in priority order (answer box,
        events, sports, news, jobs, shopping, ...) and returns the
        first matching payload; falls back to collecting knowledge-graph
        and organic-result snippets.
        """
        if "error" in res.keys():
            raise ValueError(f"Got error from SerpAPI: {res['error']}")
        if "answer_box_list" in res.keys():
            res["answer_box"] = res["answer_box_list"]
        if "answer_box" in res.keys():
            answer_box = res["answer_box"]
            if isinstance(answer_box, list):
                answer_box = answer_box[0]
            if "result" in answer_box.keys():
                return answer_box["result"]
            elif "answer" in answer_box.keys():
                return answer_box["answer"]
            elif "snippet" in answer_box.keys():
                return answer_box["snippet"]
            elif "snippet_highlighted_words" in answer_box.keys():
                return answer_box["snippet_highlighted_words"]
            else:
                # Fall back to every scalar, non-URL field of the answer box.
                answer = {}
                for key, value in answer_box.items():
                    if not isinstance(value, (list, dict)) and not (
                        isinstance(value, str) and value.startswith("http")
                    ):
                        answer[key] = value
                return str(answer)
        elif "events_results" in res.keys():
            return res["events_results"][:10]
        elif "sports_results" in res.keys():
            return res["sports_results"]
        elif "top_stories" in res.keys():
            return res["top_stories"]
        elif "news_results" in res.keys():
            return res["news_results"]
        elif "jobs_results" in res.keys() and "jobs" in res["jobs_results"].keys():
            return res["jobs_results"]["jobs"]
        elif (
            "shopping_results" in res.keys()
            and "title" in res["shopping_results"][0].keys()
        ):
            return res["shopping_results"][:3]
        elif "questions_and_answers" in res.keys():
            return res["questions_and_answers"]
        elif (
            "popular_destinations" in res.keys()
            and "destinations" in res["popular_destinations"].keys()
        ):
            return res["popular_destinations"]["destinations"]
        elif "top_sights" in res.keys() and "sights" in res["top_sights"].keys():
            return res["top_sights"]["sights"]
        elif (
            "images_results" in res.keys()
            and "thumbnail" in res["images_results"][0].keys()
        ):
            return str([item["thumbnail"] for item in res["images_results"][:10]])

        snippets = []
        if "knowledge_graph" in res.keys():
            knowledge_graph = res["knowledge_graph"]
            title = knowledge_graph["title"] if "title" in knowledge_graph else ""
            if "description" in knowledge_graph.keys():
                snippets.append(knowledge_graph["description"])
            # Collect scalar, non-URL, non-layout knowledge-graph fields.
            for key, value in knowledge_graph.items():
                if (
                    isinstance(key, str)
                    and isinstance(value, str)
                    and key not in ["title", "description"]
                    and not key.endswith("_stick")
                    and not key.endswith("_link")
                    and not value.startswith("http")
                ):
                    snippets.append(f"{title} {key}: {value}.")
        if "organic_results" in res.keys():
            first_organic_result = res["organic_results"][0]
            if "snippet" in first_organic_result.keys():
                # Customization: return the page URL rather than the snippet
                # text (the stock implementation appended the snippet).
                # snippets.append(first_organic_result["snippet"])
                snippets.append(first_organic_result["link"])
            elif "snippet_highlighted_words" in first_organic_result.keys():
                snippets.append(first_organic_result["snippet_highlighted_words"])
            elif "rich_snippet" in first_organic_result.keys():
                snippets.append(first_organic_result["rich_snippet"])
            elif "rich_snippet_table" in first_organic_result.keys():
                snippets.append(first_organic_result["rich_snippet_table"])
            elif "link" in first_organic_result.keys():
                snippets.append(first_organic_result["link"])
        if "buying_guide" in res.keys():
            snippets.append(res["buying_guide"])
        if "local_results" in res.keys() and "places" in res["local_results"].keys():
            snippets.append(res["local_results"]["places"])
        if len(snippets) > 0:
            return str(snippets)
        else:
            return "No good search result found"
# Search for an influencer related to the given flower.
def get_UID(flower: str):
    """Run the customized SerpAPI search for *flower*; the result is
    expected to contain the influencer's Weibo page URL."""
    searcher = CustomSerpAPIWrapper()
    return searcher.run(f"{flower}")
生成文章介绍
重构对大V的爬取findbigV.py
"""
本文件是【部署一个鲜花网络电商的人脉工具(下)】章节的配套代码,课程链接:https://juejin.cn/book/7387702347436130304/section/7388070997553119282
您可以点击最上方的“运行“按钮,直接运行该文件;更多操作指引请参考Readme.md文件。
"""
# 设置OpenAI API密钥
# 导入所需的库
import re
from agents.weibo_agent import lookup_V
from tools.general_tool import remove_non_chinese_fields
from tools.scraping_tool import get_data
from tools.textgen_tool import generate_letter
import os
os.environ["SERPAPI_API_KEY"] = (
"Your SERPAPI API KEY"
)
def find_bigV(flower: str):
    """End-to-end pipeline: locate a flower influencer on Weibo and
    draft an outreach letter from their public profile.

    Returns the LLM's output, a JSON string shaped by the output parser.
    """
    # Step 1: agent search for the influencer's UID.
    uid_answer = lookup_V(flower_type=flower)
    # Keep only the first run of digits from the agent's reply.
    uid = re.findall(r"\d+", uid_answer)[0]
    print("这位鲜花大V的微博ID是", uid)
    # Step 2: scrape the public profile for that UID.
    profile = get_data(uid)
    print(profile)
    # Step 3: prune fields carrying no Chinese text.
    remove_non_chinese_fields(profile)
    print(profile)
    # Step 4: let the LLM draft the outreach material.
    letter_json = generate_letter(information=profile)
    print(letter_json)
    return letter_json
if __name__ == "__main__":
    # Demo run: reuse the pipeline function instead of duplicating its
    # steps inline (the original repeated lookup/scrape/clean/generate).
    result = find_bigV(flower="牡丹")

    import json

    # The LLM returns a JSON string shaped by the output parser.
    parsed = json.loads(result)
    # BUG FIX: the original called flask.jsonify here, which raises
    # "Working outside of application context" when run as a script.
    # Plain json handling is correct outside a Flask request; the web
    # route in app.py is where jsonify belongs.
    print(
        {
            "summary": parsed["summary"],
            "facts": parsed["facts"],
            "interest": parsed["interest"],
            "letter": parsed["letter"],
        }
    )
重构tool文件,添加输出解析
general_tool.py
import re
def contains_chinese(s):
return bool(re.search("[\u4e00-\u9fa5]", s))
def remove_non_chinese_fields(d):
if isinstance(d, dict):
to_remove = [
key
for key, value in d.items()
if isinstance(value, (str, int, float, bool))
and (not contains_chinese(str(value)))
]
for key in to_remove:
del d[key]
for key, value in d.items():
if isinstance(value, (dict, list)):
remove_non_chinese_fields(value)
elif isinstance(d, list):
to_remove_indices = []
for i, item in enumerate(d):
if isinstance(item, (str, int, float, bool)) and (
not contains_chinese(str(item))
):
to_remove_indices.append(i)
else:
remove_non_chinese_fields(item)
for index in reversed(to_remove_indices):
d.pop(index)
parsing_tool.py
# Imports for the Pydantic-based output parser.
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List


# Schema describing how the LLM's answer about an influencer is parsed.
class TextParsing(BaseModel):
    summary: str = Field(description="大V个人简介")  # influencer bio / background
    facts: List[str] = Field(description="大V的特点")  # notable traits or facts
    interest: List[str] = Field(
        description="这个大V可能感兴趣的事情"
    )  # topics or activities the influencer may care about
    letter: List[str] = Field(
        description="一篇联络这个大V的邮件"
    )  # suggested outreach e-mail content

    # Convert the model instance to a plain dict (used by callers that
    # need JSON-serializable data).
    def to_dict(self):
        return {
            "summary": self.summary,
            "facts": self.facts,
            "interest": self.interest,
            "letter": self.letter,
        }


# Parser that turns the LLM's text output into a TextParsing structure;
# its get_format_instructions() is injected into the prompt template.
letter_parser: PydanticOutputParser = PydanticOutputParser(pydantic_object=TextParsing)
scraping_tool.py
# 导入所需的库
import json
import requests
import time
# Function that fetches a Weibo page.
def scrape_weibo(url: str):
    """Fetch *url* and return the raw response body as text.

    Sends a browser-like User-Agent and a Weibo Referer plus the user's
    session cookie so the request looks like a logged-in browser session.
    NOTE(review): a real logged-in Weibo cookie must be filled in below
    or the ajax endpoint presumably won't return profile data — verify.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36",
        "Referer": "https://weibo.com",
    }
    # Placeholder — replace with your own cookie string.
    cookies = {"cookie": """your cookie"""}
    response = requests.get(url, headers=headers, cookies=cookies)
    # 3-second pause to rate-limit successive calls (anti-scraping
    # mitigation); note it runs after the request, before returning.
    time.sleep(3)
    return response.text
# Build the profile-detail URL for a UID, then fetch and parse it.
def get_data(id):
    """Download and JSON-decode the public profile for Weibo user *id*."""
    profile_url = f"https://weibo.com/ajax/profile/detail?uid={id}"
    raw_body = scrape_weibo(profile_url)
    return json.loads(raw_body)
search_tool.py
# 导入SerpAPIWrapper
from langchain.utilities import SerpAPIWrapper
# Customized SerpAPIWrapper: _process_response is overridden so that,
# for plain organic results, the first result's *link* (URL) is returned
# instead of its snippet text — the agent needs the Weibo page URL to
# extract a UID from it.
class CustomSerpAPIWrapper(SerpAPIWrapper):
    def __init__(self):
        # No extra state; defer entirely to the parent initializer.
        super(CustomSerpAPIWrapper, self).__init__()

    @staticmethod
    def _process_response(res: dict) -> str:
        """Process response from SerpAPI.

        Walks the SerpAPI result dict in priority order (answer box,
        events, sports, news, jobs, shopping, ...) and returns the
        first matching payload; falls back to collecting knowledge-graph
        and organic-result snippets.
        """
        if "error" in res.keys():
            raise ValueError(f"Got error from SerpAPI: {res['error']}")
        if "answer_box_list" in res.keys():
            res["answer_box"] = res["answer_box_list"]
        if "answer_box" in res.keys():
            answer_box = res["answer_box"]
            if isinstance(answer_box, list):
                answer_box = answer_box[0]
            if "result" in answer_box.keys():
                return answer_box["result"]
            elif "answer" in answer_box.keys():
                return answer_box["answer"]
            elif "snippet" in answer_box.keys():
                return answer_box["snippet"]
            elif "snippet_highlighted_words" in answer_box.keys():
                return answer_box["snippet_highlighted_words"]
            else:
                # Fall back to every scalar, non-URL field of the answer box.
                answer = {}
                for key, value in answer_box.items():
                    if not isinstance(value, (list, dict)) and not (
                        isinstance(value, str) and value.startswith("http")
                    ):
                        answer[key] = value
                return str(answer)
        elif "events_results" in res.keys():
            return res["events_results"][:10]
        elif "sports_results" in res.keys():
            return res["sports_results"]
        elif "top_stories" in res.keys():
            return res["top_stories"]
        elif "news_results" in res.keys():
            return res["news_results"]
        elif "jobs_results" in res.keys() and "jobs" in res["jobs_results"].keys():
            return res["jobs_results"]["jobs"]
        elif (
            "shopping_results" in res.keys()
            and "title" in res["shopping_results"][0].keys()
        ):
            return res["shopping_results"][:3]
        elif "questions_and_answers" in res.keys():
            return res["questions_and_answers"]
        elif (
            "popular_destinations" in res.keys()
            and "destinations" in res["popular_destinations"].keys()
        ):
            return res["popular_destinations"]["destinations"]
        elif "top_sights" in res.keys() and "sights" in res["top_sights"].keys():
            return res["top_sights"]["sights"]
        elif (
            "images_results" in res.keys()
            and "thumbnail" in res["images_results"][0].keys()
        ):
            return str([item["thumbnail"] for item in res["images_results"][:10]])

        snippets = []
        if "knowledge_graph" in res.keys():
            knowledge_graph = res["knowledge_graph"]
            title = knowledge_graph["title"] if "title" in knowledge_graph else ""
            if "description" in knowledge_graph.keys():
                snippets.append(knowledge_graph["description"])
            # Collect scalar, non-URL, non-layout knowledge-graph fields.
            for key, value in knowledge_graph.items():
                if (
                    isinstance(key, str)
                    and isinstance(value, str)
                    and key not in ["title", "description"]
                    and not key.endswith("_stick")
                    and not key.endswith("_link")
                    and not value.startswith("http")
                ):
                    snippets.append(f"{title} {key}: {value}.")
        if "organic_results" in res.keys():
            first_organic_result = res["organic_results"][0]
            if "snippet" in first_organic_result.keys():
                # Customization: return the page URL rather than the snippet
                # text (the stock implementation appended the snippet).
                # snippets.append(first_organic_result["snippet"])
                snippets.append(first_organic_result["link"])
            elif "snippet_highlighted_words" in first_organic_result.keys():
                snippets.append(first_organic_result["snippet_highlighted_words"])
            elif "rich_snippet" in first_organic_result.keys():
                snippets.append(first_organic_result["rich_snippet"])
            elif "rich_snippet_table" in first_organic_result.keys():
                snippets.append(first_organic_result["rich_snippet_table"])
            elif "link" in first_organic_result.keys():
                snippets.append(first_organic_result["link"])
        if "buying_guide" in res.keys():
            snippets.append(res["buying_guide"])
        if "local_results" in res.keys() and "places" in res["local_results"].keys():
            snippets.append(res["local_results"]["places"])
        if len(snippets) > 0:
            return str(snippets)
        else:
            return "No good search result found"
# Search for an influencer related to the given flower.
def get_UID(flower: str):
    """Run the customized SerpAPI search for *flower*; the result is
    expected to contain the influencer's Weibo page URL."""
    searcher = CustomSerpAPIWrapper()
    return searcher.run(f"{flower}")
textgen_tool.py
# 导入所需要的库
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from tools.parsing_tool import letter_parser
# Generate the outreach copy for an influencer profile.
def generate_letter(information):
    """Ask the LLM to summarize a Weibo profile and draft an outreach
    letter; output is shaped by letter_parser's format instructions."""
    # Prompt: profile data + four writing tasks + JSON format instructions.
    letter_template = """
下面是这个人的微博信息 {information}
请你帮我:
1. 写一个简单的总结
2. 挑两件有趣的特点说一说
3. 找一些他比较感兴趣的事情
4. 写一篇热情洋溢的介绍信
\n{format_instructions}"""
    prompt = PromptTemplate(
        input_variables=["information"],
        template=letter_template,
        # Pre-bind the parser's format instructions so callers only
        # supply the profile information.
        partial_variables={
            "format_instructions": letter_parser.get_format_instructions()
        },
    )
    # Chat model with default sampling settings.
    model = ChatOpenAI(model_name="gpt-3.5-turbo")
    # Wire prompt and model into a chain and run it.
    chain = LLMChain(llm=model, prompt=prompt)
    return chain.run(information=information)
最后给出程序入口
app.py
"""
本文件是【部署一个鲜花网络电商的人脉工具(下)】章节的配套代码,课程链接:https://juejin.cn/book/7387702347436130304/section/7388070997553119282
您可以点击最上方的“运行“按钮,直接运行该文件;更多操作指引请参考Readme.md文件。
"""
# 导入所需的库和模块
from flask import Flask, render_template, request, jsonify
from findbigV import find_bigV
import json
# Instantiate the Flask application.
app = Flask(__name__)


# Home page: serve the single-page UI.
@app.route("/")
def index():
    return render_template("index.html")


# Processing endpoint, POST only.
@app.route("/process", methods=["POST"])
def process():
    """Look up an influencer for the submitted flower name and return
    the generated outreach data as JSON."""
    # A missing "flower" field makes Flask abort with 400 automatically.
    flower = request.form["flower"]
    # Run the full pipeline (search -> scrape -> clean -> generate).
    response_str = find_bigV(flower=flower)
    # ROBUSTNESS FIX: the LLM is asked for strict JSON but may still
    # return malformed text; fail gracefully instead of a bare 500.
    try:
        response = json.loads(response_str)
    except json.JSONDecodeError:
        return jsonify({"error": "模型返回的结果不是有效的JSON"}), 502
    return jsonify(
        {
            "summary": response["summary"],
            "facts": response["facts"],
            "interest": response["interest"],
            "letter": response["letter"],
        }
    )


if __name__ == "__main__":
    # NOTE(security): debug=True enables the Werkzeug debugger and must
    # never be used on a production deployment, especially on 0.0.0.0.
    app.run(host="0.0.0.0", debug=True)
<!-- templates/index.html -->
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
    <script src="https://lf3-static.bytednsdoc.com/obj/eden-cn/veh7vhpeps/jquery-3.6.0.min.js?x-resource-account=public"></script>
    <title>人脉工具</title>
    <link rel="stylesheet" href="https://lf3-static.bytednsdoc.com/obj/eden-cn/veh7vhpeps/all.min.css?x-resource-account=public" />
</head>
<body>
    <!-- FIX: this spinner block was previously placed inside <head>,
         where flow content (div) is invalid HTML; it belongs in <body>. -->
    <div class="spinner-container" id="spinner-container" style="display: none;">
        <i id="loading-spinner" class="fas fa-spinner fa-spin"></i>
    </div>
    <div class="container">
        <h1>易速鲜花人脉工具</h1>
        <form id="name-form">
            <input type="text" id="flower" name="flower" placeholder="输入一种花(或者其它东西也行)">
            <button id="magic-button" type="submit">找到大V</button>
        </form>
        <div id="result">
            <img id="profile-pic" src="" alt="Profile Picture" style="display: none; max-width: 100%; height: auto; border-radius: 50%; margin-bottom: 20px;">
            <h2>基本情况</h2>
            <p id="summary"></p>
            <h2>特色内容</h2>
            <div id="facts"></div>
            <h2>可能感兴趣的事儿</h2>
            <div id="interest"></div>
            <h2>联络邮件</h2>
            <div id="letter"></div>
        </div>
    </div>
    <script>
        $(document).ready(function () {
            $('#name-form').on('submit', function (e) {
                e.preventDefault();
                $('#spinner-container').show();
                $.ajax({
                    url: '/process',
                    data: $('#name-form').serialize(),
                    type: 'POST',
                    success: function (response) {
                        // NOTE(review): hard-coded placeholder avatar with an
                        // expiring signed token (e=1702512000); replace with a
                        // stable local asset for real deployments.
                        $('#profile-pic').attr('src', 'https://media.licdn.com/dms/image/C5603AQFNBlle-yAc5g/profile-displayphoto-shrink_800_800/0/1517403045625?e=1702512000&v=beta&t=klKCelFxssjEw0Y1_3bmzP9YLy8yGijKz9_P16lGy5w');
                        $('#profile-pic').show();
                        $('#summary').text(response.summary);
                        $('#facts').html('<ul>' + response.facts.map(fact => '<li>' + fact + '</li>').join('') + '</ul>');
                        $('#interest').html('<ul>' + response.interest.map(interest => '<li>' + interest + '</li>').join('') + '</ul>');
                        $('#letter').text(response.letter);
                    },
                    error: function (error) {
                        console.log(error);
                    },
                    complete: function () {
                        $('#spinner-container').hide();
                    }
                });
            });
        });
    </script>
</body>
</html>
/* static/css/style.css */

/* Global reset. */
* {
    box-sizing: border-box;
    margin: 0;
    padding: 0;
}

/* Page defaults. */
body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    background-color: #f5f5f5;
    color: #333;
    line-height: 1.6;
}

/* Centered white card that holds the whole UI. */
.container {
    width: 80%;
    margin: 0 auto;
    padding: 30px;
    background-color: #ffffff;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
    border-radius: 5px;
    display: flex;
    flex-direction: column;
    align-items: center;
}

h1 {
    font-size: 32px;
    margin-bottom: 20px;
}

/* Flower-name text input. */
input[type="text"] {
    width: 100%;
    padding: 12px 20px;
    margin: 8px 0;
    box-sizing: border-box;
    border: 2px solid #ccc;
    border-radius: 4px;
    background-color: #f8f8f8;
    font-size: 14px;
}

/* Submit button (superseded by the gradient rule further below). */
button {
    background-color: #4caf50;
    border: none;
    color: white;
    padding: 15px 32px;
    text-align: center;
    text-decoration: none;
    display: inline-block;
    font-size: 16px;
    margin: 4px 2px;
    cursor: pointer;
    border-radius: 4px;
    transition: 0.3s;
}

button:hover {
    background-color: #45a049;
}

/* Results area below the form. */
#result {
    margin-top: 30px;
    width: 100%;
    text-align: center;
}

/* NOTE(review): #loading and .loader appear unused — index.html uses
   #spinner-container / #loading-spinner instead; confirm before removing. */
#loading {
    display: none;
    position: fixed;
    top: 0;
    right: 0;
    bottom: 0;
    left: 0;
    z-index: 999;
    background-color: rgba(255, 255, 255, 0.8);
}

.loader {
    position: absolute;
    top: 50%;
    left: 50%;
    transform: translate(-50%, -50%);
    border: 8px solid #f3f3f3;
    border-top: 8px solid #3498db;
    border-radius: 50%;
    width: 50px;
    height: 50px;
    animation: spin 2s linear infinite;
}

p {
    font-size: 18px;
    margin-bottom: 10px;
}

h2 {
    font-size: 24px;
    margin-bottom: 10px;
    margin-top: 20px;
}

/* style.css */
/* Add or update this in your style.css file */
/* Later body rule: same defaults plus a gradient background
   (cascade: overrides the earlier body rule). */
body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    background-color: #f5f5f5;
    color: #333;
    line-height: 1.6;
    background-image: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
}

/* Full-screen overlay that centers the loading spinner. */
.spinner-container {
    position: fixed;
    display: flex;
    align-items: center;
    justify-content: center;
    top: 0;
    right: 0;
    bottom: 0;
    left: 0;
    z-index: 999;
}

#loading-spinner {
    font-size: 48px;
}

/* Gradient submit button (cascade: overrides the earlier button rule). */
button {
    background-color: #4caf50;
    background-image: linear-gradient(135deg, #43c6ac 0%, #191654 100%);
    border: none;
    color: white;
    padding: 15px 32px;
    text-align: center;
    text-decoration: none;
    display: inline-block;
    font-size: 16px;
    margin: 4px 2px;
    cursor: pointer;
    border-radius: 4px;
    transition: 0.3s;
}

/* Plain (bullet-free) list items for facts / interests. */
ul {
    list-style-type: none;
}
聊天机器人
聊天机器人(Chatbot)是LLM和LangChain的核心用例之一,很多人学习大语言模型,学习LangChain,就是为了开发出更好的、更能理解用户意图的聊天机器人。聊天机器人的核心特征是,它们可以进行长时间的对话并访问用户想要了解的信息。
设计过程中的组件包括
- 聊天模型:这是对话的基础,它更偏向于自然的对话风格。你可以参考LangChain相关文档中所支持的聊天模型的列表。尽管大模型(LLM)也可以应用于聊天机器人,但专用的聊天模型(Chat Model)更适合对话场景。
- 提示模板:帮助你整合默认消息、用户输入、历史交互以及检索时需要的上下文。
- 记忆:它允许机器人记住与用户之间的先前互动,增强对话连贯性。
- 检索器:这是一个可选组件,特别适合那些需要提供特定领域知识的机器人。
项目的技术实现细节
第一步: 通过LangChain的ConversationChain,实现一个最基本的聊天对话工具。
第二步: 通过LangChain中的记忆功能,让这个聊天机器人能够记住用户之前所说的话。
第三步: 通过LangChain中的检索功能,整合易速鲜花的内部文档资料,让聊天机器人不仅能够基于自己的知识,还可以基于易速鲜花的业务流程,给出专业的回答。
第四步(可选): 通过LangChain中的数据库查询功能,让用户可以输入订单号来查询订单状态,或者看看有没有存货等等。
第五步: 在网络上部署及发布这个聊天机器人,供企业内部员工和易速鲜花用户使用。
在上面的 5 个步骤中,我们使用到了很多LangChain技术,包括提示工程、模型、链、代理、RAG、数据库检索等。
项目实现
-
开发最基本的机器人
# 设置OpenAI API密钥 import os os.environ["OPENAI_API_KEY"] = 'Your OpenAI Key' # 导入所需的库和模块 from langchain.schema import ( HumanMessage, SystemMessage ) from langchain.chat_models import ChatOpenAI # 创建一个聊天模型的实例 chat = ChatOpenAI() # 创建一个消息列表 messages = [ SystemMessage(content="你是一个花卉行家。"), HumanMessage(content="朋友喜欢淡雅的颜色,她的婚礼我选择什么花?") ] # 使用聊天模型获取响应 response = chat(messages) print(response)运行如下:
content='对于喜欢淡雅的颜色的婚礼,你可以选择以下花卉:\n\n1. 白色玫瑰:白色玫瑰象征纯洁和爱情,它们能为婚礼带来一种优雅和浪漫的氛围。\n\n2. 紫色满天星:紫色满天星是十分优雅的小花,它们可以作为装饰花束或餐桌中心点使用,为婚礼增添一丝神秘感。\n\n3. 淡粉色康乃馨:淡粉色康乃馨是一种温馨而浪漫的花卉,能为婚礼带来一种柔和的氛围。\n\n4. 白色郁金香:白色郁金香代表纯洁和完美,它们可以为婚礼带来一种高贵和典雅的感觉。\n\n5. 淡紫色蓝雏菊:淡紫色蓝雏菊是一种可爱的小花,它们可以作为装饰花束或花冠使用,为婚礼增添一丝童真和浪漫。\n\n这些花卉都能营造出淡雅的氛围,并与婚礼的整体风格相得益彰。当然,你也可以根据你朋友的喜好和主题来选择适合的花卉。'设置循环对话
# 设置OpenAI API密钥 import os os.environ["OPENAI_API_KEY"] = 'Your OpenAI Key' # 导入所需的库和模块 from langchain.schema import HumanMessage, SystemMessage from langchain.chat_models import ChatOpenAI # 定义一个命令行聊天机器人的类 class CommandlineChatbot: # 在初始化时,设置花卉行家的角色并初始化聊天模型 def __init__(self): self.chat = ChatOpenAI() self.messages = [SystemMessage(content="你是一个花卉行家。")] # 定义一个循环来持续与用户交互 def chat_loop(self): print("Chatbot 已启动! 输入'exit'来退出程序。") while True: user_input = input("你: ") # 如果用户输入“exit”,则退出循环 if user_input.lower() == 'exit': print("再见!") break # 将用户的输入添加到消息列表中,并获取机器人的响应 self.messages.append(HumanMessage(content=user_input)) response = self.chat(self.messages) print(f"Chatbot: {response.content}") # 如果直接运行这个脚本,启动聊天机器人 if __name__ == "__main__": bot = CommandlineChatbot() bot.chat_loop() -
增加记忆机制
# 设置OpenAI API密钥 import os os.environ["OPENAI_API_KEY"] = 'Your OpenAI Key' # 导入所需的库和模块 from langchain.schema import HumanMessage, SystemMessage from langchain.memory import ConversationBufferMemory from langchain.prompts import ( ChatPromptTemplate, MessagesPlaceholder, SystemMessagePromptTemplate, HumanMessagePromptTemplate, ) from langchain.chains import LLMChain from langchain.chat_models import ChatOpenAI # 设置OpenAI API密钥 os.environ["OPENAI_API_KEY"] = 'Your OpenAI Key' # 带记忆的聊天机器人类 class ChatbotWithMemory: def __init__(self): # 初始化LLM self.llm = ChatOpenAI() # 初始化Prompt self.prompt = ChatPromptTemplate( messages=[ SystemMessagePromptTemplate.from_template( "你是一个花卉行家。你通常的回答不超过30字。" ), MessagesPlaceholder(variable_name="chat_history"), HumanMessagePromptTemplate.from_template("{question}") ] ) # 初始化Memory self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) # 初始化LLMChain with LLM, prompt and memory self.conversation = LLMChain( llm=self.llm, prompt=self.prompt, verbose=True, memory=self.memory ) # 与机器人交互的函数 def chat_loop(self): print("Chatbot 已启动! 输入'exit'来退出程序。") while True: user_input = input("你: ") if user_input.lower() == 'exit': print("再见!") break response = self.conversation({"question": user_input}) print(f"Chatbot: {response['text']}") if __name__ == "__main__": # 启动Chatbot bot = ChatbotWithMemory() bot.chat_loop()程序的核心是ChatbotWithMemory类,这是一个带有记忆功能的聊天机器人类。在这个类的初始化函数中,定义了一个对话缓冲区记忆,它会跟踪对话历史。在LLMChain被创建时,就整合了LLM、提示和记忆,形成完整的对话链。
-
添加检索机制
# Import the required libraries.
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Qdrant
from langchain.memory import ConversationSummaryMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import Docx2txtLoader
from langchain.document_loaders import TextLoader

# Set the OpenAI API key.
os.environ["OPENAI_API_KEY"] = 'Your OpenAI Key'

# ChatBot class with document retrieval.
class ChatbotWithRetrieval:
    def __init__(self, dir):
        # Load documents from the given directory, one loader per type.
        base_dir = dir  # where the documents live
        documents = []
        for file in os.listdir(base_dir):
            file_path = os.path.join(base_dir, file)
            if file.endswith('.pdf'):
                loader = PyPDFLoader(file_path)
                documents.extend(loader.load())
            elif file.endswith('.docx') or file.endswith('.doc'):
                loader = Docx2txtLoader(file_path)
                documents.extend(loader.load())
            elif file.endswith('.txt'):
                loader = TextLoader(file_path)
                documents.extend(loader.load())
        # Split the text into 200-character chunks with no overlap.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=0)
        all_splits = text_splitter.split_documents(documents)
        # In-memory Qdrant vector store over OpenAI embeddings.
        self.vectorstore = Qdrant.from_documents(
            documents=all_splits,  # the chunked documents
            embedding=OpenAIEmbeddings(),  # OpenAI embedding model
            location=":memory:",  # in-memory storage
            collection_name="my_documents",  # collection name
        )
        # Initialize the LLM.
        self.llm = ChatOpenAI()
        # Summary-style conversation memory.
        self.memory = ConversationSummaryMemory(
            llm=self.llm, memory_key="chat_history", return_messages=True
        )
        # Retrieval chain = LLM + retriever + memory.
        retriever = self.vectorstore.as_retriever()
        self.qa = ConversationalRetrievalChain.from_llm(
            self.llm, retriever=retriever, memory=self.memory
        )

    # Interactive loop.
    def chat_loop(self):
        print("Chatbot 已启动! 输入'exit'来退出程序。")
        while True:
            user_input = input("你: ")
            if user_input.lower() == 'exit':
                print("再见!")
                break
            # Invoke the retrieval chain.
            response = self.qa(user_input)
            print(f"Chatbot: {response['answer']}")

if __name__ == "__main__":
    # Start the chatbot over the "OneFlower" document folder.
    folder = "OneFlower"
    bot = ChatbotWithRetrieval(folder)
    bot.chat_loop()
-
通过Gradio重构机器人
# Import the required libraries.
import os
import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Qdrant
from langchain.memory import ConversationSummaryMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import Docx2txtLoader
from langchain.document_loaders import TextLoader

# Set the OpenAI API key.
os.environ["OPENAI_API_KEY"] = 'Your OpenAI Key'

class ChatbotWithRetrieval:
    def __init__(self, dir):
        # Load documents from the given directory, one loader per type.
        base_dir = dir  # where the documents live
        documents = []
        for file in os.listdir(base_dir):
            file_path = os.path.join(base_dir, file)
            if file.endswith('.pdf'):
                loader = PyPDFLoader(file_path)
                documents.extend(loader.load())
            elif file.endswith('.docx') or file.endswith('.doc'):
                loader = Docx2txtLoader(file_path)
                documents.extend(loader.load())
            elif file.endswith('.txt'):
                loader = TextLoader(file_path)
                documents.extend(loader.load())
        # Split the text into 200-character chunks with no overlap.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=0)
        all_splits = text_splitter.split_documents(documents)
        # In-memory Qdrant vector store over OpenAI embeddings.
        self.vectorstore = Qdrant.from_documents(
            documents=all_splits,  # the chunked documents
            embedding=OpenAIEmbeddings(),  # OpenAI embedding model
            location=":memory:",  # in-memory storage
            collection_name="my_documents",  # collection name
        )
        # Initialize the LLM.
        self.llm = ChatOpenAI()
        # Summary-style conversation memory.
        self.memory = ConversationSummaryMemory(
            llm=self.llm, memory_key="chat_history", return_messages=True
        )
        # Running transcript shown in the Gradio output box.
        self.conversation_history = ""
        # Retrieval chain = LLM + retriever + memory.
        retriever = self.vectorstore.as_retriever()
        self.qa = ConversationalRetrievalChain.from_llm(
            self.llm, retriever=retriever, memory=self.memory
        )

    def get_response(self, user_input):
        # New entry point created for Gradio.
        response = self.qa(user_input)
        # Append this turn to the transcript and return the whole history.
        self.conversation_history += f"你: {user_input}\nChatbot: {response['answer']}\n"
        return self.conversation_history

if __name__ == "__main__":
    folder = "OneFlower"
    bot = ChatbotWithRetrieval(folder)
    # Define the Gradio interface.
    interface = gr.Interface(
        fn=bot.get_response,  # the function defined above
        inputs="text",  # text input
        outputs="text",  # text output
        live=False,  # submit-button flow (not live-updating, despite the original comment)
        title="易速鲜花智能客服",  # page title
        description="请输入问题,然后点击提交。"  # description
    )
    interface.launch()  # launch the Gradio UI