一、安装milvus
pip install milvus
用镜像源下载快很多
# 清华源
pip install milvus -i https://pypi.tuna.tsinghua.edu.cn/simple
# 阿里云
pip install milvus -i https://mirrors.aliyun.com/pypi/simple/
# 豆瓣
pip install milvus -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
测试连接脚本(保存为test_milvus.py)
from milvus import default_server
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection

# Smoke test for Milvus Lite: start the embedded server, create a minimal
# collection, then shut the server down.

# Start the embedded (Lite) server.
default_server.start()
try:
    # Connect to the local instance on whatever port the server chose.
    connections.connect(host='127.0.0.1', port=default_server.listen_port)

    # Minimal schema: auto-generated INT64 primary key + 128-dim float vector.
    fields = [
        FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
        FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=128)
    ]
    schema = CollectionSchema(fields, "test")
    collection = Collection("test_collection", schema)
    print("Milvus Lite 跑通!Collection创建成功")
finally:
    # FIX: always stop the embedded server, even when connect/creation raises,
    # so a failed run does not leave a dangling server process behind.
    default_server.stop()
执行:
python test_milvus.py
二、接入真实embedding模型
一、安装embedding依赖
pip install sentence-transformers -i https://pypi.tuna.tsinghua.edu.cn/simple
二、测试脚本(保存为test_embedding.py)
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # hide all GPUs so torch falls back to CPU
from sentence_transformers import SentenceTransformer

# Smoke test for the embedding model: encode one sentence on CPU and
# print the resulting vector's dimension and first few values.

# Load the Chinese BGE model on CPU (slow but dependable).
encoder = SentenceTransformer('BAAI/bge-large-zh', device='cpu')

# Encode a single sample sentence.
sample = "测试文本"
embedding = encoder.encode(sample)

print(f"向量维度: {len(embedding)}")
print(f"前5个值: {embedding[:5]}")
print("CPU模式跑通!")
三、把embedding和Milvus连起来——文本→向量→存入Milvus→搜索返回
一、测试脚本(保存为test_rag.py,避免覆盖上一节的test_embedding.py)
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force CPU inference
from milvus import default_server
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection
from sentence_transformers import SentenceTransformer

# End-to-end smoke test: text -> embedding -> insert into Milvus -> vector search.

# 1. Start the embedded Milvus Lite server and connect.
default_server.start()
try:
    connections.connect(host='127.0.0.1', port=default_server.listen_port)

    # 2. Create the collection (dim=1024 matches BAAI/bge-large-zh output).
    fields = [
        FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
        FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=500),
        FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=1024)
    ]
    schema = CollectionSchema(fields, "doc_collection")
    collection = Collection("docs", schema)

    # 3. Load the embedding model on CPU.
    model = SentenceTransformer('BAAI/bge-large-zh', device='cpu')

    # 4. Insert data. Column order must match the schema order of the
    #    non-auto-id fields: text first, then vector.
    texts = ["这是第一段测试文本", "这是第二段关于财务的内容"]
    vectors = [model.encode(t) for t in texts]
    collection.insert([texts, vectors])
    collection.flush()
    print(f"插入 {len(texts)} 条数据")

    # 5. Build a FLAT (exact, brute-force) index and load into memory.
    index_params = {"metric_type": "L2", "index_type": "FLAT", "params": {}}
    collection.create_index("vector", index_params)
    collection.load()

    # 6. Search for the query's nearest neighbours.
    query = "财务相关内容"
    query_vec = model.encode(query)
    results = collection.search([query_vec], "vector", param={"metric_type": "L2"}, limit=2)
    print(f"查询: {query}")
    for hits in results:
        for hit in hits:
            print(f" 找到: id={hit.id}, distance={hit.distance}")
finally:
    # 7. FIX: always stop the server, even on failure, so the process exits cleanly.
    default_server.stop()

print("RAG流程跑通!")
四、真实文档流程
创建 docs 文件夹,放入任意文本:
mkdir -p ~/milvus/docs
创建3个测试文档(复制粘贴任意内容)
echo "财务报销流程:员工提交发票→部门审批→财务审核→打款。注意事项:发票抬头需为公司全称,金额超过1000元需附明细清单。" > ~/milvus/docs/报销.txt
echo "2024年税收政策:小微企业增值税起征点提高至月销售额10万元,所得税减免延续至2027年底。申报方式:电子税务局在线提交。" > ~/milvus/docs/税务.txt
echo "固定资产管理办法:单价超过5000元的设备需登记台账,每年盘点一次,报废需经三层审批。" > ~/milvus/docs/资产.txt
day4_real_docs.py
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force CPU inference
from milvus import default_server
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection
from sentence_transformers import SentenceTransformer
import glob

# Day 4: index real text files from ~/milvus/docs into Milvus and run a
# semantic search that reports which source file each hit came from.

# 1. Start the embedded Milvus Lite server and connect.
default_server.start()
try:
    connections.connect(host='127.0.0.1', port=default_server.listen_port)

    # 2. Create the collection: source filename + chunk text + 1024-dim vector
    #    (dim matches BAAI/bge-large-zh output).
    fields = [
        FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
        FieldSchema(name="filename", dtype=DataType.VARCHAR, max_length=100),
        FieldSchema(name="chunk", dtype=DataType.VARCHAR, max_length=1000),
        FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=1024)
    ]
    schema = CollectionSchema(fields, "doc_chunks")
    collection = Collection("real_docs", schema)

    # 3. Load the embedding model on CPU.
    model = SentenceTransformer('BAAI/bge-large-zh', device='cpu')

    # 4. Read documents and split into naive fixed-size 100-char chunks.
    #    expanduser keeps the path in sync with the `mkdir -p ~/milvus/docs`
    #    setup step instead of hard-coding /home/ubuntu.
    docs_path = os.path.expanduser("~/milvus/docs/*.txt")
    all_chunks = []
    for filepath in glob.glob(docs_path):
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
        for i in range(0, len(content), 100):
            chunk = content[i:i+100]
            if len(chunk) > 20:  # drop fragments too short to be useful
                all_chunks.append({
                    'filename': os.path.basename(filepath),
                    'text': chunk
                })
    print(f"共切分 {len(all_chunks)} 段")

    # 5. Embed and insert in small batches (CPU encoding is slow).
    batch_size = 4
    for i in range(0, len(all_chunks), batch_size):
        batch = all_chunks[i:i+batch_size]
        texts = [item['text'] for item in batch]
        filenames = [item['filename'] for item in batch]
        vectors = [model.encode(t) for t in texts]
        # Column order matches the schema order of the non-auto-id fields.
        collection.insert([filenames, texts, vectors])
        print(f"插入 {i+len(batch)}/{len(all_chunks)}")
    collection.flush()
    print(f"总计插入 {len(all_chunks)} 段")

    # 6. Build a FLAT (exact) index and load the collection into memory.
    index_params = {"metric_type": "L2", "index_type": "FLAT", "params": {}}
    collection.create_index("vector", index_params)
    collection.load()

    # 7. Search; output_fields is required for hit.entity to carry the
    #    stored scalar fields back.
    query = "怎么报销发票"
    query_vec = model.encode(query)
    results = collection.search([query_vec], "vector", param={"metric_type": "L2"},
                                limit=3, output_fields=["filename", "chunk"])
    print(f"\n查询: {query}")
    for hits in results:
        for hit in hits:
            filename = hit.entity.get('filename') if hit.entity else "unknown"
            chunk = hit.entity.get('chunk') if hit.entity else "unknown"
            # BUG FIX: the original printed the literal "(unknown)" and never
            # used the filename it had just extracted above.
            print(f" 来自 {filename}: {chunk[:50] if chunk else 'N/A'}... (距离: {hit.distance:.3f})")
finally:
    # FIX: stop the server even when ingestion/search raises.
    default_server.stop()

print("\nDay 4 完成!真实文档流程跑通")
五、接入DeepSeek API,实现"问问题→检索→LLM生成回答"
export DEEPSEEK_API_KEY="sk-你的key"
day5_rag_final.py
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # hide GPUs -> CPU-only inference
from milvus import default_server
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
from sentence_transformers import SentenceTransformer
from openai import OpenAI
import glob

# Day 5 setup: start Milvus Lite, build a DeepSeek API client, and make sure
# a populated, indexed collection is loaded before the RAG query below runs.

# 1. Start the embedded Milvus server and connect to it.
default_server.start()
connections.connect(host='127.0.0.1', port=default_server.listen_port)

# 2. DeepSeek exposes an OpenAI-compatible API, so the stock OpenAI client
#    works with a swapped base_url. Requires DEEPSEEK_API_KEY in the env
#    (see the `export` line above); if unset, api_key is None and the first
#    request will fail with an auth error.
client = OpenAI(
    api_key=os.getenv("DEEPSEEK_API_KEY"),
    base_url="https://api.deepseek.com"
)

# 3. Embedding model on CPU; its 1024-dim output must match the vector
#    field dim in the schema below.
model = SentenceTransformer('BAAI/bge-large-zh', device='cpu')

# 4. Reuse the collection when it exists, otherwise create and populate it.
# NOTE(review): the reuse branch assumes the existing collection already
# holds data AND has an index — create_index only runs in the else branch,
# so a previous run that died before indexing would make load() fail here.
# Also, per the notes at the end of this file, Milvus Lite may not persist
# data across default_server.stop(); confirm before relying on reuse.
collection_name = "real_docs_v2"
if utility.has_collection(collection_name):
    collection = Collection(collection_name)
    print(f"复用已有collection: {collection_name}")
else:
    # Fresh collection: filename + chunk text + 1024-dim vector.
    fields = [
        FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
        FieldSchema(name="filename", dtype=DataType.VARCHAR, max_length=100),
        FieldSchema(name="chunk", dtype=DataType.VARCHAR, max_length=1000),
        FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=1024)
    ]
    schema = CollectionSchema(fields, "doc_chunks")
    collection = Collection(collection_name, schema)
    # Re-ingest: read every .txt and cut into fixed 100-char chunks,
    # discarding fragments of 20 chars or fewer.
    docs_path = "/home/ubuntu/milvus/docs/*.txt"
    all_chunks = []
    for filepath in glob.glob(docs_path):
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
        for i in range(0, len(content), 100):
            chunk = content[i:i+100]
            if len(chunk) > 20:
                all_chunks.append({
                    'filename': os.path.basename(filepath),
                    'text': chunk
                })
    print(f"重新插入 {len(all_chunks)} 段数据...")
    # Small batches because CPU encoding is slow.
    batch_size = 4
    for i in range(0, len(all_chunks), batch_size):
        batch = all_chunks[i:i+batch_size]
        texts = [item['text'] for item in batch]
        filenames = [item['filename'] for item in batch]
        vectors = [model.encode(t) for t in texts]
        # Column order matches the schema's non-auto-id fields.
        entities = [filenames, texts, vectors]
        collection.insert(entities)
    collection.flush()
    # FLAT index = exact brute-force search; fine at this data size.
    index_params = {"metric_type": "L2", "index_type": "FLAT", "params": {}}
    collection.create_index("vector", index_params)
    print(f"插入完成,共 {len(all_chunks)} 段")

# Load the collection into memory so search() can run.
collection.load()
# 5. RAG查询函数
def rag_query(user_question):
    """Answer *user_question* via RAG.

    Embeds the question, retrieves the top-3 nearest chunks from the Milvus
    collection, then asks DeepSeek to answer using those chunks as context.

    Returns a tuple of (answer_text, list_of_context_chunks).
    """
    # Retrieve the nearest stored chunks for the question embedding.
    question_vec = model.encode(user_question)
    search_hits = collection.search(
        [question_vec],
        "vector",
        param={"metric_type": "L2"},
        limit=3,
        output_fields=["filename", "chunk"],
    )

    # Flatten the hit entities into plain text snippets (guard against
    # entity being None on some client versions).
    contexts = []
    for hit_group in search_hits:
        for one_hit in hit_group:
            contexts.append(one_hit.entity.get('chunk') if one_hit.entity else "")
    context = "\n".join(contexts)

    prompt = f"""基于以下参考信息回答问题:
{context}
问题:{user_question}
回答:"""

    # Non-streaming chat completion against the DeepSeek endpoint.
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[{"role": "user", "content": prompt}],
        stream=False
    )
    return response.choices[0].message.content, contexts
# 6. Demo: run one question end-to-end and show the retrieved evidence.
question = "怎么报销发票?"
print(f"问题:{question}")
answer, sources = rag_query(question)

# Show the chunks the answer was grounded on, then the answer itself.
print(f"\n参考片段:")
for idx, snippet in enumerate(sources, 1):
    print(f"{idx}. {snippet[:80]}...")
print(f"\n生成回答:\n{answer}")

# Shut down the embedded Milvus server.
default_server.stop()
print("\nDay 5 完成!端到端RAG跑通")
踩过的坑
hit.entity是None,最常见的原因是search()没有传output_fields参数——不指定时结果只带id和distance,不会带回存储的标量字段;加上output_fields=["filename", "chunk"]后hit.entity才能取到字段。此外部分Milvus Lite版本的search返回格式与标准版不同,即使传了output_fields也可能为None,代码里需判空兜底。
Milvus Lite是内存存储,上次
default_server.stop()后数据清空。需要重新插入数据,或者改用持久化存储。