《NestJS智能体开发》(三):向量数据库

1,652 阅读4分钟

什么是向量数据库?

向量数据库是一种专门用于存储和查询高维向量数据的数据库。与传统的关系型数据库不同,向量数据库的核心功能是提供高效的相似度搜索,使得查询向量能够找到与之最接近的向量。它通过将非结构化数据(如文本、图像、音频等)转换为向量形式进行存储,然后利用先进的索引方法来加速搜索过程。

image.png

为什么要使用向量数据库?

在智能体(Agent)开发中,使用向量数据库带来多方面优势:

1. 高效处理非结构化数据

向量数据库擅长存储和检索高维向量数据,这些数据源自文本、图像等非结构化数据。通过将非结构化数据转换为向量,向量数据库利用高效索引和搜索机制进行处理,而传统数据库在处理这类数据时存在局限。

2. 高效的相似性搜索

向量数据库内置近似最近邻 (ANN) 算法,能快速找到与查询向量最相似的向量,适用于推荐系统、图像搜索等领域,提升应用的速度和准确性。

3. 支持记忆管理

向量数据库是对话智能体记忆管理的核心,能高效存储和检索文本、音频等数据的向量表示。智能体通过编码查询内容并搜索向量数据库,访问长期记忆系统,利用检索到的信息回答用户。

4. 优化生成结果

向量数据库在检索增强生成 (RAG) 中发挥重要作用:它为大型语言模型提供特定领域的知识作为上下文(无需对模型本身进行微调),从而提高生成文本的准确性和相关性。

image.png

Milvus

Milvus 是一个开源的向量数据库,专为管理和检索大量向量数据而设计,广泛应用于人工智能、推荐系统、图像检索、自然语言处理等领域。它具备以下特点和优势:

  • 高性能和可扩展性:Milvus 可以在从笔记本电脑到大型分布式系统等各种环境中高效运行,支持 PB 级别的数据存储。
  • 多种索引方式:Milvus 提供了多种索引方式,如 IVF_FLAT、HNSW 等,以满足不同场景下的搜索需求。
  • 丰富的功能:Milvus 支持向量的增删改查、相似度搜索、向量索引管理等功能。

image.png

安装Milvus

使用Docker Compose安装Milvus

# Docker Compose stack with three services: etcd, minio and the Milvus
# standalone server. Host-side data lives under
# ${DOCKER_VOLUME_DIRECTORY:-.}/volumes (the current directory when the
# variable is unset).
version: '3.5'

services:
  # etcd instance that the standalone service reaches via ETCD_ENDPOINTS.
  etcd:
    container_name: milvus-etcd
    image: quay.io/coreos/etcd:v3.5.16
    environment:
      - ETCD_AUTO_COMPACTION_MODE=revision
      - ETCD_AUTO_COMPACTION_RETENTION=1000
      # 4294967296 bytes = 4 GiB
      - ETCD_QUOTA_BACKEND_BYTES=4294967296
      - ETCD_SNAPSHOT_COUNT=50000
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
    # Listens on all interfaces on 2379 and keeps its data in the mounted /etcd directory.
    command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
    healthcheck:
      test: ["CMD", "etcdctl", "endpoint", "health"]
      interval: 30s
      timeout: 20s
      retries: 3

  # MinIO instance that the standalone service reaches via MINIO_ADDRESS.
  minio:
    container_name: milvus-minio
    image: minio/minio:RELEASE.2023-03-20T20-16-18Z
    environment:
      # NOTE(review): these look like MinIO's stock credentials — change them outside local development.
      MINIO_ACCESS_KEY: minioadmin
      MINIO_SECRET_KEY: minioadmin
    ports:
      # 9001 is the console address set in `command`; 9000 is the port the health check probes.
      - "9001:9001"
      - "9000:9000"
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
    command: minio server /minio_data --console-address ":9001"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 30s
      timeout: 20s
      retries: 3

  # Milvus server in standalone mode, wired to the etcd and minio services above.
  standalone:
    container_name: milvus-standalone
    image: milvusdb/milvus:v2.5.4
    command: ["milvus", "run", "standalone"]
    security_opt:
    - seccomp:unconfined
    environment:
      # Service names resolve on the shared compose network declared at the bottom.
      ETCD_ENDPOINTS: etcd:2379
      MINIO_ADDRESS: minio:9000
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
      interval: 30s
      # Grace period before failed probes count against `retries`.
      start_period: 90s
      timeout: 20s
      retries: 3
    ports:
      # 19530 is the address clients connect to (see MILVUS_ADDRESS in the Nest config);
      # 9091 serves the /healthz endpoint used by the health check.
      - "19530:19530"
      - "9091:9091"
    depends_on:
      - "etcd"
      - "minio"

# All services join the default network, which is named "milvus".
networks:
  default:
    name: milvus

在 Nest 中集成 Milvus Module

安装依赖:

pnpm add @zilliz/milvus2-sdk-node
pnpm add @grpc/grpc-js -D

创建Milvus Module目录结构如下:

./src/shared/modules/milvus
├── index.ts
├── milvus-module-options.interface.ts
├── milvus.module-definition.ts
├── milvus.module.ts
└── milvus.service.ts

milvus-module-options.interface.ts

import { ChannelOptions } from '@grpc/grpc-js';
import { ClientConfig } from "@zilliz/milvus2-sdk-node";

/**
 * Options accepted by the Milvus module.
 *
 * MilvusService forwards these fields, in declaration order, as the positional
 * arguments of the `MilvusClient` constructor.
 */
export interface MilvusModuleOptions {
    /** Either a full client configuration object or just the server address string. */
    configOrAddress: ClientConfig | string;
    /** Forwarded as the second constructor argument; presumably toggles TLS — confirm against the SDK. */
    ssl?: boolean;
    /** Forwarded as the third constructor argument. */
    username?: string;
    /** Forwarded as the fourth constructor argument. */
    password?: string;
    /** gRPC channel options, forwarded as the fifth constructor argument. */
    channelOptions?: ChannelOptions;
}

milvus.module-definition.ts

import { ConfigurableModuleBuilder } from "@nestjs/common";
import { MilvusModuleOptions } from "./milvus-module-options.interface";

/**
 * Builder for the Milvus configurable module.
 *
 * Registers one extra option, `isGlobal` (initial value `true`), and copies it
 * onto the `global` flag of the generated dynamic-module definition.
 */
const milvusModuleBuilder = new ConfigurableModuleBuilder<MilvusModuleOptions>().setExtras(
	{ isGlobal: true },
	(moduleDefinition, { isGlobal }) => ({
		...moduleDefinition,
		global: isGlobal,
	}),
);

/** Base class, options injection token and option type helpers produced by the builder. */
export const {
	ConfigurableModuleClass,
	MODULE_OPTIONS_TOKEN,
	OPTIONS_TYPE,
	ASYNC_OPTIONS_TYPE,
} = milvusModuleBuilder.build();

milvus.service.ts

import { Inject, Injectable } from "@nestjs/common";
import { MilvusClient, } from "@zilliz/milvus2-sdk-node";
import { MilvusModuleOptions } from "./milvus-module-options.interface";
import { MODULE_OPTIONS_TOKEN } from "./milvus.module-definition";

/**
 * Injectable Milvus client.
 *
 * Extends `MilvusClient`, so every SDK method is available directly on the
 * injected service instance.
 */
@Injectable()
export class MilvusService extends MilvusClient {
    /**
     * @param options Module options injected through `MODULE_OPTIONS_TOKEN`.
     * @throws Error when no options object was provided.
     */
    constructor(
        // Deliberately a plain parameter rather than a `private readonly`
        // parameter property: the value is only needed to call `super()`, and a
        // parameter property combined with statements before `super()` is
        // rejected by TypeScript versions older than 4.6.
        @Inject(MODULE_OPTIONS_TOKEN)
        options: MilvusModuleOptions,
    ) {
        if (!options) {
            throw new Error("MilvusModuleOptions is not defined");
        }
        super(
            options.configOrAddress,
            options.ssl,
            options.username,
            options.password,
            options.channelOptions,
        );
    }

    /**
     * Nest lifecycle hook, detected by method name: closes the client
     * connection when the host module is destroyed so the application can shut
     * down without leaving the gRPC channel open.
     */
    async onModuleDestroy(): Promise<void> {
        await this.closeConnection();
    }
}

milvus.module.ts

import { DynamicModule, Module } from "@nestjs/common";
import { ASYNC_OPTIONS_TYPE, ConfigurableModuleClass, OPTIONS_TYPE } from "./milvus.module-definition";
import { MilvusService } from "./milvus.service";

/**
 * Nest module exposing MilvusService.
 *
 * Both factory methods delegate to the generated ConfigurableModuleClass and
 * return a shallow copy of the dynamic-module definition it produces.
 */
@Module({
    exports: [MilvusService],
    providers: [MilvusService],
})
export class MilvusModule extends ConfigurableModuleClass {
    /** Registers the module with options that are available synchronously. */
    static register(options: typeof OPTIONS_TYPE): DynamicModule {
        const dynamicModule = super.register(options);
        return { ...dynamicModule };
    }

    /** Registers the module with options resolved by an async provider definition. */
    static registerAsync(options: typeof ASYNC_OPTIONS_TYPE): DynamicModule {
        const dynamicModule = super.registerAsync(options);
        return { ...dynamicModule };
    }
}

创建配置文件

milvus.config.ts

import { MilvusModuleOptions } from "@/shared/modules/milvus";
import { registerAs } from "@nestjs/config";

/**
 * Configuration namespace "milvus".
 *
 * Reads the server address from the MILVUS_ADDRESS environment variable
 * (for example http://127.0.0.1:19530).
 *
 * @throws Error when MILVUS_ADDRESS is unset or empty, so a missing setting is
 * reported by name instead of surfacing later as an undefined address.
 */
export default registerAs<MilvusModuleOptions>("milvus", () => {
    const address = process.env.MILVUS_ADDRESS;
    if (!address) {
        throw new Error("MILVUS_ADDRESS environment variable is not set");
    }
    return {
        configOrAddress: address,
    };
});

导入MilvusModule

// Resolve the module options from the "milvus" configuration namespace.
MilvusModule.registerAsync({
    useFactory: (configService: ConfigService) => {
        return configService.get("milvus");
    },
    inject: [ConfigService],
})

使用MilvusService

import { MilvusService } from '@/shared/modules/milvus';
import { Controller } from "@nestjs/common";

/**
 * Example controller that receives MilvusService through constructor injection.
 */
@Controller()
export class HomeController {
    // No `super()` call: this class has no base class, so calling it is a compile error.
    constructor(private readonly milvusService: MilvusService) {}
}

查询版本信息

// Ask the connected server for its version and print the raw response.
const versionResponse = await this.milvusService.getVersion();

console.log(versionResponse);

// {
//     status: {
//         extra_info: { },
//         error_code: 'Success',
//         reason: '',
//         code: 0,
//         retriable: false,
//         detail: ''
//     },
//     version: 'v2.5.4'
// }

创建数据库

// Create a database called "my_database" and print the returned status object.
const createDatabaseRequest = { db_name: "my_database" };
const createDatabaseStatus = await this.milvusService.createDatabase(createDatabaseRequest);

console.log(createDatabaseStatus);

// {
//   error_code: 'Success',
//   reason: '',
//   code: 0,
//   retriable: false,
//   detail: ''
// }

创建集合

import { DataType } from "@zilliz/milvus2-sdk-node";

// Schema matching the sample rows used in the insert example:
// an integer primary key, a 5-dimensional float vector and a string label.
const fields = [
    { name: "id", data_type: DataType.Int64, is_primary_key: true },
    { name: "vector", data_type: DataType.FloatVector, dim: 5 },
    { name: "color", data_type: DataType.VarChar, max_length: 512 },
];

// Index the vector field so the collection can be searched.
const index_params = [
    { field_name: "vector", index_type: "AUTOINDEX", metric_type: "COSINE" },
];

await this.milvusService.createCollection({
    collection_name: "customized_setup_1",
    fields: fields,
    index_params: index_params
});

插入数据

// Sample rows: integer id, 5-dimensional vector and a string label.
const data = [
    { id: 0, vector: [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592], color: "pink_8682" },
    { id: 1, vector: [0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104], color: "red_7025" },
    { id: 2, vector: [0.43742130801983836, -0.5597502546264526, 0.6457887650909682, 0.7894058910881185, 0.20785793220625592], color: "orange_6781" },
    { id: 3, vector: [0.3172005263489739, 0.9719044792798428, -0.36981146090600725, -0.4860894583077995, 0.95791889146345], color: "pink_9298" },
    { id: 4, vector: [0.4452349528804562, -0.8757026943054742, 0.8220779437047674, 0.46406290649483184, 0.30337481143159106], color: "red_4794" },
    { id: 5, vector: [0.985825131989184, -0.8144651566660419, 0.6299267002202009, 0.1206906911183383, -0.1446277761879955], color: "yellow_4222" },
    { id: 6, vector: [0.8371977790571115, -0.015764369584852833, -0.31062937026679327, -0.562666951622192, -0.8984947637863987], color: "red_9392" },
    { id: 7, vector: [-0.33445148015177995, -0.2567135004164067, 0.8987539745369246, 0.9402995886420709, 0.5378064918413052], color: "grey_8510" },
    { id: 8, vector: [0.39524717779832685, 0.4000257286739164, -0.5890507376891594, -0.8650502298996872, -0.6140360785406336], color: "white_9381" },
    { id: 9, vector: [0.5718280481994695, 0.24070317428066512, -0.3737913482606834, -0.06726932177492717, -0.6980531615588608], color: "purple_4976" }
];

// Insert into the collection created in the "create collection" step; the
// original target "quick_setup" is never created anywhere in this walkthrough.
const res = await this.milvusService.insert({
    collection_name: "customized_setup_1",
    data: data,
});

删除数据

// Delete the rows whose `color` equals one of the listed values. The values
// are taken from the sample rows of the insert example (ids 1 and 4), and the
// collection is the one created in the "create collection" step.
await this.milvusService.delete({
    collection_name: "customized_setup_1",
    filter: "color in ['red_7025', 'red_4794']"
});

查询数据

// 5-dimensional query vector, the same length as the vectors in the insert example.
const query_vector = [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592];

// Search the collection created in the "create collection" step; the original
// target "my_collection" is never created anywhere in this walkthrough.
const res = await this.milvusService.search({
    collection_name: "customized_setup_1",
    data: query_vector,
    limit: 3, // The number of results to return
});

console.log(res.results)

// Illustrative output shape only; the actual ids and scores depend on the
// rows stored in the collection.
// [
//   { score: 0.08821295201778412, id: '551' },
//   { score: 0.0800950899720192, id: '296' },
//   { score: 0.07794742286205292, id: '43' }
// ]

更多用法请参考官方文档