Node.js+PostgreSQL搭建RAG内容检索服务

143 阅读1分钟

Node.js+PostgreSQL搭建内容检索服务

  • 使用Midway.js搭建Node服务
  • 使用pg+typeorm实现向量数据检索

1. 修改ORM数据库配置

// config.{env}.js 
// 当前项目配置了多个数据库(MySQL 与 PostgreSQL),请按需配置
// ORM configuration exporting two named connections under `orm`:
// `default` (MySQL) and `vdb` (PostgreSQL). The `vdb` name is the one
// passed to @InjectEntityModel(DocumentsVectorTable, 'vdb') in the
// VectorDB service.
export default {
    // Provision an RDS instance and replace the masked DB settings below.
    orm: {
      // Default connection (MySQL).
      default: {
        type: 'mysql',
        host: '******',
        port: 3306,
        username: '******',
        password: '******',
        database: '******',
        synchronize: false,
        logging: false,
        charset: 'utf8mb4',
      },
      // PostgreSQL connection holding the vector table.
      vdb: {
        type: 'postgres',
        host: '******',
        port: 5432,
        username: 'ai_base_vdb',
        password: '******',
        database: 'ai_base_vdb',
      },
    },
} 

2. 创建EntityModel

import { Column, PrimaryColumn, CreateDateColumn } from 'typeorm';
import { EntityModel } from '@midwayjs/orm';
/**
 * Entity mapped to the `documents_vector_table` table.
 *
 * Each property maps to a snake_case column through the `name` option of
 * its decorator.
 */
@EntityModel('documents_vector_table')
export class DocumentsVectorTable {
  /** Primary key; supplied by the caller (not declared as auto-generated). */
  @PrimaryColumn()
  id: number;

  /** Creation timestamp, stored in `gmt_create`. */
  @CreateDateColumn({ name: 'gmt_create' })
  gmtCreate: Date;

  /** Identifier of the source document, stored in `document_id`. */
  @Column({ name: 'document_id' })
  documentId: number;

  /** Metadata for the document chunk, stored as text in `meta_data`. */
  @Column({ name: 'meta_data' })
  metaData: string;

  /**
   * Embedding vector, stored in `vector_data`. The service writes it as the
   * text form `[v1,v2,...]`.
   */
  @Column({ name: 'vector_data' })
  vectorData: string;
}

3. 创建VectorDB Service

import { Provide, Inject, Config, Logger } from '@midwayjs/core';
import { ILogger } from '@midwayjs/logger';
import { DocumentsVectorTable } from '../entity/DocumentsVectorTable';
import { InjectEntityModel } from '@midwayjs/orm';
import { Repository } from 'typeorm';
/**
 * Service wrapping reads and writes against the document vector table,
 * including a cosine-similarity lookup used for content retrieval.
 */
@Provide()
export class VectorDB {
  @Inject()
  ctx;

  @Config('env')
  env: string;

  @Logger()
  logger: ILogger;

  // Bind the repository to the 'vdb' ORM connection instead of the default one.
  @InjectEntityModel(DocumentsVectorTable, 'vdb')
  DocumentsVectorTableModel: Repository<DocumentsVectorTable>;

  /**
   * Lists every row of the document vector table.
   * @returns all stored document rows
   */
  async findDocumentsData() {
    const data = await this.DocumentsVectorTableModel.find();
    return data;
  }

  /**
   * Stores one document row.
   * @param id primary key
   * @param documentId document ID
   * @param metaData metadata for the document chunk
   * @param vectorData embedding vector; serialized as `[v1,v2,...]` text
   * @returns the result of the repository `save` call
   */
  async saveDocumentData(
    id,
    documentId: number,
    metaData: string,
    vectorData: number[]
  ) {
    const result = await this.DocumentsVectorTableModel.save({
      id,
      gmtCreate: new Date(),
      documentId,
      metaData,
      vectorData: `[${vectorData.join(',')}]`,
    });
    return result;
  }

  /**
   * Retrieves the rows most similar to `inputVector`, ranked by the
   * database-side `cosine_similarity` function (not defined in this file;
   * it must exist in the target database).
   *
   * The vector is interpolated into raw SQL, so it is validated first:
   * TypeScript types are erased at runtime, and a non-numeric element would
   * otherwise be spliced into the statement verbatim.
   *
   * @param inputVector query embedding; must be a non-empty array of finite numbers
   * @param limit maximum number of rows to return (positive integer, default 5)
   * @returns rows with `id`, `meta_data` and `similarity`, most similar first
   * @throws Error if `inputVector` or `limit` is invalid
   */
  async findSimilarDocuments(inputVector: number[], limit = 5) {
    // An empty vector would render as `ARRAY[]`, which PostgreSQL rejects
    // without an explicit cast, so fail early with a clear message.
    if (!Array.isArray(inputVector) || inputVector.length === 0) {
      throw new Error('inputVector must be a non-empty array of numbers');
    }
    // Number.isFinite is false for non-numbers, NaN and +/-Infinity, so only
    // plain numeric literals can reach the SQL text below.
    if (!inputVector.every(value => Number.isFinite(value))) {
      throw new Error('inputVector must contain only finite numbers');
    }
    if (!Number.isSafeInteger(limit) || limit <= 0) {
      throw new Error('limit must be a positive integer');
    }

    const vectorLiteral = inputVector.join(',');
    const data = await this.DocumentsVectorTableModel.query(
      `
      SELECT
        id,
        meta_data,
        cosine_similarity(vector_data, ARRAY[${vectorLiteral}]) AS similarity
      FROM
        "documents_vector_table"
      ORDER BY
        similarity DESC
      LIMIT ${limit}
    `
    );
    return data;
  }
}

4. 编写控制器,触发RAG内容召回

  /**
   * POST /findSimilarDocuments
   *
   * Embeds the `queryString` from the request body and returns whatever
   * the VectorDB service's similarity search yields for that embedding.
   *
   * @param queryString text to search for, read from the request body
   * @returns the result of `VectorDB.findSimilarDocuments`
   */
  @Post('/findSimilarDocuments')
  async findSimilarDocuments(@Body() { queryString }) {
    // Project-specific wrapper around TextEmbeddingV2; called with a
    // single-element batch, so only the first embedding is used.
    const embeddings: number[][] =
      await this.AmapEmbeddingV2.transDocumentsByAmapEmbeddingV2(
        [queryString],
        'document'
      );
    const [queryEmbedding] = embeddings;
    return await this.VectorDB.findSimilarDocuments(queryEmbedding);
  }

5. 执行结果

通过检索服务,找出与查询内容最相近的文档片段,并按相似度从高到低返回前 5 条

image.png