Node.js+PostgreSQL搭建内容检索服务
- 使用Midway.js搭建Node服务
- 使用pg+typeorm实现向量数据检索
1. 修改ORM数据库配置
// config.{env}.js
// 当前项目配置了多个数据库,请按需配置
// ORM configuration exported for the current environment. Two named
// connections are declared under `orm`: `default` (MySQL) and `vdb` (PostgreSQL).
export default {
// Apply for an RDS instance and replace the masked DB settings below.
orm: {
// `default` connection: MySQL on port 3306.
default: {
type: 'mysql',
host: '******',
port: 3306,
username: '******',
password: '******',
database: '******',
// Schema auto-sync and SQL logging are both switched off for this connection.
synchronize: false,
logging: false,
charset: 'utf8mb4',
},
// `vdb` connection: PostgreSQL on port 5432. The name `vdb` is what
// `@InjectEntityModel(DocumentsVectorTable, 'vdb')` refers to in the service.
vdb: {
type: 'postgres',
host: '******',
port: 5432,
username: 'ai_base_vdb',
password: '******',
database: 'ai_base_vdb',
},
},
}
2. 创建EntityModel
import { Column, PrimaryColumn, CreateDateColumn } from 'typeorm';
import { EntityModel } from '@midwayjs/orm';
// 指定数据表
/**
 * Entity mapped to the `documents_vector_table` table.
 *
 * Each property is bound to its snake_case column through the decorator's
 * `name` option; `id` uses the default column name.
 */
@EntityModel('documents_vector_table')
export class DocumentsVectorTable {
  /** Primary key; declared with `@PrimaryColumn`, so the caller supplies it. */
  @PrimaryColumn()
  id: number;

  /** Creation timestamp, stored in `gmt_create`. */
  @CreateDateColumn({ name: 'gmt_create' })
  gmtCreate: Date;

  /** Identifier of the source document, stored in `document_id`. */
  @Column({ name: 'document_id' })
  documentId: number;

  /** Metadata for the row, stored as a string in `meta_data`. */
  @Column({ name: 'meta_data' })
  metaData: string;

  /** Vector payload, typed as a string and stored in `vector_data`. */
  @Column({ name: 'vector_data' })
  vectorData: string;
}
3. 创建VectorDB Service
import { Provide, Inject, Config, Logger } from '@midwayjs/core';
import { ILogger } from '@midwayjs/logger';
import { DocumentsVectorTable } from '../entity/DocumentsVectorTable';
import { InjectEntityModel } from '@midwayjs/orm';
import { Repository } from 'typeorm';
/**
 * Data-access service for `documents_vector_table`.
 *
 * Wraps the TypeORM repository bound to the `vdb` ORM configuration and
 * exposes list, save and similarity-search operations.
 */
@Provide()
export class VectorDB {
  @Inject()
  ctx;

  @Config('env')
  env: string;

  @Logger()
  logger: ILogger;

  // Bind the repository to the `vdb` ORM configuration instead of the default one.
  @InjectEntityModel(DocumentsVectorTable, 'vdb')
  DocumentsVectorTableModel: Repository<DocumentsVectorTable>;

  /**
   * List every row of the vector table.
   * @returns all stored rows, as returned by `Repository.find()`
   */
  async findDocumentsData() {
    return this.DocumentsVectorTableModel.find();
  }

  /**
   * Persist one document row.
   * @param id primary key of the row
   * @param documentId document ID
   * @param metaData metadata stored alongside the vector
   * @param vectorData vector components; serialized as `[v1,v2,...]` text
   * @returns the result of `Repository.save()`
   */
  async saveDocumentData(
    id: number,
    documentId: number,
    metaData: string,
    vectorData: number[]
  ) {
    return this.DocumentsVectorTableModel.save({
      id,
      gmtCreate: new Date(),
      documentId,
      metaData,
      vectorData: `[${vectorData.join(',')}]`,
    });
  }

  /**
   * Retrieve the 5 rows most similar to `inputVector`, ranked by the
   * database-side `cosine_similarity` function in descending order.
   * @param inputVector query vector; must be a non-empty array of finite numbers
   * @returns raw query rows with `id`, `meta_data` and `similarity`
   * @throws Error when `inputVector` is empty or contains a non-finite / non-number value
   */
  async findSimilarDocuments(inputVector: number[]) {
    // The vector is interpolated into raw SQL below, so it is validated first:
    // an empty array would render `ARRAY[]` (rejected by PostgreSQL), and any
    // non-number value reaching this point at runtime would be injected verbatim.
    const isValidVector =
      Array.isArray(inputVector) &&
      inputVector.length > 0 &&
      inputVector.every(
        component => typeof component === 'number' && Number.isFinite(component)
      );
    if (!isValidVector) {
      throw new Error('inputVector must be a non-empty array of finite numbers');
    }

    // Only finite numeric literals remain, so the join is safe to embed.
    // NOTE(review): the signature of `cosine_similarity` is not visible here, so
    // the ARRAY[...] literal form is kept rather than switching to a bound parameter.
    const arrayLiteral = inputVector.join(',');
    return this.DocumentsVectorTableModel.query(
      `
      SELECT
        id,
        meta_data,
        cosine_similarity(vector_data, ARRAY[${arrayLiteral}]) AS similarity
      FROM
        "documents_vector_table"
      ORDER BY
        similarity DESC
      LIMIT 5
      `
    );
  }
}
4. 编写控制器,触发RAG内容召回
/**
 * POST /findSimilarDocuments
 *
 * Embeds `queryString` from the request body and returns whatever
 * `VectorDB.findSimilarDocuments` yields for the first embedding vector.
 */
@Post('/findSimilarDocuments')
async findSimilarDocuments(@Body() { queryString }) {
  // Self-wrapped TextEmbeddingV2 helper: called with a one-element batch and
  // the 'document' mode argument.
  const embeddings: number[][] =
    await this.AmapEmbeddingV2.transDocumentsByAmapEmbeddingV2(
      [queryString],
      'document'
    );

  // Only one text was submitted, so only the first vector is used.
  const [embedding] = embeddings;
  return this.VectorDB.findSimilarDocuments(embedding);
}
5. 执行结果
通过检索服务,找出与查询内容最相近的文档片段,并按相似度从高到低返回(最多 5 条)