LLM 应用开发入门 - 实现 langchain.js ChatModel 接入火山引擎大模型和实现一个 CLI 聊天机器人（下）

书接上回，我们已经实现了一个 langchain.js 接入火山引擎的 ChatModel。

本文我们将这个大模型接入聊天 CLI，实现与大模型的交互式问答。

需求

我们希望这个简易的聊天 CLI 能够拥有以下功能

启动时由用户输入 prompt
支持回答流式输出
支持连续聊天和清空上下文

聊天 CLI 基础能力实现

由于实现基本的 CLI 输入输出不是本文重点。这里我们直接通过以下代码实现一个简单的 node.js 交互式程序，实现了

启动后接收用户输入的 prompt
接收 \clear 指令后打印“清空上下文”
其它输入后原样打印

import readline from 'node:readline'
import process, { stdin, stdout } from 'node:process'
import { EventEmitter } from 'node:events'

/**
 * Minimal interactive chat CLI skeleton.
 *
 * It first asks the user for a prompt, then enters a read/echo loop:
 * the `\clear` command prints a "context cleared" notice and every other
 * line is echoed back with an `xxx ` prefix.
 */
class ChatCli extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {import('node:stream').Readable} [options.input] stream lines are read from (defaults to `process.stdin`)
   * @param {import('node:stream').Writable} [options.output] stream all output is written to (defaults to `process.stdout`)
   */
  constructor({ input = stdin, output = stdout } = {}) {
    super()

    this.input = input
    this.output = output
    this.input.setEncoding('utf-8')
    // `setEncoding` is a Readable-only method: it is missing on plain
    // Writable streams (e.g. stdout redirected to a file) and would throw.
    // `setDefaultEncoding` is the Writable counterpart.
    this.output.setDefaultEncoding('utf-8')
  }

  /**
   * Asks for the initial prompt, then runs the interactive loop.
   *
   * @returns {Promise<void>} settles once the readline interface is closed
   */
  async runInputLoop() {
    const prompt = await this.prompt('请输入 prompt\n > ')

    // Same text `console.log('prompt', prompt)` would print, but routed
    // through the configured output stream.
    this.write(`prompt ${prompt}\n`)

    return new Promise((resolve) => {
      const rl = readline.createInterface({
        input: this.input,
        output: this.output,
      })

      rl.setPrompt('> ')
      rl.prompt()

      rl.on('line', (line) => {
        if (line === '\\clear') {
          this.write('清空上下文\n')
        }
        else {
          this.write(`xxx ${line}`)
          this.write('\n')
        }

        rl.prompt()
      })

      rl.on('close', resolve)

      // A 'SIGINT' listener on the interface intercepts Ctrl+C, so close
      // the interface ourselves and forward the signal to any
      // process-level listeners.
      rl.on('SIGINT', () => {
        rl.close()
        process.emit('SIGINT', 'SIGINT')
      })
    })
  }

  /**
   * Writes raw data to the output stream.
   *
   * @param {string} data text to write
   */
  write(data) {
    this.output.write(data)
  }

  /**
   * Asks a single question on a short-lived readline interface.
   *
   * @param {string} [query] text shown before the user's answer
   * @returns {Promise<string>} the line the user entered
   */
  prompt(query = '> ') {
    return new Promise((resolve) => {
      const rl = readline.createInterface({
        input: this.input,
        output: this.output,
      })
      rl.question(query, (answer) => {
        resolve(answer)
        rl.close()
      })
    })
  }
}

const cli = new ChatCli()

// Do not leave the loop's promise floating: report a failure and signal it
// through the exit code instead of relying on the unhandled-rejection default.
cli.runInputLoop().catch((error) => {
  console.error(error)
  process.exitCode = 1
})

大模型接入

由于聊天 CLI 已经实现，我们只需要在对应的代码点进行模型的交互。相关接入火山引擎细节见LLM 应用开发入门 - 实现 langchain.js ChatModel 接入火山引擎大模型和实现一个 CLI 聊天机器人（上）

初始化 langchain 火山大模型

构造函数中初始化火山大模型

import { ChatVolcengine } from 'langchain-bytedance-volcengine'

/**
 * Excerpt of ChatCli showing only the constructor change that creates the
 * chat model. Lines marked `// ....` stand for code elided from this excerpt.
 */
class ChatCli extends EventEmitter {
  constructor() {
    super()
    // ....
    // Create the Volcengine chat model; host, API key and model name are
    // read from environment variables.
    this.chatModel = new ChatVolcengine({
      volcengineApiHost: process.env.VOLCENGINE_HOST,
      volcengineApiKey: process.env.VOLCENGINE_API_KEY,
      model: process.env.VOLCENGINE_MODEL,
    })

    // ....
  }
}

prompt 接收和大模型聊天交互

将接收到的 prompt 作为 SystemMessage、将用户输入作为 HumanMessage 传入 stream 方法。这里的 SystemMessage 和 HumanMessage 也是 langchain 提供的工具类，用于构造消息
解析大模型返回的流式数据输出到终端

import { HumanMessage, SystemMessage } from '@langchain/core/messages'

 /**
  * Excerpt of runInputLoop: every non-command line is sent to the chat model
  * together with the initial prompt, and the reply is streamed to the output.
  * Lines marked `//....` stand for code elided from this excerpt; `rl` is not
  * defined here and is presumably created in the elided part.
  */
 async runInputLoop() {
    const prompt = await this.prompt('请输入 prompt\n > ')
    return new Promise((resolve) => {
      //....

      rl.on('line', async (line) => {
        if (line === '\\clear') {
          this.write('清空上下文\n')
        }
        else {
          // Only the system prompt and the current line are sent, so no
          // earlier turns are passed to the model.
          const stream = await this.chatModel.stream([new SystemMessage(prompt), new HumanMessage(line)])

          // Write each streamed chunk as soon as it arrives.
          for await (const chunk of stream) {
            this.write(chunk.content)
          }
          this.write('\n')
        }
        //....
      })
      //....
    })
  }

运行效果

连续聊天能力实现

虽然我们已经打通了 CLI 和大模型的交互聊天，但是此时聊天 CLI 是没有聊天上下文功能的。

我们需要为这个聊天 CLI 增加上下文功能。对于直接调用大模型 OPEN API 来说，这通常需要我们将上下文手动处理传入大模型的 API。

但是上面提过，作为一个强大的 LLM 应用开发框架，langchain 提供了开箱即用的能力帮助我们实现。

langchain 之所以称为 chain，是因为它可以以自定义 chain 的形式将多个工具串联起来使用。每个串联起来的工具必须是一个实现了 Runnable 接口的实例，目前 langchain 中实现了 Runnable 接口的组件有 Prompt、ChatModel、LLM、OutputParser、Retriever、Tool

这里我们使用 langchain 提供的RunnableWithMessageHistory进行聊天上下文的记录和调用；使用InMemoryChatMessageHistory来实现内存的聊天上下文的存储

修改代码实现如下

通过 ChatPromptTemplate.fromMessages 来初始化传给模型的完整 prompt。其中第一项为我们输入的SystemMessage，第二项为占位传递的历史上下文，第三项是本次我们的输入
通过自定义链将这个prompt和我们的火山chatModel串联起来
将自定义链传递给RunnableWithMessageHistory构造出 withMessageHistory 对象，并实现聊天历史的上下文对象
通过 withMessageHistory.stream 进行模型的调用，并同时传递本次的上下文config对象

import {
  ChatPromptTemplate,
  MessagesPlaceholder,
} from '@langchain/core/prompts'

import { InMemoryChatMessageHistory } from '@langchain/core/chat_history'
import { RunnableWithMessageHistory } from '@langchain/core/runnables'

/**
 * Excerpt of runInputLoop with conversation history: a prompt template is
 * piped into the chat model, wrapped in RunnableWithMessageHistory, and every
 * input line is streamed through it under the current session id.
 * `//....` stands for code elided from this excerpt; `rl` is not defined here
 * and is presumably created in the elided part.
 */
async runInputLoop() {
  const _prompt = await this.prompt('请输入 prompt\n > ')

  // Build the full prompt passed to the model with `ChatPromptTemplate.fromMessages`:
  // the first entry is the system message we entered, the second is a placeholder
  // for the history, the third is the current input.
  // NOTE(review): the ['system', text] tuple form is presumably parsed as a
  // template, so `{`/`}` typed by the user may be treated as variables — confirm.
  const prompt = ChatPromptTemplate.fromMessages([
    ['system', _prompt],
    new MessagesPlaceholder('chat_history'),
    ['human', '{input}'],
  ])

  // Chain the `prompt` and our Volcengine `chatModel` together.
  const chain = prompt.pipe(this.chatModel)

  // One history object per session id, created lazily below.
  const messageHistories = {}

  // Pass the chain to `RunnableWithMessageHistory` to build `withMessageHistory`,
  // supplying the lookup for each session's chat history.
  const withMessageHistory = new RunnableWithMessageHistory({
    runnable: chain,
    getMessageHistory: async (sessionId) => {
      if (messageHistories[sessionId] === undefined) {
        messageHistories[sessionId] = new InMemoryChatMessageHistory()
      }
      return messageHistories[sessionId]
    },
    inputMessagesKey: 'input',
    historyMessagesKey: 'chat_history',
  })

    return new Promise((resolve) => {
      // Shared config object; its sessionId selects which history is used.
      const config = {
        configurable: {
          sessionId: `${Date.now()}`,
        },
      }

      rl.on('line', async (line) => {
        if (line === '\\clear') {
          // On a reset request, switch to a new session id so later calls
          // start from an empty history.
          config.configurable.sessionId = `${Date.now()}`
        }
        else {
          // Call the model through `withMessageHistory.stream`, passing the
          // current session `config` along with the input.
          const stream = await withMessageHistory.stream({
            input: line,
          }, config)

          for await (const chunk of stream) {
            this.write(chunk.content)
          }
          this.write('\n')
        }

        rl.prompt()
      })
      //....
    })
}

再次运行代码测试，表现符合预期

完整实现

代码详见

import readline from 'node:readline'
import process, { stdin, stdout } from 'node:process'
import { EventEmitter } from 'node:events'

import { ChatVolcengine } from 'langchain-bytedance-volcengine'
import 'dotenv/config'
import { HumanMessage, SystemMessage } from '@langchain/core/messages'
import {
  ChatPromptTemplate,
  MessagesPlaceholder,
} from '@langchain/core/prompts'

import { InMemoryChatMessageHistory } from '@langchain/core/chat_history'
import { RunnableWithMessageHistory } from '@langchain/core/runnables'

/**
 * Interactive chat CLI backed by the Volcengine chat model.
 *
 * The user first enters a system prompt; every following line is sent to the
 * model together with the session's chat history and the reply is streamed
 * to the output. The `\clear` command starts a fresh session.
 */
class ChatCli extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {import('node:stream').Readable} [options.input] stream lines are read from (defaults to `process.stdin`)
   * @param {import('node:stream').Writable} [options.output] stream all output is written to (defaults to `process.stdout`)
   */
  constructor({ input = stdin, output = stdout } = {}) {
    super()

    this.input = input
    this.output = output
    this.input.setEncoding('utf-8')
    // `setEncoding` is a Readable-only method: it is missing on plain
    // Writable streams (e.g. stdout redirected to a file) and would throw.
    // `setDefaultEncoding` is the Writable counterpart.
    this.output.setDefaultEncoding('utf-8')

    // Host, API key and model name are read from environment variables.
    this.chatModel = new ChatVolcengine({
      volcengineApiHost: process.env.VOLCENGINE_HOST,
      volcengineApiKey: process.env.VOLCENGINE_API_KEY,
      model: process.env.VOLCENGINE_MODEL,
    })
  }

  /**
   * Asks for the system prompt, then runs the chat loop.
   *
   * @returns {Promise<void>} settles once the readline interface is closed
   */
  async runInputLoop() {
    const systemPrompt = await this.prompt('请输入 prompt\n > ')

    const prompt = ChatPromptTemplate.fromMessages([
      // Pass the user's text as a ready-made message rather than a
      // ['system', text] template, so `{`/`}` typed by the user are kept
      // literally instead of being parsed as template variables.
      new SystemMessage(systemPrompt),
      new MessagesPlaceholder('chat_history'),
      ['human', '{input}'],
    ])

    const chain = prompt.pipe(this.chatModel)

    // One in-memory history per session id, created on first use.
    const messageHistories = new Map()
    const withMessageHistory = new RunnableWithMessageHistory({
      runnable: chain,
      getMessageHistory: async (sessionId) => {
        if (!messageHistories.has(sessionId)) {
          messageHistories.set(sessionId, new InMemoryChatMessageHistory())
        }
        return messageHistories.get(sessionId)
      },
      inputMessagesKey: 'input',
      historyMessagesKey: 'chat_history',
    })

    return new Promise((resolve) => {
      const rl = readline.createInterface({
        input: this.input,
        output: this.output,
      })

      rl.setPrompt('> ')
      rl.prompt()

      // The counter keeps ids unique even when two sessions start within
      // the same millisecond.
      let sessionCount = 0
      const nextSessionId = () => `${Date.now()}-${sessionCount++}`

      const config = {
        configurable: {
          sessionId: nextSessionId(),
        },
      }

      // A reply may finish streaming after the interface was closed;
      // prompting a closed interface must be avoided.
      let closed = false
      const reprompt = () => {
        if (!closed) {
          rl.prompt()
        }
      }

      rl.on('line', async (line) => {
        // Nothing to send for a blank line.
        if (line.trim() === '') {
          reprompt()
          return
        }

        try {
          if (line === '\\clear') {
            // Drop the old history so it does not accumulate in memory,
            // then continue under a fresh session id.
            messageHistories.delete(config.configurable.sessionId)
            config.configurable.sessionId = nextSessionId()
            this.write('清空上下文\n')
          }
          else {
            const stream = await withMessageHistory.stream({
              input: line,
            }, config)

            for await (const chunk of stream) {
              this.write(chunk.content)
            }
            this.write('\n')
          }
        }
        catch (error) {
          // A failed request must not surface as an unhandled rejection;
          // report it and keep the loop usable.
          const reason = error instanceof Error ? error.message : String(error)
          this.write(`\n请求失败: ${reason}\n`)
        }

        reprompt()
      })

      rl.on('close', () => {
        closed = true
        resolve()
      })

      // A 'SIGINT' listener on the interface intercepts Ctrl+C, so close
      // the interface ourselves and forward the signal to any
      // process-level listeners.
      rl.on('SIGINT', () => {
        rl.close()
        process.emit('SIGINT', 'SIGINT')
      })
    })
  }

  /**
   * Writes raw data to the output stream.
   *
   * @param {string} data text to write
   */
  write(data) {
    this.output.write(data)
  }

  /**
   * Asks a single question on a short-lived readline interface.
   *
   * @param {string} [query] text shown before the user's answer
   * @returns {Promise<string>} the line the user entered
   */
  prompt(query = '> ') {
    return new Promise((resolve) => {
      const rl = readline.createInterface({
        input: this.input,
        output: this.output,
      })
      rl.question(query, (answer) => {
        resolve(answer)
        rl.close()
      })
    })
  }
}

const cli = new ChatCli()

// Do not leave the loop's promise floating: report a failure and signal it
// through the exit code instead of relying on the unhandled-rejection default.
cli.runInputLoop().catch((error) => {
  console.error(error)
  process.exitCode = 1
})

总结

通过本文我们实现了一个简易的聊天 CLI，并成功接入了火山引擎大模型，实现了流式输出和上下文管理功能。通过 langchain.js 提供的工具类和自定义链，我们不仅简化了与大模型的交互，还实现了连续聊天的能力