Q10: What is a Stream? What types are there, and how do you use streams to process large files?

Node.js Interview Questions, Detailed Answers - Q10

Stream Overview

A Stream is Node.js's abstract interface for working with streaming data: it processes data in chunks as it arrives, so large amounts of data can be handled without loading everything into memory at once.
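
To make the memory benefit concrete, here is a minimal sketch (file name and port are placeholders) contrasting the two approaches when serving a file over HTTP: fs.readFile() buffers the entire file in memory before responding, while fs.createReadStream() sends it chunk by chunk.

const fs = require('fs')
const http = require('http')

http.createServer((req, res) => {
  // Buffered: the whole file would be read into memory first
  // fs.readFile('large-file.txt', (err, data) => res.end(data))

  // Streaming: data flows to the response in chunks, memory use stays flat
  fs.createReadStream('large-file.txt').pipe(res)
}).listen(3000)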

The Four Types of Streams

1. Readable Stream
const fs = require('fs')

// Create a readable stream
const readableStream = fs.createReadStream('large-file.txt')

// Listen for data events
readableStream.on('data', (chunk) => {
  console.log('Received chunk:', chunk.length, 'bytes')
})

// Listen for the end event
readableStream.on('end', () => {
  console.log('Finished reading the file')
})

// Listen for errors
readableStream.on('error', (err) => {
  console.error('Read error:', err)
})
2. Writable Stream
const fs = require('fs')

// Create a writable stream
const writableStream = fs.createWriteStream('output.txt')

// Listen for the finish event
writableStream.on('finish', () => {
  console.log('Finished writing')
})

// Listen for errors
writableStream.on('error', (err) => {
  console.error('Write error:', err)
})

// Write data
writableStream.write('Hello World\n')
writableStream.write('This is the second line\n')

// End the stream
writableStream.end()
3. Duplex Stream
const { Duplex } = require('stream')

// Create a duplex stream
const duplexStream = new Duplex({
  read(size) {
    // Read side: push data, then signal the end
    this.push('a chunk of data')
    this.push(null) // end of data
  },

  write(chunk, encoding, callback) {
    // Write side: handle incoming data
    console.log('Wrote:', chunk.toString())
    callback()
  },
})

// Use the duplex stream
duplexStream.on('data', (chunk) => {
  console.log('Read:', chunk.toString())
})

duplexStream.write('Hello')
duplexStream.end()
4. Transform Stream
const { Transform } = require('stream')

// Create a transform stream
const transformStream = new Transform({
  transform(chunk, encoding, callback) {
    // Transform the data (uppercase it)
    const transformed = chunk.toString().toUpperCase()
    callback(null, transformed)
  },
})

// Use the transform stream
transformStream.on('data', (chunk) => {
  console.log('Transformed:', chunk.toString())
})

transformStream.write('hello world')
transformStream.end()

Processing Large Files

1. File Copy
const fs = require('fs')

// Copy a large file with streams
function copyFile(source, destination) {
  const readStream = fs.createReadStream(source)
  const writeStream = fs.createWriteStream(destination)

  // Connect with a pipe
  readStream.pipe(writeStream)

  // Listen for completion
  writeStream.on('finish', () => {
    console.log('File copy finished')
  })

  // Listen for errors
  readStream.on('error', (err) => {
    console.error('Read error:', err)
  })

  writeStream.on('error', (err) => {
    console.error('Write error:', err)
  })
}

copyFile('large-file.txt', 'copy-of-large-file.txt')
2. File Compression
const fs = require('fs')
const zlib = require('zlib')

// Compress a large file
function compressFile(inputFile, outputFile) {
  const readStream = fs.createReadStream(inputFile)
  const writeStream = fs.createWriteStream(outputFile)
  const gzip = zlib.createGzip()

  // Pipe chain: read → gzip → write
  readStream.pipe(gzip).pipe(writeStream)

  writeStream.on('finish', () => {
    console.log('File compression finished')
  })
}

compressFile('large-file.txt', 'large-file.txt.gz')
3. File Decompression
const fs = require('fs')
const zlib = require('zlib')

// Decompress a gzip file
function decompressFile(inputFile, outputFile) {
  const readStream = fs.createReadStream(inputFile)
  const writeStream = fs.createWriteStream(outputFile)
  const gunzip = zlib.createGunzip()

  // Pipe chain: read → gunzip → write
  readStream.pipe(gunzip).pipe(writeStream)

  writeStream.on('finish', () => {
    console.log('File decompression finished')
  })
}

decompressFile('large-file.txt.gz', 'decompressed-file.txt')

Custom Streams

1. Custom Readable Stream
const { Readable } = require('stream')

class CounterStream extends Readable {
  constructor(max) {
    super()
    this.max = max
    this.count = 0
  }

  _read() {
    if (this.count < this.max) {
      this.push(`Count: ${this.count}\n`)
      this.count++
    } else {
      this.push(null) // end the stream
    }
  }
}

// Use the custom readable stream
const counter = new CounterStream(5)
counter.on('data', (chunk) => {
  console.log(chunk.toString())
})
2. Custom Transform Stream
const { Transform } = require('stream')

class LineTransform extends Transform {
  constructor() {
    super()
    this.buffer = ''
  }

  _transform(chunk, encoding, callback) {
    this.buffer += chunk.toString()

    // Split into lines
    const lines = this.buffer.split('\n')
    this.buffer = lines.pop() // keep the last, possibly incomplete, line

    // Process the complete lines
    lines.forEach((line) => {
      if (line.trim()) {
        this.push(`Processed: ${line}\n`)
      }
    })

    callback()
  }

  _flush(callback) {
    // Process whatever is left in the buffer
    if (this.buffer.trim()) {
      this.push(`Processed: ${this.buffer}\n`)
    }
    callback()
  }
}

// Use the custom transform stream
const lineTransform = new LineTransform()
lineTransform.on('data', (chunk) => {
  console.log(chunk.toString())
})

lineTransform.write('line one\nline two\nline three')
lineTransform.end()

Advanced Stream Usage

1. Error Handling
const fs = require('fs')

function safeFileCopy(source, destination) {
  const readStream = fs.createReadStream(source)
  const writeStream = fs.createWriteStream(destination)

  // Error handling: destroy the other side on failure
  readStream.on('error', (err) => {
    console.error('Read error:', err)
    writeStream.destroy()
  })

  writeStream.on('error', (err) => {
    console.error('Write error:', err)
    readStream.destroy()
  })

  // Connect with a pipe
  readStream.pipe(writeStream)

  writeStream.on('finish', () => {
    console.log('Copy finished')
  })
}
2. Progress Monitoring
const fs = require('fs')

function copyWithProgress(source, destination) {
  const readStream = fs.createReadStream(source)
  const writeStream = fs.createWriteStream(destination)

  // Get the total file size up front
  const stats = fs.statSync(source)
  const fileSize = stats.size
  let copiedSize = 0

  readStream.on('data', (chunk) => {
    copiedSize += chunk.length
    const progress = ((copiedSize / fileSize) * 100).toFixed(2)
    console.log(`Progress: ${progress}%`)
  })

  readStream.pipe(writeStream)

  writeStream.on('finish', () => {
    console.log('Copy finished')
  })
}
3. Flow Control
const fs = require('fs')

function controlledCopy(source, destination) {
  const readStream = fs.createReadStream(source)
  const writeStream = fs.createWriteStream(destination)

  // Throttle the read side: pause after each chunk, resume after a delay.
  // (Manual pause()/resume() is not combined with pipe() here, because
  // pipe() manages pausing and resuming itself.)
  readStream.on('data', (chunk) => {
    console.log('Read chunk:', chunk.length)
    writeStream.write(chunk)

    // Pause reading
    readStream.pause()

    // Simulate slow processing
    setTimeout(() => {
      readStream.resume()
    }, 100)
  })

  // Close the writable side once reading is done
  readStream.on('end', () => {
    writeStream.end()
  })
}
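
For completeness, the canonical way to apply backpressure manually is to check the return value of write() and wait for the writable stream's 'drain' event before resuming. A minimal sketch (function and file names are placeholders):

const fs = require('fs')

function copyWithBackpressure(source, destination) {
  const readStream = fs.createReadStream(source)
  const writeStream = fs.createWriteStream(destination)

  readStream.on('data', (chunk) => {
    // write() returns false when the internal buffer is full
    const canContinue = writeStream.write(chunk)
    if (!canContinue) {
      readStream.pause()
      // Resume reading once the writable side has drained
      writeStream.once('drain', () => readStream.resume())
    }
  })

  readStream.on('end', () => writeStream.end())
}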

Performance Optimization

1. Use Pipes
// Efficient file processing
const fs = require('fs')
const zlib = require('zlib')

// read → gzip → write
fs.createReadStream('input.txt')
  .pipe(zlib.createGzip())
  .pipe(fs.createWriteStream('output.txt.gz'))
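
Note that errors do not propagate automatically along a pipe() chain, so each stream needs its own error handler. Since Node.js 10, stream.pipeline() wires up error handling and cleanup for the entire chain; a minimal sketch of the same gzip flow:

const fs = require('fs')
const zlib = require('zlib')
const { pipeline } = require('stream')

// pipeline() destroys every stream on failure and reports the first error
pipeline(
  fs.createReadStream('input.txt'),
  zlib.createGzip(),
  fs.createWriteStream('output.txt.gz'),
  (err) => {
    if (err) {
      console.error('Pipeline failed:', err)
    } else {
      console.log('Pipeline succeeded')
    }
  }
)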
2. Parallel Processing
const fs = require('fs')

// Process multiple files concurrently
function processFiles(files) {
  files.forEach((file) => {
    const readStream = fs.createReadStream(file)
    const writeStream = fs.createWriteStream(`${file}.processed`)

    readStream.pipe(writeStream)
  })
}
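
The version above starts all the streams but never reports when they have all finished or whether any of them failed. One way to track that, assuming Node.js 15+ where stream/promises is available, is to combine the promise-based pipeline() with Promise.all() (the .processed suffix is kept from the example above):

const fs = require('fs')
const { pipeline } = require('stream/promises')

// Copy every file concurrently and resolve once all of them are done
async function processFilesTracked(files) {
  await Promise.all(
    files.map((file) =>
      pipeline(
        fs.createReadStream(file),
        fs.createWriteStream(`${file}.processed`)
      )
    )
  )
  console.log('All files processed')
}

processFilesTracked(['a.txt', 'b.txt']).catch((err) => {
  console.error('Processing failed:', err)
})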

Summary

  • Stream types: Readable, Writable, Duplex, Transform
  • Key advantage: memory-efficient, well suited to large files
  • Core methods: pipe(), on(), write(), end()
  • Typical uses: file operations, network communication, data transformation
  • Performance: use pipes, handle errors, monitor progress
  • Best practices: choose the right stream type and always handle errors