实现前端大文件分片下载

209 阅读4分钟

要实现一个简单文件下载功能是较为容易的,核心代码如下:

// Minimal single-request download: fetch the whole file as one Blob and hand
// it to the browser through a temporary <a download> element.
request(url, {
  method,
  responseType: 'blob',
  url,
  getResponse: true,
  timeout: 0,
}).then(result => {
  const blob = result.data;
  const objectUrl = window.URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = objectUrl;
  a.download = '文件名';
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);
  // An object URL keeps its Blob alive until it is revoked, so release it
  // once the click has been dispatched; deferring by one task lets the
  // browser start the download first.
  setTimeout(() => window.URL.revokeObjectURL(objectUrl), 0);
  return result;
})

这种方法存在的问题是:如果要下载的文件比较大,达到几百 MB 甚至 GB 级别时,整个文件都会被一次性读入内存,造成大量的内存占用,甚至导致页面崩溃。因此需要对大文件进行分片下载,该功能需要前后端配合实现。

前端核心实现思路

  1. 分片下载:在请求头中携带 Range: bytes=start-end(start 和 end 都是包含在内的字节偏移,即闭区间),即可只读取资源的指定片段
  2. 根据服务端返回的文件大小,进行任务拆分,下载完成后合并
  3. 下载的切片存入 indexedDB 数据库,避免内存中保留的切片过多时浏览器内存超限
  4. 为了控制并发数量,需要使用任务执行器来控制,避免并发过多时,阻塞其他请求

分片下载逻辑

  1. 根据文件大小,拆分出多个下载任务,传入执行器
  2. 执行器执行任务完成后,返回所有数据
  3. 将所有数据进行合并下载
import {request} from 'umi';
import TaskListRunner from './TaskListRunner';
// Default size of a single download chunk, in bytes (10 MiB).
const DEFAULT_CHUNK_SIZE = 10 * 2 ** 20
/**
 * Concatenates the downloaded parts into one Blob and triggers a browser
 * "save file" download for it.
 *
 * @param {Array<Blob|ArrayBuffer|string>} mergedBlob - Parts in final file order.
 * @param {string} fileName - Name suggested to the browser for the saved file.
 */
function downloadBlob(mergedBlob, fileName) {
  console.log('mergedBlob.length:', mergedBlob.length)
  const blob = new Blob(mergedBlob, {type: 'application/octet-stream'})
  const url = window.URL.createObjectURL(blob)
  const a = document.createElement('a')
  a.href = url
  a.download = fileName
  // Some browsers ignore click() on an anchor that is not in the document,
  // so attach it for the duration of the click.
  document.body.appendChild(a)
  a.click()
  document.body.removeChild(a)
  // Revoke on the next task rather than synchronously, so the browser can
  // start reading the Blob before its URL is invalidated.
  setTimeout(() => window.URL.revokeObjectURL(url), 0)
}
/**
 * Converts the result of one chunk request into a Blob.
 *
 * @param {{response: {blob: function(): Promise<Blob>}}} res - Request result
 *   whose `response` exposes a `blob()` reader.
 * @returns {Promise<Blob>} Resolves with the body as a Blob; rejects if the
 *   body cannot be read or `res.response` is missing.
 */
async function formatResponse(res) {
  const body = await res.response.blob()
  return body
}
/**
 * Builds the HTTP Range header values that split a file into chunks.
 *
 * @param {number} fileSize - Total size in bytes (> 0).
 * @param {number} chunkSize - Maximum bytes per chunk (> 0).
 * @returns {string[]} One `bytes=start-end` value per chunk, in file order.
 */
function buildChunkRanges(fileSize, chunkSize) {
  const ranges = []
  for (let start = 0; start < fileSize; start += chunkSize) {
    // Range bounds are inclusive on both ends, so a chunk of `chunkSize`
    // bytes ends at start + chunkSize - 1 and the last chunk at fileSize - 1.
    const end = Math.min(start + chunkSize, fileSize) - 1
    ranges.push(`bytes=${start}-${end}`)
  }
  return ranges
}

/**
 * Downloads a file in chunks with bounded concurrency, then merges the chunks
 * and triggers a browser download.
 *
 * @param {string} url - Endpoint that serves the file chunks.
 * @param {string} fileName - File name sent to the server and used for saving.
 * @param {number|string} fileSize - Total file size in bytes.
 * @param {number} [chunkSize=DEFAULT_CHUNK_SIZE] - Bytes per chunk.
 * @returns {void} Progress and failures are reported through `Message`.
 */
function shardDownload(url, fileName, fileSize, chunkSize = DEFAULT_CHUNK_SIZE) {
  const totalSize = Number(fileSize)
  // Validate before showing the loading toast so an early return cannot
  // leave it on screen; non-positive sizes would produce no tasks at all.
  if (!Number.isFinite(totalSize) || totalSize <= 0) {
    Message.error('文件大小错误')
    return
  }
  if (!Number.isFinite(chunkSize) || chunkSize <= 0) {
    Message.error('分片大小错误')
    return
  }
  const loadingCancel = Message.loading('正在下载中...')
  // One lazy task per chunk; the runner decides when each one starts.
  const taskList = buildChunkRanges(totalSize, chunkSize).map((range, index) => () => {
    console.log('开始下载分片range:', range, 'index:', index)
    return request(url, {
      method: 'post',
      data: {
        chunkSize,
        chunkIndex: index,
        name: fileName
      },
      timeout: 0,
      getResponse: true,
      // responseType is a request option, not an HTTP header.
      responseType: 'blob',
      headers: {
        range
      }
    })
  })
  // Run the tasks with at most 3 requests in flight at a time.
  const runner = new TaskListRunner(taskList, 3, formatResponse)
  runner.start().then(res => {
    console.log('分片下载完成,开始合并文件和下载')
    downloadBlob(res, fileName)
    Message.success('操作成功')
  }).catch(err => {
    Message.error(err.message || '操作失败')
  }).finally(() => {
    loadingCancel()
  })
}
export default shardDownload;

任务执行器逻辑

  1. 传入任务列表,最大并发数,任务执行成功后的数据格式化函数
  2. 任务列表中的任务是异步任务,返回一个promise
  3. 每一个任务执行成功后,将数据格式化后存入indexedDB
  4. 所有任务执行完成后,从indexedDB中取出数据,返回给调用者
  5. 任务执行过程中,如果有任务失败,停止所有任务,返回错误
  6. 所有任务执行完成后,或者任务失败后,或者任务刚开始执行之前,都会清空indexedDB
  7. 任务执行器是单例模式,同一时间只能有一组任务列表在执行(列表内部的任务仍按最大并发数并行执行)
  8. 任务执行器执行完成后,会重置状态,可以再次执行
  9. 最终所有任务执行完毕后,要按照顺序取出所有数据
import IDBWrapper from '@/util/indexedDB'
/**
 * Runs a list of async tasks with a bounded number in flight, stores each
 * formatted result in IndexedDB keyed by task index, and resolves with all
 * results in task order once every task has succeeded.
 *
 * Only one run may be active at a time across all instances; the first task
 * failure rejects the run and later failures from the same run are ignored.
 */
class TaskListRunner {
  // True while a run is in progress (shared by all instances).
  static isUssing = false
  // The most recently constructed runner.
  static instance

  /**
   * @param {Array<function(): Promise<*>>} taskList - Lazy tasks; each call
   *   starts the work and returns a promise.
   * @param {number} [maxQps=3] - Maximum number of tasks in flight.
   * @param {function(*): *} [formatResponse] - Maps a task result (sync or
   *   async) to the value that is stored and returned.
   */
  constructor(taskList, maxQps = 3, formatResponse = res => res) {
    if (TaskListRunner.isUssing) {
      // A run is already in progress: hand back the running instance, whose
      // start() will reject, instead of creating a second runner.
      return TaskListRunner.instance
    }
    TaskListRunner.instance = this
    this.taskList = taskList
    this.maxQps = maxQps
    this.resultList = []
    this._runningCount = 0
    this._successCount = 0
    this._taskQueue = []
    this._startResolve = null
    this._startReject = null
    // Per-instance run state: `_active` is true between start() and the
    // moment the run settles; `_runId` lets callbacks from an older run on
    // this instance detect that they are stale.
    this._active = false
    this._runId = 0
    this.formatResponse = formatResponse
    this.db = new IDBWrapper('taskListRunner', 'taskList', 'index')
    // start() waits on this so leftover rows are gone before new ones land.
    this._ready = this._clearDb()
  }

  /**
   * Empties the backing store.
   * @returns {Promise<void>} Never rejects; a failed clear is only logged.
   */
  _clearDb() {
    return this.db.clear().then(() => {
      console.log('clear db success')
    }).catch(() => {
      console.log('clear db fail')
    })
  }

  /** @returns {boolean} Whether `runId` identifies the run still in progress. */
  _isCurrent(runId) {
    return this._active && runId === this._runId
  }

  /**
   * Wraps a task so that running it tracks concurrency, stores the result,
   * and starts the next queued task when it finishes.
   *
   * @param {function(): Promise<*>} task - The task to wrap.
   * @param {number} index - Position of the task in `taskList`.
   * @returns {function(): Promise<void>} Starter; its promise never rejects.
   */
  _createTask(task, index) {
    const runId = this._runId
    return async () => {
      this._runningCount++
      try {
        const data = await task()
        // The run may have been stopped while this request was in flight.
        if (!this._isCurrent(runId)) {
          return
        }
        const fData = await this.formatResponse(data)
        console.log('formatResponse success, index:', index)
        await this.db.add({index, data: fData})
        console.log('insert into db success, index:', index)
        if (!this._isCurrent(runId)) {
          return
        }
        this._successCount++
        console.log('this._successCount', this._successCount)
        if (this._successCount === this.taskList.length) {
          console.log('任务全部完成')
          this._success()
        }
      } catch (err) {
        // A failure from a stale run must not abort whatever is running now.
        if (this._isCurrent(runId)) {
          this._stop(err)
        }
      } finally {
        // Counters were reset when the run settled; only touch them and pull
        // the next task while this run is still the active one.
        if (this._isCurrent(runId)) {
          this._runningCount--
          if (this._taskQueue.length) {
            this._taskQueue.shift()()
          }
        }
      }
    }
  }

  /**
   * Rejects the current run and resets state. No-op when no run is active, so
   * several tasks failing together reject only once.
   * @param {Error} err - Reason passed to the start() promise.
   */
  _stop(err) {
    if (!this._active) {
      return
    }
    this._startReject(err)
    this._reset()
  }

  /** Returns the runner to its idle state and clears the backing store. */
  _reset() {
    TaskListRunner.isUssing = false
    this._active = false
    this._taskQueue = []
    this._runningCount = 0
    this._successCount = 0
    this.resultList = []
    this._startResolve = null
    this._startReject = null
    this._ready = this._clearDb()
  }

  /** Reads every stored result back in task order and resolves the run. */
  _success() {
    const runId = this._runId
    Promise.all(this.taskList.map((_, index) => this.db.get(index))).then(res => {
      if (!this._isCurrent(runId)) {
        return
      }
      this._startResolve(res.map(item => item.data))
      this._reset()
    }).catch(err => {
      console.log(err)
      this._stop(new Error('get all data fail', {cause: err}))
    })
  }

  /**
   * Starts executing the task list.
   *
   * @returns {Promise<Array<*>>} Resolves with the formatted results in task
   *   order (an empty array for an empty list); rejects with the first task
   *   error, or immediately if another run is already in progress.
   */
  start() {
    if (TaskListRunner.isUssing) {
      return Promise.reject(new Error('已经有在执行中的任务'))
    }
    TaskListRunner.isUssing = true
    this._active = true
    this._runId++
    // eslint-disable-next-line promise/param-names
    return new Promise((startResolve, startReject) => {
      this._startResolve = startResolve
      this._startReject = startReject
      if (this.taskList.length === 0) {
        // Nothing would ever call _success(), so settle right away.
        startResolve([])
        this._reset()
        return
      }
      // Wait for the pending store clear so it cannot wipe fresh rows.
      this._ready.then(() => {
        this.taskList.forEach((task, index) => {
          const run = this._createTask(task, index)
          if (this._runningCount >= this.maxQps) {
            this._taskQueue.push(run)
          } else {
            run()
          }
        })
      })
    })
  }
}
export default TaskListRunner;

indexedDB数据库操作逻辑

/**
 * Promise-based wrapper around a single IndexedDB object store.
 *
 * The connection is opened lazily on first use and then shared by every
 * operation, instead of opening (and never closing) a new one per call.
 */
class IDBWrapper {
  /**
   * @param {string} dbName - Database name.
   * @param {string} storeName - Object store name.
   * @param {string} keyPath - Key path used when the store is created.
   */
  constructor(dbName, storeName, keyPath) {
    this.dbName = dbName;
    this.storeName = storeName;
    this.keyPath = keyPath;
    this.db = null;
    // In-flight or completed open(); shared so concurrent callers reuse it.
    this._opening = null;
  }

  /**
   * Opens the database, creating the object store during an upgrade if it
   * does not exist yet.
   * @returns {Promise<IDBDatabase>} The shared connection.
   */
  open() {
    if (this._opening) {
      return this._opening;
    }
    const opening = new Promise((resolve, reject) => {
      const request = indexedDB.open(this.dbName);
      request.onupgradeneeded = event => {
        const db = event.target.result;
        if (!db.objectStoreNames.contains(this.storeName)) {
          db.createObjectStore(this.storeName, { keyPath: this.keyPath });
        }
      };
      request.onsuccess = event => {
        const db = event.target.result;
        // If the browser closes the connection on its own, forget it so the
        // next operation reopens instead of using a dead handle.
        db.onclose = () => {
          if (this.db === db) {
            this.db = null;
            this._opening = null;
          }
        };
        this.db = db;
        resolve(db);
      };
      request.onerror = event => {
        reject(event.target.error);
      };
    });
    // Do not cache a failed open; let a later call retry.
    opening.catch(() => {
      if (this._opening === opening) {
        this._opening = null;
      }
    });
    this._opening = opening;
    return opening;
  }

  /** Closes the connection if one is open; safe to call when never opened. */
  close() {
    if (this.db) {
      this.db.close();
    }
    this.db = null;
    this._opening = null;
  }

  /**
   * Runs one request against the store in its own transaction.
   *
   * @param {'readonly'|'readwrite'} mode - Transaction mode.
   * @param {function(IDBObjectStore): IDBRequest} operate - Issues the request.
   * @returns {Promise<*>} Resolves with the request result; rejects with the
   *   request error or with an error thrown while creating the transaction.
   */
  _run(mode, operate) {
    return this.open().then(db => new Promise((resolve, reject) => {
      const store = db.transaction(this.storeName, mode).objectStore(this.storeName);
      const request = operate(store);
      request.onsuccess = event => {
        resolve(event.target.result);
      };
      request.onerror = event => {
        reject(event.target.error);
      };
    }));
  }

  /**
   * Inserts a record; rejects if its key already exists.
   * @param {Object} data - Record containing the key path property.
   * @returns {Promise<*>} The key of the stored record.
   */
  add(data) {
    return this._run('readwrite', store => store.add(data));
  }

  /**
   * Deletes the record with the given key.
   * @param {*} key - Record key.
   * @returns {Promise<undefined>}
   */
  delete(key) {
    return this._run('readwrite', store => store.delete(key));
  }

  /**
   * Reads the record with the given key.
   * @param {*} key - Record key.
   * @returns {Promise<Object|undefined>} The record, or undefined if absent.
   */
  get(key) {
    return this._run('readonly', store => store.get(key));
  }

  /**
   * Removes every record from the store.
   * @returns {Promise<undefined>}
   */
  clear() {
    return this._run('readwrite', store => store.clear());
  }

  /**
   * Deletes a single record; kept as an alias of delete() for existing callers.
   * @param {*} key - Record key.
   * @returns {Promise<undefined>}
   */
  deleteData(key) {
    return this.delete(key);
  }
}
export default IDBWrapper;

总结

通过以上方法进行大文件的下载可以做到并发控制和内存控制,其实还可以做到实时展示下载进度和暂停,恢复下载等,可以再额外补充逻辑。

任务执行器的实现原理可以参考我的另一篇文章:

juejin.cn/post/747111…