Large File Uploads on the Frontend: Chunked Upload, Resumable Upload, and Instant Upload

Setup


  • Frontend: Vue, Element UI
  • Server: Node.js

Chunked Upload

Split a large file into multiple smaller chunks, then upload them concurrently.


Approach

Frontend
  • Get the file the user selects via an <input type="file" /> file picker
<template>
  <div>
    <input type="file" @change="handleFileChange" />
  </div>
</template>

<script>
  handleFileChange(e) {
    const [file] = e.target.files;
    if (!file) return;
    this.container.file = file; // keep the File object for the steps below
  },
</script>

Set a chunk size and split the file into chunks

<script>
  const SIZE = 10 * 1024 * 1024; // chunk size: 10 MB

  // Split the file into chunks of `size` bytes
  createFileChunk(file, size = SIZE) {
    const fileChunkList = [];
    let cur = 0;
    while (cur < file.size) {
      fileChunkList.push({ file: file.slice(cur, cur + size) });
      cur += size;
    }
    return fileChunkList;
  },
</script>
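For example, with SIZE at 10 MB, a 25 MB file produces three chunks of 10 MB, 10 MB, and 5 MB; Blob.prototype.slice clamps the final chunk to the end of the file, so no bounds check is needed.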

Use a Web Worker to compute the file hash on a worker thread (the hash identifies the file's content), so the page doesn't freeze while hashing

<script>
  calculateHash(fileChunkList) {
    return new Promise(resolve => {
      // attach the worker instance to the component
      this.container.worker = new Worker("/hash.js");
      this.container.worker.postMessage({ fileChunkList });
      this.container.worker.onmessage = e => {
        const { hash } = e.data;
        if (hash) {
          resolve(hash);
        }
      };
    });
  }
</script>
// /public/hash.js
self.importScripts("/spark-md5.min.js"); // load spark-md5 into the worker

// Compute an incremental MD5 over all chunks
self.onmessage = e => {
  const { fileChunkList } = e.data;
  const spark = new self.SparkMD5.ArrayBuffer();
  let count = 0;
  const loadNext = index => {
    const reader = new FileReader();
    reader.readAsArrayBuffer(fileChunkList[index].file);
    reader.onload = e => {
      count++;
      spark.append(e.target.result);
      if (count === fileChunkList.length) {
        self.postMessage({
          hash: spark.end()
        });
        self.close();
      } else {
        // recurse into the next chunk; without this `else` branch,
        // loadNext would run once more past the last chunk and throw
        loadNext(count);
      }
    };
  };
  loadNext(0);
};

Uploading the chunks

<template>
  <div>
    <!-- ... -->
    <el-button @click="handleUpload">Upload</el-button>
  </div>
</template>
<script>
  // Upload all chunks concurrently
  async uploadChunks() {
    const requestList = this.data
      .map(({ chunk, hash }) => {
        const formData = new FormData();
        formData.append("chunk", chunk);
        formData.append("hash", hash);
        formData.append("filehash", this.container.hash);
        return { formData };
      })
      .map(({ formData }) =>
        // this.request is a small XHR wrapper, shown in the
        // resumable-upload section below
        this.request({
          url: "http://localhost:3000",
          data: formData,
          requestList: this.requestList // lets handlePause abort these later
        })
      );
    await Promise.all(requestList); // wait for every chunk to finish
  },
  async handleUpload() {
    if (!this.container.file) return;
    const fileChunkList = this.createFileChunk(this.container.file);
    this.container.hash = await this.calculateHash(fileChunkList);
    this.data = fileChunkList.map(({ file }, index) => ({
      chunk: file,
      hash: this.container.hash + "-" + index // file hash + chunk index
    }));
    await this.uploadChunks();
  }
</script>
Backend
  • Receiving chunks
const http = require("http");
const path = require("path");
const fse = require("fs-extra");
const multiparty = require("multiparty");

const server = http.createServer();
const UPLOAD_DIR = path.resolve(__dirname, "..", "target"); // where uploaded files are stored

server.on("request", async (req, res) => {
  res.setHeader("Access-Control-Allow-Origin", "*");
  res.setHeader("Access-Control-Allow-Headers", "*");
  if (req.method === "OPTIONS") {
    res.statusCode = 200;
    res.end();
    return;
  }

  // Use the multiparty package to parse the FormData sent by the frontend.
  // In the multiparty.parse callback, `files` holds the file fields of the
  // FormData and `fields` holds the non-file fields.
  const multipart = new multiparty.Form();

  multipart.parse(req, async (err, fields, files) => {
    if (err) {
      return;
    }
    const [chunk] = files.chunk;
    const [hash] = fields.hash;
    const [filehash] = fields.filehash;
    const chunkDir = path.resolve(UPLOAD_DIR, filehash);

    // Create the chunk directory if it doesn't exist yet
    if (!fse.existsSync(chunkDir)) {
      await fse.mkdirs(chunkDir);
    }

    // fse.move is an fs-extra method similar to fs.rename but cross-platform;
    // plain fs.rename can hit permission problems on Windows
    await fse.move(chunk.path, `${chunkDir}/${hash}`);
    res.end("received file chunk");
  });
});
server.listen(3000, () => console.log("listening on port 3000"));

Merging the chunks

const pipeStream = (path, writeStream) =>
  new Promise(resolve => {
    const readStream = fse.createReadStream(path);
    readStream.on("end", () => {
      fse.unlinkSync(path); // delete the chunk file once it has been piped
      resolve();
    });
    readStream.pipe(writeStream);
  });

const mergeFileChunk = async (filePath, filehash, size) => {
  const chunkDir = path.resolve(UPLOAD_DIR, filehash);
  const chunkPaths = await fse.readdir(chunkDir);
  // Sort by chunk index ("hash-index"); the order returned by
  // readdir is not guaranteed to match upload order
  chunkPaths.sort((a, b) => a.split("-")[1] - b.split("-")[1]);
  await Promise.all(
    chunkPaths.map((chunkPath, index) =>
      pipeStream(
        path.resolve(chunkDir, chunkPath),
        // Create a write stream positioned at this chunk's byte offset
        fse.createWriteStream(filePath, {
          start: index * size
        })
      )
    )
  );
  fse.rmdirSync(chunkDir); // remove the chunk directory after merging
};
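The article defines mergeFileChunk but never shows what calls it. Below is a minimal sketch of a /merge route, assuming the frontend POSTs { filehash, filename, size } as JSON once all chunks are uploaded; the route name, the resolvePost body parser, and the extractExt helper are illustrative assumptions (resolvePost is also used by the /verifyUpload endpoint later in the article).

// Minimal JSON body parser; also reused by /verifyUpload below
const resolvePost = req =>
  new Promise(resolve => {
    let body = "";
    req.on("data", data => (body += data));
    req.on("end", () => resolve(JSON.parse(body)));
  });

// Hypothetical helper: extract the extension (".mp4", ".zip", ...) from a filename
const extractExt = filename => filename.slice(filename.lastIndexOf("."));

server.on("request", async (req, res) => {
  if (req.url === "/merge") {
    const { filehash, filename, size } = await resolvePost(req);
    // The extension keeps the merged file's path from colliding with the
    // chunk directory, which is also named after the hash
    const filePath = path.resolve(UPLOAD_DIR, `${filehash}${extractExt(filename)}`);
    await mergeFileChunk(filePath, filehash, size);
    res.end(JSON.stringify({ code: 0, message: "file merged success" }));
  }
});

On the frontend, a matching merge request (sent with the same this.request wrapper) would fire once Promise.all in uploadChunks resolves.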

Resumable Upload

In pause/resume scenarios, the upload needs to pick up from where it previously stopped.


Approach

  • Resumable upload builds on top of chunked upload
  • When the user clicks pause, abort every chunk request still in flight
<template>
  <div>
    <!-- ... -->
    <el-button @click="handlePause" v-if="!isPaused">Pause</el-button>
    <el-button @click="handleResume" v-else>Resume</el-button>
  </div>
</template>
<script>
  request({
    url,
    method = "post",
    data,
    headers = {},
    requestList
  }) {
    return new Promise(resolve => {
      const xhr = new XMLHttpRequest();
      xhr.open(method, url);
      Object.keys(headers).forEach(key =>
        xhr.setRequestHeader(key, headers[key])
      );
      xhr.onload = e => {
        // Remove the finished xhr from the in-flight list
        if (requestList) {
          const xhrIndex = requestList.findIndex(item => item === xhr);
          requestList.splice(xhrIndex, 1);
        }
        resolve({
          data: e.target.response
        });
      };
      // Expose the in-flight xhr so it can be aborted from outside
      requestList?.push(xhr);
      xhr.send(data);
    });
  }

  handlePause() {
    this.isPaused = true; // drives the Pause/Resume toggle in the template
    this.requestList.forEach(xhr => xhr?.abort()); // cancel in-flight uploads
    this.requestList = [];
  }
</script>
  • When the user clicks resume, ask the server which chunks were already uploaded, then upload only the remaining ones (the verifyUpload helper is sketched after this code)
async handleResume() {
  this.isPaused = false;
  const { uploadedList } = await this.verifyUpload(
    this.container.file.name,
    this.container.hash
  );
  await this.uploadChunks(uploadedList);
}

// Small change to the earlier function: skip chunks the server already has
async uploadChunks(uploadedList = []) {
  const requestList = this.data
    .filter(({ hash }) => !uploadedList.includes(hash))
    .map(({ chunk, hash }) => {
      // ...
    })
    .map(({ formData }) =>
      // ...
    );
  await Promise.all(requestList); // upload the remaining chunks concurrently
},
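The verifyUpload helper used above isn't shown in the article. A plausible version built on the same request wrapper follows; the JSON body shape { filename, fileHash } is an assumption that must match what the server's body parser expects:

// Hypothetical helper: ask the server which chunks of this file it already has
async verifyUpload(filename, fileHash) {
  const { data } = await this.request({
    url: "http://localhost:3000/verifyUpload",
    headers: { "content-type": "application/json" },
    data: JSON.stringify({ filename, fileHash })
  });
  return JSON.parse(data); // expected shape: { uploadedList: [...] }
}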
  • Server endpoint
// Return the list of chunk names already uploaded for this file hash
const createUploadedList = async fileHash =>
  fse.existsSync(path.resolve(UPLOAD_DIR, fileHash))
    ? await fse.readdir(path.resolve(UPLOAD_DIR, fileHash))
    : [];

server.on("request", async (req, res) => {
  if (req.url === "/verifyUpload") {
    // resolvePost (defined in the merge sketch above) parses the JSON body
    const data = await resolvePost(req);
    const { fileHash } = data;
    res.end(
      JSON.stringify({
        uploadedList: await createUploadedList(fileHash)
      })
    );
  }
});

Instant Upload

That is, no matter how large the file is, the upload completes instantly (from the user's point of view).


Approach

  • As with resumable upload, ask the server which chunks have already been uploaded
  • For instant upload, instead ask the server whether the whole file (keyed by its content hash) already exists there; if it does, there is nothing left to upload, which looks instantaneous to the user (see the sketch below)
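
A minimal sketch of that check, extending the /verifyUpload endpoint above with a shouldUpload flag; the flag name and the assumption that merged files are stored as hash + extension (as in the merge sketch) are illustrative, not from the original code:

server.on("request", async (req, res) => {
  if (req.url === "/verifyUpload") {
    const { fileHash, filename } = await resolvePost(req);
    // Assumes merged files are stored as UPLOAD_DIR/<hash><ext>
    const filePath = path.resolve(UPLOAD_DIR, `${fileHash}${extractExt(filename)}`);
    if (fse.existsSync(filePath)) {
      // The complete file already exists on the server: instant upload
      res.end(JSON.stringify({ shouldUpload: false }));
    } else {
      res.end(
        JSON.stringify({
          shouldUpload: true,
          uploadedList: await createUploadedList(fileHash)
        })
      );
    }
  }
});

On the frontend, handleUpload would check shouldUpload right after calculateHash and skip uploadChunks entirely when it is false.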

References


实现一个大文件上传和断点续传 (Implementing large file upload and resumable upload)