记录-大文件上传的实现

135 阅读2分钟

前言

代码参考了大圣老师的文章,原文:《字节跳动面试官,我也实现了大文件上传和断点续传》

前端部分

  1. 文件的分片上传
  2. 上传的并发处理
  3. 断点续传与秒传
  4. 分片上传失败的处理
  5. 文件哈希值的计算

遇到的问题

  • 计算大文件hash值会阻塞主线程的运行,使用webworker解决
  • 全量计算文件 hash 耗时过长:可以改为对分片抽样计算 hash,从而大幅缩短计算时间;缺点是文件内容被修改后,抽样得到的 hash 可能不变,导致修改后的文件被误判为已上传
<template>
  <div>
    <!-- Upload controls: file picker, pause/resume buttons, cancel button and the label for the current uploadStatus -->
    <div style="border-bottom:1px solid gray ;padding-bottom: 30px;">
      <h1>大文件上传系统</h1>
      <input type="file" ref="file" @change="onFileChange($event)" />
      <!-- Only one of the pause/resume buttons is visible, depending on uploadStatus -->
      <button
        v-show="uploadStatus === 'uploading'"
        @click="handleStatusChange('paused')"
      >
        暂停
      </button>
      <button
        v-show="uploadStatus === 'paused'"
        @click="handleStatusChange('uploading')"
      >
        开始
      </button>
      <!-- Cancel is hidden while idle ('stop'), after completion ('finished') or once progress reaches 100 -->
      <button v-show="!['stop','finished'].includes(uploadStatus) && uploadProgress < 100" @click="handleCancel">取消上传</button>
      {{ statusText[uploadStatus]}}
    </div>
    <!-- Failure simulation toggle: switches `number` between 0 (off) and 0.5 (on) -->
    <div style="border-bottom:1px solid gray ;padding-bottom: 30px;">
      <h3>模拟上传失败:{{ number === 0?'已关闭':'已开启' }}</h3>
      <button @click="number === 0? number=0.5:number=0">{{ number > 0?'关闭':'开启' }}</button>
    </div>
    <!-- Hash mode toggle: sampled hash (isSample = true) or full-file hash -->
    <div style="border-bottom:1px solid gray ;padding-bottom: 30px;">
      <h3>当前hash计算方式:{{isSample?'抽样':'全量'}}</h3>
      <button @click="isSample = !isSample">{{!isSample?'抽样':'全量'}}计算hash</button>
    </div>
    <!-- Hash calculation progress (0-100) -->
    <div style="border-bottom:1px solid gray;padding-bottom: 30px;">
      <h3>hash计算进度</h3>
      <progress :value="calcHashProgress" max="100"></progress>
      <div>{{ Math.ceil(calcHashProgress) }}%</div>
    </div>
    <!-- Upload progress (0-100) -->
    <div style="border-bottom:1px solid gray;padding-bottom: 30px;">
      <h3>文件上传进度</h3>
      <progress :value="uploadProgress" max="100"></progress>
      <div>{{ Math.ceil(uploadProgress) }}%</div>
    </div>
    
  </div>
</template>
<script>
import axios from 'axios'
// Module-level (non-reactive) state shared by the component methods below.
let uploadedChunks = []; // Indexes (cIndex values) of the chunks the server reports as already uploaded
let fileChunks = []; // Every chunk of the currently selected file, as produced by splitFile
export default {
  data() {
    return {
      file: null,//已选择文件
      uploadStatus: "stop",//上传状态
      fileHash: "",//文件hash值
      isSample: true,//是否为抽样计算hash
      calcHashProgress: 0,//哈希计算进度
      uploadProgress: 0, //上传进度
      uploadedCount:0, // 已上传分片数
      statusText:{
        uploading: "上传中",
        paused: "已暂停",
        finished: "已完成",
        reupload:'部分分片上传失败,1秒后重新上传',
        stop:'请选择上传文件'
      },
      number:0,//用于模拟上传失败的情况
      reuploadTimes:0,//失败重试次数
      cancelList:[] // 需要取消的请求
    };
  },
  watch:{
    uploadStatus(newValue){
      if(['paused','stop'].includes(newValue)){
        this.cancelList.forEach(cancel=>cancel())
        this.cancelList = []
      }
    }
  },
  methods: {
    async onFileChange(e) {
      this.handleStatusChange('stop')
      this.file = e.target.files[0];
      if(!this.file) return 
      // 文件切片
      fileChunks = this.splitFile(this.file);
      try {
        this.fileHash = this.isSample ? await this.calcFileHashSample(this.file) : await this.calcFileHashWithWorker(fileChunks);
        console.log(this.fileHash, "this.fileHash");
      } catch (error) {
        console.log(error, "error");
      }
      this.handleStatusChange('uploading')
    },
    // 切割文件
    splitFile(file = [], chunkSize = 1024 * 1024 * 1) {
      // 装分片的数组
      const chunks = [];
      // 总的分片数
      const totalChunk = Math.ceil(file.size / chunkSize);
      let i = 0;
      while (i < totalChunk) {
        // 切割的开始位置
        let startPos = i * chunkSize;
        // 切割的结束位置
        let endPos = i * chunkSize + chunkSize;
        // 切割拿到每个大小为 chunkSize 的分片
        const chunk = file.slice(
          startPos,
          endPos >= file.size ? file.size : endPos
        );
        chunks.push({ chunk, cIndex: i });
        i++;
      }
      return chunks;
    },
    // 处理请求并发
    sendRequest(chunksInfo, maxRequest = 5) {
      let count = 0; //请求完成数
      let idx = 0;
      const len = chunksInfo.length;
      const c_len = fileChunks.length
      const failList = []; //上传失败的列表
      return new Promise((resolve, reject) => {
        const startWork = () => {
          while (idx < len && maxRequest > 0) {
            if (this.uploadStatus === "paused"){
              return reject({ status: "暂停上传" });
            }
            else if(this.uploadStatus === "stop"){
              this.handleStatusChange('stop')
              return reject({ status: "停止上传" });
            }
            maxRequest--; //占用通道
            const formData = new FormData();
            const cInfo = chunksInfo[idx];
            formData.append("chunk", cInfo.chunk);
            formData.append("filename", this.file.name);
            formData.append("hash", this.fileHash);
            formData.append("cIndex", cInfo.cIndex);
            idx++;
            this.uploadFile(formData)
              .then((res) => {
                // 上传进度 = 已上传数 / 分片的总数
                this.uploadedCount ++ 
                if(this.uploadedCount === c_len){
                  this.uploadProgress = 100
                }else{
                  this.uploadProgress = this.uploadedCount * 100 / c_len
                }
                console.log(count,this.uploadedCount,this.uploadProgress,fileChunks,'===============');
                // console.log("上传成功", res);
              })
              .catch((e) => {
                failList.push(cInfo);
                console.error("上传失败", e);
              })
              .finally(() => {
                maxRequest++; //释放通道
                count++;
                if (count === len) {
                  failList.length>0 && console.log("失败重试", failList);
                  failList.length>0 && (this.uploadStatus = "reupload");
                  resolve({ status: "上传完毕", failList });
                } else {
                  startWork();
                }
              });
          }
        };
        startWork();
      });
    },
    // 上传文件
    uploadFile(formData) {
      if(Math.random() < this.number){
        return Promise.reject({stuats:'上传失败'})
      }
      const CancelToken = axios.CancelToken
      let self = this
      return axios.post('http://127.0.0.1:8088/upload',formData,{
        headers:{'Content-Type': 'multipart/form-data'},
        cancelToken: new CancelToken(function executor(c) {
          self.cancelList.push(c);
          // 这个参数 c 就是CancelToken构造函数里面自带的取消请求的函数,这里把该函数当参数用
        })
      }).then(res=>{
        return res.data
      }).catch(e=>e)
    },
    // 处理上传状态改变
    async handleStatusChange(status) {
      this.uploadStatus = status;
      if (this.uploadStatus === "uploading") {
        const {data} = await this.getUploadedInfo()
        uploadedChunks = []
        if(data?.status === 'existsSome'){
          uploadedChunks = data.cIndexs
          this.uploadedCount = uploadedChunks.length
        }else if(data?.status === 'existsAll'){
          this.uploadProgress = 100
          fileChunks = []
          return this.uploadStatus = 'finished'
        }
        // 过滤出未上传的分片
        const notUploadChunks = fileChunks.filter((item) => {
          return !uploadedChunks.includes(Number(item.cIndex));
        });
        console.log(notUploadChunks,'notUploadChunks')
        this.sendRequest(notUploadChunks).then((res) => {
          const { status, failList } = res;
          // 存在上传失败的分片,进行重试
          if (failList.length > 0) {
            if(this.reuploadTimes > 5) return 
            this.reuploadTimes ++
            setTimeout(()=>{
              this.handleStatusChange("uploading");
            },1000)
          } else {
            this.uploadStatus = "finished"
            this.reuploadTimes = 0
            fileChunks = []
            this.finishUpload()
            console.log("分片全部上传完毕");
          }
        });
      } else if (this.uploadStatus === "paused") {
        console.log('上传已暂停')
      }else if(this.uploadStatus === "stop"){
        this.file = null
        fileChunks = []
        uploadedChunks = []
        this.uploadProgress = 0
        this.uploadedCount = 0
        this.calcHashProgress = 0
        this.reuploadTimes = 0
      }
    },
    // 用worker计算文件hash
    calcFileHashWithWorker(chunks = []) {
      return new Promise((resolve, reject) => {
        const worker = new Worker("./hashWorker.js");
        worker.postMessage({ chunks });
        worker.onmessage = (e) => {
          // 接收外部Worker回传的信息
          const { progress, hash } = e.data;
          this.calcHashProgress = progress
          console.log(progress, "progress");
          if (hash) {
            console.log(hash, "hash");
            worker.terminate(); //关闭线程
            resolve(hash);
          }
        };
        worker.onerror = (e) => {
          reject(e);
        };
      });
    },
    // 文件分片抽样
    async calcFileHashSample(file = []) {
      const size = file.size,
        offset = 2 * 1024 * 1024,
        chunks = [{chunk:file.slice(0, offset),cIndex:0}];
      let curSize = 0, count = 0;
      // 抽样: 第一个区块2M,中间区块取前中后各2个字节,最后区块数据全要
      while (curSize < size) {
        if (curSize + offset > size) {
          chunks.push({chunk:file.slice(curSize, size),cIndex:count});
        } else {
          const mid = curSize + offset / 2,
            end = curSize + offset;
          chunks.push({chunk:file.slice(curSize, curSize + 2),cIndex:count});
          chunks.push({chunk:file.slice(mid, mid + 2),cIndex:count});
          chunks.push({chunk:file.slice(end - 2, end),cIndex:count});
        }
        curSize += offset;
        count ++
      }
      return await this.calcFileHashWithWorker(chunks)
    },
    // 开始上传
    getUploadedInfo(){
      // 查询哪些分片已上传
      return axios.get('http://127.0.0.1:8088/getUploadedInfo?fileHash='+this.fileHash).then(res=>{
        return res.data
      })
    },
    // 完成上传
    finishUpload(){
      return axios.post('http://127.0.0.1:8088/mergeFiles',{fileHash:this.fileHash}).then(res=>{
        return res.data
      })
    },
    // 取消上传
    cancelUpload(){
      // 把分片清空
      return axios.post('http://127.0.0.1:8088/cancelUpload',{fileHash:this.fileHash}).then(res=>{
        return res.data
      })
    },
    // 处理取消
    async handleCancel(){
      this.uploadStatus = 'stop'
      const {code} = await this.cancelUpload();
      if(code == 200){
        this.handleStatusChange('stop')
      }
    }
  },
};
</script>

hashWorker.js(Web Worker 脚本,对应上文组件中的 `new Worker("./hashWorker.js")`)

self.importScripts('spark-md5.min.js') // load spark-md5
// Hashes the received chunks one after another and reports progress.
// Incoming message: { chunks }, an array of objects whose `chunk` property is
// readable by FileReader.
// Outgoing messages: { progress } after each intermediate chunk and finally
// { progress: 100, hash } with the digest returned by spark.end().
self.onmessage = (e) => {
  const { chunks } = e.data;
  const spark = new self.SparkMD5.ArrayBuffer()
  const len = chunks.length;
  let progress = 0;
  let count = 0;
  // Reports the final digest together with 100% progress
  const finish = () => {
    self.postMessage({
      progress: 100,
      hash: spark.end()
    })
    console.timeEnd('loading')
    console.timeStamp('loading')
  };
  const loadNext = (index) => {
    const reader = new FileReader()
    reader.onload = (event) => {
      count++
      spark.append(event.target.result)    // feed the chunk content into the hash
      if (count === len) {
        finish()
      } else {
        progress += 100 / len
        self.postMessage({ progress })
        loadNext(count)
      }
    }
    reader.onerror = () => {
      // Surface read failures as an uncaught worker error so the owner of the
      // worker is notified instead of waiting forever for a hash.
      throw reader.error
    }
    reader.readAsArrayBuffer(chunks[index].chunk)
  };
  console.time('loading')
  if (len === 0) {
    // Nothing to read (e.g. an empty file): report the digest of no data
    finish()
    return
  }
  loadNext(0);
};

node部分

  1. 断点续传与秒传
  2. 分片全部上传完毕把分片合并
var http = require("http");
var fs = require("fs");
const url = require("url")
var formidable = require('formidable');
// Directory that holds the chunk folders and the merged files. It must end
// with '/' because the paths below are built by string concatenation.
const basePath = `C:/Users/Administrator/Desktop/demo/mytest/src/components/uploadView/files/`
// `recursive` also creates missing parent directories instead of throwing ENOENT
if(!fs.existsSync(basePath)) fs.mkdirSync(basePath, { recursive: true })
let cancelRequestList = [] // hashes whose chunk directory is being deleted by a cancelUpload request
// Sends `payload` to the client as a JSON body and ends the response.
function sendJson(res, payload) {
  if (!res.headersSent) {
    res.setHeader("Content-Type", "application/json; charset=utf-8");
  }
  res.end(JSON.stringify(payload));
}

// Collects the request body and parses it as JSON. Resolves with {} for an
// empty body; rejects when the body is not valid JSON or the stream fails.
function readJsonBody(req) {
  return new Promise((resolve, reject) => {
    let body = '';
    req.on('data', (data) => {
      body += data;
    });
    req.on('end', () => {
      if (body.length === 0) return resolve({});
      try {
        resolve(JSON.parse(body));
      } catch (err) {
        reject(err);
      }
    });
    req.on('error', reject);
  });
}

// The hash becomes part of a file system path, so only plain alphanumeric
// values are accepted; this rules out path separators and "..".
function isSafeHash(hash) {
  return typeof hash === 'string' && /^[A-Za-z0-9]+$/.test(hash);
}

// Form values may arrive as a single value or as a one-element array.
// NOTE(review): presumably depends on the installed formidable major version — confirm.
function firstValue(value) {
  return Array.isArray(value) ? value[0] : value;
}

// Create the server; the handler runs once for every incoming request.
// Routes:
//   POST /upload           - stores one chunk (multipart fields: chunk, filename, hash, cIndex)
//   GET  /getUploadedInfo  - reports the upload state for ?fileHash=...
//   POST /mergeFiles       - merges the chunks of { fileHash } into one file
//   POST /cancelUpload     - deletes the chunks of { fileHash }
// Application errors are reported in the JSON body as { code, message }.
var app = http.createServer(function (req, res) {
  if (req.url) {
    res.setHeader("Access-Control-Allow-Origin","*");
    // A whitelist of allowed origins could be configured here instead of "*"
    // "content-type" must be allowed for the JSON POST requests
    res.setHeader("Access-Control-Allow-Headers","X-request-With,content-type");
    res.setHeader("Access-Control-Allow-Methods","GET,POST,DELETE,PUT,OPTIONS")
  }
  // CORS preflight: the headers above are all the browser needs
  if (req.method === "OPTIONS") {
    res.statusCode = 204;
    return res.end();
  }
  const {query,pathname} = url.parse(req.url,true)
  if(pathname === "/upload"){
    if(req.method !== "POST"){
      return sendJson(res, {
        code:500,
        message:'Request Method Not Allow'
      });
    }
    var form = new formidable.IncomingForm();
    form.parse(req, function(err, fields, files) {
      if(err){
        return sendJson(res, { code:500, message:'upload fail' });
      }
      const chunkFile = firstValue(files.chunk)
      if(!chunkFile){
        return res.end();
      }
      const hash = firstValue(fields.hash)
      const cIndex = Number(firstValue(fields.cIndex))
      const rawName = firstValue(fields.filename)
      // All three values end up in a path on disk: reject anything unexpected
      if(!isSafeHash(hash) || !Number.isInteger(cIndex) || cIndex < 0 || typeof rawName !== 'string'){
        return sendJson(res, { code:500, message:'upload fail' });
      }
      // Path separators must not survive inside the stored chunk name
      const filename = rawName.replace(/[\\/]/g, '_')
      const filename2 = `${cIndex}-${filename}`
      try {
        copyFile(chunkFile.filepath, hash, filename2,function(err){
          if(err){
            sendJson(res, { code:500, message:'upload fail' });
          }else{
            sendJson(res, { code:200, message:'success' });
          }
        })
      } catch (copyErr) {
        // A synchronous file system error must not take the whole server down
        console.log('报错', copyErr)
        sendJson(res, { code:500, message:'upload fail' });
      }
    });
  }else if(pathname === "/getUploadedInfo"){
    if(!isSafeHash(query.fileHash)){
      return sendJson(res, {code: 500, message: 'fail'});
    }
    try {
      sendJson(res, {code: 200, message: 'success', data: getUploadedInfo(query.fileHash)});
    } catch (err) {
      console.log('报错', err)
      sendJson(res, {code: 500, message: 'fail'});
    }
  }else if(pathname === "/mergeFiles"){
    readJsonBody(req).then(({fileHash})=>{
      if(!isSafeHash(fileHash)){
        return sendJson(res, {code: 500, message: 'hash is null'});
      }
      // Promise.resolve().then() turns both a synchronous throw and a rejected
      // promise from mergeFiles into a rejection handled below.
      return Promise.resolve()
        .then(()=>mergeFiles(fileHash))
        .then(()=>sendJson(res, {code: 200, message: 'success'}))
    }).catch((err)=>{
      console.log('报错', err)
      sendJson(res, {code: 500, message: 'fail'});
    })
  }else if(pathname === "/cancelUpload"){
    if(req.method !== "POST"){
      return sendJson(res, {
        code:500,
        message:'Request Method Not Allow'
      });
    }
    readJsonBody(req).then(({fileHash})=>{
      if(!isSafeHash(fileHash)){
        return sendJson(res, {code: 500, message: 'hash is null'});
      }
      // While the hash is listed, incoming chunks for it are refused
      cancelRequestList.push(fileHash)
      return Promise.resolve()
        .then(()=>deleteFiles(basePath + fileHash))
        .then(()=>{
          sendJson(res, {code: 200, message: 'success'});
        })
        .catch(()=>{
          sendJson(res, {code: 500, message: 'fail'});
        })
        .finally(()=>{ cancelRequestList = cancelRequestList.filter(item=>item!==fileHash) })
    }).catch((err)=>{
      console.log('报错', err)
      sendJson(res, {code: 500, message: 'fail'});
    })
  }else{
    // Unknown route: answer instead of leaving the request hanging
    res.statusCode = 404;
    sendJson(res, {code: 404, message: 'Not Found'});
  }
});

// Failures to start listening (for example a port that is already in use) are
// delivered through the server's 'error' event; the listen() callback is only
// invoked on success and receives no arguments.
app.on("error", function (err) {
  throw err;
});
app.listen(8088, "127.0.0.1", function () {
  console.log("服务器运行在127.0.0.1:8088上");
});
// Reports what is stored under basePath for the given hash.
// Returns { isFileExists, cIndexs } plus, when an entry whose name starts with
// the hash exists, a `status`:
//   'existsSome' - the entry is a directory; cIndexs lists the numeric prefixes
//                  of the files inside it
//   'existsAll'  - the entry is a regular file
const getUploadedInfo = (hash) => {
  console.log(basePath + hash,'basePath + hash');
  const result = {isFileExists:false,cIndexs:[]}
  const match = fs.readdirSync(basePath).find((entry) => entry.startsWith(hash))
  if(!match) return result
  const matchPath = basePath + match
  const isChunkDir = fs.statSync(matchPath).isDirectory()
  result.isFileExists = true
  if(!isChunkDir){
    result.status='existsAll'
    return result
  }
  result.status='existsSome'
  const chunkNames = fs.readdirSync(matchPath)
  console.log(chunkNames)
  result.cIndexs = chunkNames.map((name) => Number(name.split('-')[0]))
  return result
}
// Merges the chunk files stored in basePath + hash into the single file
// basePath + hash + '-' + <original file name> and then removes the chunk
// directory. Chunk files are named '<cIndex>-<original file name>'.
// Returns a promise that resolves when the merged file is completely written
// and the chunk directory has been deleted; it rejects when the directory is
// missing or empty, or when reading/writing fails.
const mergeFiles = async (hash) => {
  const chunkDir = basePath + hash
  const chunks = fs.readdirSync(chunkDir)
    .map((item) => {
      // Only the first '-' separates the index; the file name may contain more
      const [index, ...nameParts] = item.split('-')
      return { item, cIndex: Number(index), filename: nameParts.join('-') }
    })
    .sort((a, b) => a.cIndex - b.cIndex)
  if (chunks.length === 0) {
    throw new Error('no chunks to merge for ' + hash)
  }
  const target = basePath + hash + '-' + chunks[0].filename
  await new Promise((resolve, reject) => {
    // 'w' truncates an earlier (possibly partial) result, so a repeated merge
    // does not append the content a second time.
    const ws = fs.createWriteStream(target, { flags: 'w' })
    ws.on('error', reject)
    ws.on('finish', resolve)
    // Stream the chunks one after another so that only a small buffer is kept
    // in memory, regardless of the file size.
    const appendChunk = (i) => {
      if (i === chunks.length) {
        ws.end()
        return
      }
      const rs = fs.createReadStream(chunkDir + '/' + chunks[i].item)
      rs.on('error', (err) => {
        ws.destroy()
        reject(err)
      })
      rs.on('end', () => appendChunk(i + 1))
      // Keep the destination open for the following chunks
      rs.pipe(ws, { end: false })
    }
    appendChunk(0)
  })
  await deleteFiles(chunkDir)
}

// Moves an uploaded temporary file into the chunk directory of its hash.
//   sourceFilepath - path of the temporary file
//   hash           - file hash; the chunk directory is basePath + hash
//   filename2      - name of the chunk file inside that directory
//   callback       - called exactly once: without an error on success, with an
//                    Error when the upload of this hash is being cancelled or
//                    when a file system operation fails
const copyFile = (sourceFilepath, hash, filename2, callback) => {
  let failure
  try {
    if(cancelRequestList.includes(hash)){
      // The chunks of this hash are being deleted: do not recreate the
      // directory, just discard the temporary file.
      fs.unlinkSync(sourceFilepath)
      throw new Error('upload cancelled')
    }
    const rootpath = basePath + hash
    if(!fs.existsSync(rootpath)){
      fs.mkdirSync(rootpath)
    }
    fs.copyFileSync(sourceFilepath, rootpath+'/'+filename2)
    fs.unlinkSync(sourceFilepath)
  } catch (err) {
    failure = err
  }
  // Invoked outside the try block so an exception thrown by the callback
  // itself cannot trigger a second invocation.
  callback(failure)
}

// Deletes every file inside the directory `url` and then the directory itself.
//   url - path of the directory to remove (its entries are expected to be files)
// Returns a promise that resolves once the directory is gone; a directory that
// does not exist counts as already deleted. The promise rejects with the
// underlying file system error when a deletion fails.
async function deleteFiles(url) {
  let files
  try {
    files = await fs.promises.readdir(url)
  } catch (error) {
    // Nothing to delete
    if (error.code === 'ENOENT') return
    console.log('报错', error)
    throw error
  }
  try {
    await Promise.all(files.map(async (item) => {
      // '/' is accepted as a separator on every platform, including Windows
      const filesUrl = url + '/' + item
      await fs.promises.unlink(filesUrl)
      console.log('文件:' + filesUrl + '删除成功!')
    }))
    await fs.promises.rmdir(url)
    console.log('目录:' + url + '删除成功!')
  } catch (error) {
    console.log('报错', error)
    throw error
  }
}