记录一下大文件上传的实现代码(分片 hash 上传、断点续传、秒传)

94 阅读4分钟

参考自 juejin.cn/post/732388…

代码仓库地址 github.com/polikm5/big…

在此基础上增加了对promise并发控制的优化以及首中尾切片抽样hash的优化

项目结构

image.png

前端部分

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>大文件上传文件</title>
</head>
<body>

  <!-- File picker (#file) and the button (#upload) that starts the upload. -->
  <input type="file" id="file">
  <input type="button" id="upload" value="上传">

  <!-- spark-md5 is loaded before operate.js; presumably operate.js is the upload
       script that uses the SparkMD5 global and looks up #file / #upload. -->
  <script  src="./src/spark-md5.js"></script>
  <script  src="./src/operate.js"></script>
</body>
</html>
  const fileEle = document.querySelector("#file");
  const uploadButton = document.querySelector("#upload");
  // NOTE(review): the markup shown alongside this script has no #continue
  // element, so this lookup yields null there and nothing below uses it.
  const continueButton = document.querySelector("#continue");

  // Hash and name of the file currently being uploaded (shared module state).
  let fileHash = "";
  let fileName = "";

  // Start the upload of the selected file when the upload button is clicked.
  uploadButton.addEventListener("click", async (e) => {
    e.preventDefault();
    console.log("上传按钮被点击了");
    const file = fileEle.files[0];
    console.log(file); // the File object picked by the user
    if (!file) {
      // Nothing selected: uploadFile would otherwise fail on file.name.
      console.warn("请先选择要上传的文件");
      return;
    }
    try {
      // Await so failures surface here instead of as a floating promise.
      const result = await uploadFile(file);
      if (result && result.err) {
        console.error(result.msg, result.err);
      }
    } catch (err) {
      console.error(err);
    }
  });

  /**
   * Upload a single chunk to the server.
   *
   * The chunk is flagged as uploaded only after the server has confirmed it,
   * so a failed request never leaves a chunk wrongly marked as done.
   *
   * @param {{file: Blob, fileHash: string, chunkIndex: number, uploaded: boolean}} chunk
   *   Chunk descriptor; `uploaded` is set to true on success.
   * @returns {Promise<object>} The parsed JSON response of the upload endpoint.
   * @throws {Error} When the request fails, the HTTP status is not OK, or the
   *   response body does not report `code` 200.
   */
  const uploadHandler = async (chunk) => {
    const fd = new FormData();
    fd.append("file", chunk.file);
    fd.append("fileHash", chunk.fileHash);
    fd.append("chunkIndex", chunk.chunkIndex);

    const res = await fetch("http://localhost:3000/upload", {
      method: "POST",
      body: fd,
    });
    if (!res.ok) {
      throw new Error(`chunk ${chunk.chunkIndex} upload failed: HTTP ${res.status}`);
    }
    const data = await res.json();
    if (!data || Number(data.code) !== 200) {
      throw new Error(`chunk ${chunk.chunkIndex} upload rejected by server`);
    }
    chunk.uploaded = true;
    return data;
  };
  // Default slice size: 10 MB.
  const chunkSize = 1024 * 1024 * 10;
  /**
   * Split a file into fixed-size chunks.
   *
   * Each chunk carries the current module-level `fileHash`, which identifies
   * the file the chunk belongs to.
   *
   * @param {Blob} file The file to split.
   * @param {number} [size=chunkSize] Chunk size in bytes; must be a positive integer.
   * @returns {Array<{file: Blob, uploaded: boolean, chunkIndex: number, fileHash: string}>}
   *   Chunk descriptors in file order (empty for an empty file).
   * @throws {RangeError} When `size` is not a positive integer.
   */
  const createChunks = (file, size = chunkSize) => {
    if (!Number.isInteger(size) || size <= 0) {
      // A non-positive step would never advance through the file.
      throw new RangeError("chunk size must be a positive integer");
    }
    const total = Math.ceil(file.size / size);
    return Array.from({ length: total }, (_, index) => {
      const start = index * size;
      return {
        file: file.slice(start, start + size),
        uploaded: false,
        chunkIndex: index,
        fileHash: fileHash,
      };
    });
  };

  /**
   * Compute a sampled MD5 of a file with spark-md5.
   *
   * Only three slices (head, middle, tail) of up to `cutSize` bytes each are
   * hashed, which keeps hashing fast for large files at the cost of not
   * covering the whole content.
   *
   * @param {Blob} file The file to hash.
   * @returns {Promise<string>} The hex digest of the sampled slices.
   */
  const getHash = (file) => {
    console.log("file", file);
    const cutSize = 100;
    const size = file.size;
    const middle = Math.floor(size / 2);
    // The slices are read as ArrayBuffers, so the ArrayBuffer flavour of the
    // hasher is required; the plain SparkMD5 class hashes strings.
    const sparkmd5 = new SparkMD5.ArrayBuffer();
    const cutFiles = [
      file.slice(0, cutSize),
      file.slice(middle, middle + cutSize),
      file.slice(Math.max(size - cutSize, 0), size),
    ];
    return new Promise((resolve, reject) => {
      const fileReader = new FileReader();
      let i = 0;
      const loadNext = () => {
        fileReader.readAsArrayBuffer(cutFiles[i]);
      };
      fileReader.onload = function (e) {
        // Append every slice, including the last one, before finishing.
        sparkmd5.append(e.target.result);
        i++;
        if (i < cutFiles.length) {
          loadNext();
          return;
        }
        const fileMd5 = sparkmd5.end();
        console.log("fileMd5", fileMd5);
        resolve(fileMd5);
      };
      // Without this a failed read would leave the promise pending forever.
      fileReader.onerror = function () {
        reject(fileReader.error);
      };
      loadNext();
    });
  };

  /**
   * Upload a list of chunks with a cap on the number of concurrent requests.
   *
   * Every chunk is attempted even if some fail, so chunks that did succeed
   * stay on the server and can be skipped on a later retry. The returned
   * promise rejects when at least one chunk failed, which keeps the caller
   * from merging an incomplete file.
   *
   * @param {Array<object>} chunks Chunk descriptors accepted by `uploadHandler`.
   * @param {number} [maxRequest=6] Maximum number of uploads in flight;
   *   values below 1 are treated as 1.
   * @returns {Promise<Array>} Resolves with the per-chunk results, in the same
   *   order as `chunks`.
   * @throws {Error} Rejects with an error carrying `failures`
   *   (`[{ index, error }]`) and `results` when any chunk failed.
   */
  const uploadChunks = (chunks, maxRequest = 6) => {
    return new Promise((resolve, reject) => {
      if (!chunks || chunks.length === 0) {
        resolve([]);
        return;
      }
      const results = [];
      const failures = [];
      let nextIndex = 0;
      let settledCount = 0;

      const finish = () => {
        if (failures.length === 0) {
          resolve(results);
          return;
        }
        const error = new Error(
          `${failures.length} of ${chunks.length} chunks failed to upload`
        );
        error.failures = failures;
        error.results = results;
        reject(error);
      };

      // Each finished upload pulls the next pending chunk, keeping at most
      // `workers` requests in flight.
      const launchNext = () => {
        if (nextIndex >= chunks.length) {
          return;
        }
        const index = nextIndex;
        nextIndex++;
        Promise.resolve()
          .then(() => uploadHandler(chunks[index]))
          .then(
            (r) => {
              results[index] = r;
            },
            (e) => {
              results[index] = e;
              failures.push({ index, error: e });
            }
          )
          .then(() => {
            settledCount++;
            if (settledCount === chunks.length) {
              finish();
            } else {
              launchNext();
            }
          });
      };

      const requested = Math.floor(Number(maxRequest)) || 1;
      const workers = Math.min(Math.max(1, requested), chunks.length);
      for (let i = 0; i < workers; i++) {
        launchNext();
      }
    });
  };

  /**
   * Upload a file: hash it, skip it if the server already has it, resume from
   * chunks the server already holds, upload the rest and ask for a merge.
   *
   * Updates the module-level `fileName` and `fileHash`.
   *
   * @param {File} file The file to upload.
   * @returns {Promise<undefined|{msg: string, err: *}>} Nothing on success or
   *   when the file was already uploaded; an error descriptor when any step
   *   fails.
   */
  const uploadFile = async (file) => {
    // Hashing and the verify requests can fail too, so they sit inside the
    // same try block as the upload itself.
    try {
      fileName = file.name;
      fileHash = await getHash(file);
      const { fileExist } = await verifyFile(fileHash, fileName);
      if (fileExist) {
        console.log("当前文件已经上传过");
        return;
      }
      let chunks = createChunks(file);
      const { index } = await verifyIntegrity(fileHash);
      if (Array.isArray(index)) {
        // Some chunks were uploaded earlier (e.g. the upload was interrupted).
        // Mark them so only the remaining chunks are sent. Entries that do not
        // map to a chunk of this file are ignored.
        index.forEach((item) => {
          const position = Number(item);
          if (Number.isInteger(position) && chunks[position]) {
            chunks[position].uploaded = true;
          }
        });
        chunks = chunks.filter((it) => !it.uploaded);
      }
      await uploadChunks(chunks);
      await mergeRequest(fileHash, fileName);
    } catch (e) {
      return {
        msg: "上传文件错误",
        err: e,
      };
    }
  };

  /**
   * Ask the server to merge the uploaded chunks into the final file.
   *
   * @param {string} fileHash Hash identifying the uploaded file.
   * @param {string} fileName Original file name (its extension is used by the server).
   * @returns {Promise<object>} The parsed JSON response.
   * @throws {Error} When the request fails or the HTTP status is not OK.
   */
  const mergeRequest = async (fileHash, fileName) => {
    // URLSearchParams escapes characters such as "&", "#" and spaces that
    // would otherwise corrupt the query string.
    const query = new URLSearchParams({ fileHash, fileName }).toString();
    const res = await fetch(`http://localhost:3000/merge?${query}`);
    if (!res.ok) {
      throw new Error(`merge failed: HTTP ${res.status}`);
    }
    const r = await res.json();
    console.log(r);
    return r;
  };

  /**
   * Ask the server whether this file has already been uploaded completely.
   *
   * @param {string} fileHash Hash identifying the file.
   * @param {string} fileName Original file name (its extension is used by the server).
   * @returns {Promise<object>} The parsed JSON response; callers read `fileExist`.
   * @throws {Error} When the request fails or the HTTP status is not OK.
   */
  const verifyFile = async (fileHash, fileName) => {
    // URLSearchParams escapes characters such as "&", "#" and spaces that
    // would otherwise corrupt the query string.
    const query = new URLSearchParams({ fileHash, fileName }).toString();
    const res = await fetch(`http://localhost:3000/verify?${query}`);
    if (!res.ok) {
      throw new Error(`verify failed: HTTP ${res.status}`);
    }
    return res.json();
  };

  /**
   * Query which chunks of a file the server already holds (used to resume an
   * interrupted upload).
   *
   * @param {string} fileHash Hash identifying the file.
   * @returns {Promise<object>} The parsed JSON response; callers read `index`.
   */
  const verifyIntegrity = async (fileHash) => {
    const endpoint = `http://localhost:3000/verifyIntegrity?fileHash=${fileHash}`;
    const response = await fetch(endpoint);
    const payload = await response.json();
    return payload;
  };

后端部分(Express)

const express = require("express");
// bodyParser 解析请求体
const bodyParser = require("body-parser");

// 处理 multipart/form-data 类型的表单数据 主要用于上传文件
const multer = require("multer");
const path = require("path");
const fs = require("node:fs");
const fse = require("fs-extra");
// Name of the folder that stores uploads; it is used as a relative path
// ("./uploadFiles/") for multer and as the base for path.resolve in the routes.
const UPLOADPATH = "uploadFiles"
// multer instance that writes incoming multipart files into the upload folder.
const upload = multer({ dest: `./${UPLOADPATH}/` });
const app = express();

// Middleware that adds permissive CORS headers to every request.
app.all("*", (req, res, next) => {
  res.header("Access-Control-Allow-Origin", "*");
  res.header("Access-Control-Allow-Headers", "Content-Type");
  next();
});
// Parse URL-encoded request bodies.
app.use(bodyParser.urlencoded({ extended: false }));
// Parse JSON request bodies.
app.use(bodyParser.json());

app.use(express.static("static")); // serve static assets from ./static

// Receive one chunk of a file.
// upload.single("file") makes multer handle only the "file" form field; the
// chunk is then moved to <UPLOADPATH>/<fileHash>/<chunkIndex>.
app.post("/upload", upload.single("file"), (req, res) => {
  const { fileHash, chunkIndex } = req.body;
  if (!req.file) {
    res.status(400).send({ msg: "缺少文件切片", code: 400 });
    return;
  }
  // Where multer stored the incoming chunk.
  const currentFilePath = path.resolve(req.file.path);
  // Both values become path segments, so only accept a hex hash and a numeric
  // index; anything else (e.g. "../") could escape the upload folder.
  if (
    typeof fileHash !== "string" ||
    !/^[a-f0-9]+$/i.test(fileHash) ||
    typeof chunkIndex !== "string" ||
    !/^\d+$/.test(chunkIndex)
  ) {
    fse.removeSync(currentFilePath);
    res.status(400).send({ msg: "参数不合法", code: 400 });
    return;
  }
  // Per-file folder holding the chunks: <UPLOADPATH>/<fileHash>.
  const tempFileDir = path.resolve(UPLOADPATH, fileHash);
  // recursive: true creates the folder when missing and is a no-op otherwise.
  fs.mkdirSync(tempFileDir, { recursive: true });
  // Final location of this chunk: <UPLOADPATH>/<fileHash>/<chunkIndex>.
  const targetFilePath = path.resolve(tempFileDir, chunkIndex);
  if (!fs.existsSync(targetFilePath)) {
    // First time this chunk arrives: move it into place.
    fse.moveSync(currentFilePath, targetFilePath);
  } else {
    // The chunk is already stored, so the new copy is not needed.
    fse.removeSync(currentFilePath);
  }
  res.send({
    msg: "上传成功",
    code: 200,
  });
});

// Called once all chunks are uploaded: concatenates the chunks of <fileHash>
// into a single file named <fileHash><extension of fileName>.
app.get("/merge", (req, res) => {
  console.log("merge");
  const { fileHash, fileName } = req.query;
  // fileHash becomes a path segment, so only a hex string is accepted;
  // anything else (e.g. "../") could escape the upload folder.
  if (
    typeof fileHash !== "string" ||
    !/^[a-f0-9]+$/i.test(fileHash) ||
    typeof fileName !== "string"
  ) {
    res.status(400).send({ msg: "参数不合法", code: 400 });
    return;
  }
  // Path of the final merged file.
  const targetFilePath = path.resolve(
    UPLOADPATH,
    fileHash + path.extname(fileName)
  );
  // Folder holding the uploaded chunks.
  const tempFilePath = path.resolve(UPLOADPATH, fileHash);
  // The merge is assembled here first and only moved to the final path once
  // complete, so a failed merge never leaves a partial file that looks like a
  // finished upload.
  const mergingFilePath = path.resolve(UPLOADPATH, `${fileHash}.merging.tmp`);

  try {
    if (
      !fse.existsSync(tempFilePath) ||
      !fse.statSync(tempFilePath).isDirectory()
    ) {
      res.status(400).send({ msg: "没有可合并的切片", code: 400 });
      return;
    }
    const storedNames = new Set(fse.readdirSync(tempFilePath));
    // Chunks are stored under their index, so n chunks must be named 0..n-1.
    // NOTE: the request does not carry the expected chunk count, so only gaps
    // can be detected here, not missing trailing chunks.
    const orderedNames = Array.from({ length: storedNames.size }, (_, i) =>
      String(i)
    );
    if (
      orderedNames.length === 0 ||
      !orderedNames.every((name) => storedNames.has(name))
    ) {
      res.status(400).send({ msg: "切片不完整", code: 400 });
      return;
    }

    // Drop leftovers of a previously interrupted merge.
    fse.removeSync(mergingFilePath);
    for (const name of orderedNames) {
      fse.appendFileSync(
        mergingFilePath,
        fse.readFileSync(path.resolve(tempFilePath, name))
      );
    }
    // Remove the chunk folder before moving: for a file name without an
    // extension the final path is the same as the chunk folder path.
    fse.removeSync(tempFilePath);
    fse.moveSync(mergingFilePath, targetFilePath, { overwrite: true });
    res.send({
      msg: "合并成功",
      code: 200,
    });
  } catch (e) {
    console.error(e);
    fse.removeSync(mergingFilePath);
    res.status(500).send({ msg: "合并失败", code: 500 });
  }
});

// Report whether a merged file named <fileHash><extension of fileName> already
// exists, which lets the client skip the upload entirely ("instant upload").
app.get("/verify", (req, res) => {
  const { fileName, fileHash } = req.query;
  // fileHash becomes a path segment, so only a hex string is accepted;
  // anything else (e.g. "../") could probe paths outside the upload folder.
  if (
    typeof fileHash !== "string" ||
    !/^[a-f0-9]+$/i.test(fileHash) ||
    typeof fileName !== "string"
  ) {
    res.status(400).send({ msg: "参数不合法", code: 400, fileExist: false });
    return;
  }
  const targetFilePath = path.resolve(
    UPLOADPATH,
    fileHash + path.extname(fileName)
  );
  // Require a regular file: for a name without an extension this path is also
  // the chunk folder of an unfinished upload, which must not count as done.
  const fileExist =
    fse.existsSync(targetFilePath) && fse.statSync(targetFilePath).isFile();
  res.send({
    code: 200,
    fileExist,
  });
});

// Report which chunks of <fileHash> are already stored so the client can
// resume an interrupted upload. `index` is the list of stored chunk names, or
// null when there is no chunk folder for this hash.
app.get("/verifyIntegrity", (req, res) => {
  const { fileHash } = req.query;
  // fileHash becomes a path segment, so only a hex string is accepted;
  // anything else (e.g. "../") could list folders outside the upload folder.
  if (typeof fileHash !== "string" || !/^[a-f0-9]+$/i.test(fileHash)) {
    res.status(400).send({ msg: "参数不合法", code: 400, index: null });
    return;
  }
  const tempFileDir = path.resolve(UPLOADPATH, fileHash);
  // Only a directory can be listed; the path may also be a merged file when
  // the uploaded file had no extension.
  const tempFileDirIsExist =
    fse.existsSync(tempFileDir) && fse.statSync(tempFileDir).isDirectory();
  const chunkPaths = tempFileDirIsExist
    ? fse.readdirSync(tempFileDir).filter((name) => /^\d+$/.test(name))
    : null;
  res.send({
    code: 200,
    index: chunkPaths,
  });
});
// Start the HTTP server on port 3000 and log a message once it is listening.
app.listen(3000, () => {
  console.log("服务已运行:localhost:3000");
});