Chunked Upload of Large Files


What is chunked upload of large files?

Chunked upload means, as the name suggests, splitting a large file into several smaller pieces (called "chunks"), then uploading those pieces to the server one at a time or in parallel. Once all chunks have arrived, the server reassembles them into the original file. This technique not only speeds uploads up, it also makes them noticeably more reliable and robust.
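Conceptually, the client-side flow looks like the sketch below. This is a minimal sketch, not a full implementation: the /upload/chunk and /upload/merge endpoints and their payloads are assumptions about the backend, and cutFile is the helper built later in this article.

// A hypothetical upload flow: slice the file, upload every chunk, ask the server to merge.
import { cutFile } from "./cutFile.js";

async function uploadFile(file) {
  const chunks = await cutFile(file); // [{ chunk, _md5 }, ...], built below
  // Upload each chunk tagged with its index and hash, so the server
  // can verify it and reassemble the file in order.
  await Promise.all(
    chunks.map(({ chunk, _md5 }, index) => {
      const form = new FormData();
      form.append("filename", file.name);
      form.append("index", index);
      form.append("hash", _md5);
      form.append("chunk", chunk);
      return fetch("/upload/chunk", { method: "POST", body: form });
    })
  );
  // All chunks are in; ask the server to merge them into the original file.
  await fetch("/upload/merge", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ filename: file.name, total: chunks.length }),
  });
}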

Why do we need chunked uploads?

  1. Faster uploads: uploading several chunks in parallel makes better use of the available bandwidth and noticeably speeds up the transfer.
  2. Better reliability: if one chunk fails, only that chunk has to be re-sent rather than restarting the whole upload, which raises the overall success rate (see the retry sketch after this list).
  3. Resumable uploads: the user can pause and resume the upload at any time, which improves the user experience.
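Points 2 and 3 boil down to two small pieces of logic, sketched here under assumptions: uploadChunk is a placeholder for whatever actually POSTs one chunk (for example the fetch call in the sketch above), and GET /upload/status is a hypothetical endpoint returning the chunk indexes the server already has.

// Retry a single failed chunk without touching the ones that succeeded.
async function uploadWithRetry(chunk, index, maxRetries = 3) {
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      return await uploadChunk(chunk, index); // placeholder for the real upload call
    } catch (err) {
      if (attempt === maxRetries) throw err; // give up only after maxRetries tries
    }
  }
}

// Resume: ask the server which chunks it already holds, then skip them.
async function resumeUpload(file, chunks) {
  const res = await fetch(`/upload/status?filename=${encodeURIComponent(file.name)}`);
  const uploaded = new Set(await res.json()); // e.g. [0, 1, 4]
  for (let i = 0; i < chunks.length; i++) {
    if (uploaded.has(i)) continue; // already on the server, skip it
    await uploadWithRetry(chunks[i].chunk, i);
  }
}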


index.html

<!DOCTYPE html>
<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
    <title>Chunked upload demo</title>
  </head>
  <body>
    <input type="file" id="input-file" />
    <script src="https://cdn.bootcdn.net/ajax/libs/spark-md5/3.0.2/spark-md5.min.js"></script>
    <script src="./main.js" type="module"></script>
  </body>
</html>
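
Note: because main.js is loaded with type="module" and the worker below is created with { type: "module" }, this page has to be served over HTTP (any local static server will do); opening index.html directly from the filesystem will fail, since module scripts are blocked on the file:// protocol.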

main.js

import { cutFile } from "./cutFile.js";

const inputFile = document.getElementById("input-file");
inputFile.onchange = async (e) => {
  const file = e.target.files[0];
  // Time how long slicing + hashing the whole file takes.
  console.time("cutFile");
  const chunks = await cutFile(file); // [{ chunk, _md5 }, ...] in file order
  console.timeEnd("cutFile");
  console.log(chunks);
};

cutFile.js


const CHUNK_SIZE = 1024 * 1024 * 5; // 5 MB per chunk
const THREAD_COUNT = navigator.hardwareConcurrency || 4; // worker count; fall back to 4 if the core count is unavailable

async function cutFile(file) {
  return new Promise((resolve) => {
    const chunkCount = Math.ceil(file.size / CHUNK_SIZE);
    // How many chunks each worker is responsible for.
    const threadChunkCount = Math.ceil(chunkCount / THREAD_COUNT);
    const result = [];
    let finished = 0;
    for (let i = 0; i < THREAD_COUNT; i++) {
      // Hand each worker a contiguous range of chunk indexes.
      const worker = new Worker("./worker.js", { type: "module" });
      const start = i * threadChunkCount;
      let end = (i + 1) * threadChunkCount;
      if (end > chunkCount) {
        end = chunkCount; // the last range may be shorter
      }
      worker.postMessage({
        file,
        start,
        end,
        CHUNK_SIZE,
      });
      worker.onmessage = (e) => {
        // Store by worker index so the chunks stay in file order.
        result[i] = e.data;
        worker.terminate();
        finished++;
        if (finished === THREAD_COUNT) {
          resolve(result.flat());
        }
      };
    }
  });
}
export { cutFile };

worker.js

import { createChunk } from "./chunk.js";

onmessage = async (e) => {
  const { file, start, end, CHUNK_SIZE } = e.data;
  // Start all chunk reads for this worker's range, then await them together.
  const result = [];
  for (let i = start; i < end; i++) {
    result.push(createChunk(file, i, CHUNK_SIZE));
  }
  const chunks = await Promise.all(result);
  // Send the hashed chunks back to the main thread.
  postMessage(chunks);
};

chunk.js

// chunk.js runs inside a module worker, which cannot see globals defined by the
// <script> tag in index.html, so SparkMD5 must be imported here. The ESM CDN
// build below is one option; any ESM build of spark-md5 will do.
import SparkMD5 from "https://cdn.jsdelivr.net/npm/spark-md5@3.0.2/+esm";

export function createChunk(file, index, chunkSize) {
  return new Promise((resolve, reject) => {
    // Byte range of this chunk within the file.
    const start = index * chunkSize;
    const end = Math.min(start + chunkSize, file.size);
    const spark = new SparkMD5.ArrayBuffer();
    const fileReader = new FileReader();
    // slice() is cheap: it only records the range, no bytes are copied yet.
    const chunk = file.slice(start, end);
    fileReader.onload = (e) => {
      spark.append(e.target.result);
      // MD5 of this chunk, used later to verify or deduplicate it.
      const _md5 = spark.end();
      resolve({
        chunk,
        _md5,
      });
    };
    fileReader.onerror = reject;
    fileReader.readAsArrayBuffer(chunk);
  });
}
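
What is the per-chunk MD5 for? Typically the server uses it to verify each chunk on arrival, and a file-level fingerprint derived from the ordered chunk hashes lets the server recognize the same file across sessions for resuming. The sketch below shows one common convention; it is an assumption layered on top of the original code, not part of it (SparkMD5 here is the same library loaded via the <script> tag in index.html):

// Derive a stable file-level id from the ordered chunk hashes:
// the same file content always yields the same fingerprint.
function fileFingerprint(chunks) {
  const spark = new SparkMD5();
  for (const { _md5 } of chunks) spark.append(_md5);
  return spark.end();
}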