假如面试官问你怎么处理大文件上传

70 阅读4分钟

一般来说:大文件上传都是分成这样的思路 1:生成文件切片 2:生成hash 值 3:将切片的数据进行维护成一个包括该切片文件,切片名的对象 4:上传切片 5:合并 那么我们一个个思路分析 如何生成文件切片?是不是要分块,定义每一块的数量比如为10kb,这样去切分,形成这样的blob集合[10kb,20kb,30kb,...] ok,那么文件切片的大概思路如下

// 1. Split the source file into fixed-size Blob chunks.
const createChunkFileList = (file: File, size = CHUNK_SIZE) => {
  const chunkFileList: Blob[] = [];
  // Walk the file in `size`-byte strides; Blob.slice clamps the end
  // past EOF, so the final chunk may be smaller than `size`.
  for (let offset = 0; offset < file.size; offset += size) {
    chunkFileList.push(file.slice(offset, offset + size));
  }
  return chunkFileList;
}

好了,当前你已经完成了分块,那么是不是要给每个分块生成一个hash值,对吧,那我们都知道hash值的生成可能会很慢,可能会影响到主流程的事件处理,那么我们可以怎么做呢?聪明的同学可能想到用web worker开启线程,在不影响主流程事件处理的情况下,通过onmessage把结果返回给主线程。ok,那么我们就写一个web worker文件吧

import SparkMD5 from "spark-md5";
// Worker entry point: receives an array of Blob chunks, folds each chunk
// into an incremental MD5, and posts back { hash } when all chunks are done.
self.onmessage = function(e) {
  const chunkFileList = e.data;
  let currentChunk = 0;
  const spark = new SparkMD5.ArrayBuffer();
  const loadNext = (index) => {
    const reader = new FileReader();
    reader.onload = function(e) {
      currentChunk++;
      spark.append(e.target.result);
      // BUG FIX: compare against the chunk COUNT, not the array object —
      // `currentChunk === chunkFileList` is never true, so the worker
      // originally never posted a result.
      if (currentChunk === chunkFileList.length) {
        self.postMessage({
          hash: spark.end()
        });
        self.close();
      } else {
        loadNext(currentChunk); // recurse into the next chunk
      }
    };
    // BUG FIX: the original never started a read, so `onload` never fired.
    reader.readAsArrayBuffer(chunkFileList[index]);
  }
  loadNext(0); // start with the first chunk
}

ok,第二步完成了。那么是不是要进行第三步:将文件转化成接口文档需要的对象类型数据

// 2. Wrap every chunk blob together with its slice name,
//    in the shape the upload endpoint expects.
const transformChunkFileList = (
  chunkFileList: Blob[],
  name: string,
  hash: string
) => {
  const result = [];
  let index = 0;
  for (const chunkFile of chunkFileList) {
    result.push({
      chunk: chunkFile, // the sliced blob payload
      hash: `${name}-${hash}-${index}`, // slice name: <file>-<hash>-<i>
    });
    index += 1;
  }
  return result;
}

自定义请求方法

// 3. Custom XHR helper used by every upload endpoint.
//    Resolves with the parsed JSON body on 2xx, rejects otherwise.
const request: RequestHandler = ({
  url,
  data,
  method = "post",
  headers = {},
  onProgress,
}) => {
  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();
    xhr.open(method, url);
    if (onProgress) {
      xhr.upload.onprogress = onProgress;
    }
    Object.keys(headers).forEach((key) =>
      xhr.setRequestHeader(key, headers[key])
    );
    xhr.addEventListener("load", (e) => {
      // BUG FIX: the original fell through to resolve() after reject(),
      // so a failed request would ALSO resolve with the parsed body.
      if (xhr.status < 200 || xhr.status >= 300) {
        reject("失败");
        return;
      }
      try {
        resolve(JSON.parse((e.target as XMLHttpRequest).responseText));
      } catch {
        // Non-JSON body (e.g. a proxy error page) — treat as failure
        // instead of throwing synchronously inside the handler.
        reject("失败");
      }
    });
    // Network-level errors never fire "load"; without this the
    // returned promise would stay pending forever.
    xhr.onerror = () => reject("失败");
    xhr.send(data);
  });
};

// Progress-callback factory for one named file: accumulates uploaded
// bytes per finished chunk and derives the overall percentage.
const handleProgress = (name: string): OnProgress => {
  return (e) => {
    for (const file of fileList.value) {
      if (file.name !== name) continue;
      // Only count a chunk once its request has fully uploaded.
      if (e.loaded !== e.total) continue;
      // accumulated bytes across all finished chunks
      file.loaded += e.loaded;
      // percentage of the whole file, two decimals
      file.percentage = Number(
        ((file.loaded / (file.size as number)) * 100).toFixed(2)
      );
      if (file.percentage === 100) {
        file.status = "success";
      }
    }
  };
};

okk,到了这一步是不是要准备发起接口请求了,但是可能会有个问题:比如你上传到一半,网络中断了,那么是不是得把之前上传过的切片重新上传?那你觉得这样合理吗?那肯定不合理,大文件上传肯定会出现断点的情况,那么我们要怎么避免这种情况呢?那就要求后端提供一个接口给我们去查询,判断之前的文件切片是否有上传过,如果有,直接return掉。是不是这样就可以减少对服务器的压力,提高性能呢?好了,说干就干

// 3. Custom XHR helper used by every upload endpoint.
//    Resolves with the parsed JSON body on 2xx, rejects otherwise.
const request: RequestHandler = ({
  url,
  data,
  method = "post",
  headers = {},
  onProgress,
}) => {
  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();
    xhr.open(method, url);
    if (onProgress) {
      xhr.upload.onprogress = onProgress;
    }
    Object.keys(headers).forEach((key) =>
      xhr.setRequestHeader(key, headers[key])
    );
    xhr.addEventListener("load", (e) => {
      // BUG FIX: the original fell through to resolve() after reject(),
      // so a failed request would ALSO resolve with the parsed body.
      if (xhr.status < 200 || xhr.status >= 300) {
        reject("失败");
        return;
      }
      try {
        resolve(JSON.parse((e.target as XMLHttpRequest).responseText));
      } catch {
        // Non-JSON body (e.g. a proxy error page) — treat as failure
        // instead of throwing synchronously inside the handler.
        reject("失败");
      }
    });
    // Network-level errors never fire "load"; without this the
    // returned promise would stay pending forever.
    xhr.onerror = () => reject("失败");
    xhr.send(data);
  });
};

// Progress-callback factory for one named file: accumulates uploaded
// bytes per finished chunk and derives the overall percentage.
const handleProgress = (name: string): OnProgress => {
  return (e) => {
    for (const file of fileList.value) {
      if (file.name !== name) continue;
      // Only count a chunk once its request has fully uploaded.
      if (e.loaded !== e.total) continue;
      // accumulated bytes across all finished chunks
      file.loaded += e.loaded;
      // percentage of the whole file, two decimals
      file.percentage = Number(
        ((file.loaded / (file.size as number)) * 100).toFixed(2)
      );
      if (file.percentage === 100) {
        file.status = "success";
      }
    }
  };
};
  // Excerpt from handleUpload: ask the server which slices already exist.
  const { data } = await verifyRequest(name, hash);

  // Server already has the complete file — skip the upload entirely.
  if (!data.needUpload) {
    console.log(data.url);
    return;
  }

好了,进入到主题了。此时Promise有个静态方法叫Promise.all,如果对这个方法不熟悉的话,就去搜一下API啦

上传切片
// 4. Upload slices — skipping any the server reports it already has.
const uploadChunks = async (
  fileList: ChunkFileList,
  name: string,
  chunkFileList: string[] = [] // slice names already on the server
) => {
  // Resume support: filter out already-uploaded slices so we don't
  // waste bandwidth re-sending them.
  const pending = fileList.filter(
    (file) => !chunkFileList.some((uploaded) => file.hash.includes(uploaded))
  );
  const requestList = pending.map(({ chunk, hash }) => {
    const formData = new FormData();
    formData.append("chunk", chunk, hash); // slice payload + slice name
    formData.append("filename", name); // owning file's name
    return request({
      url: "http://localhost:3000/upload/file",
      data: formData,
      onProgress: handleProgress(name),
    });
  });
  // Fire all chunk uploads concurrently and wait for every one.
  await Promise.all(requestList);
}

最后合并切片

// 5. Ask the server to stitch the uploaded slices into the final file.
const mergeRequest = (name: string, hash: string) => {
  const payload = JSON.stringify({
    filename: name,
    fileHash: hash,
  });
  return request({
    url: "http://localhost:3000/merge/file",
    headers: { "content-type": "application/json" },
    data: payload,
  });
};
 // Excerpt from handleUpload: trigger the server-side merge.
 const res = await mergeRequest(name, hash);

end

完整的源码如下

<template>
  <el-upload
    v-model:file-list="fileList"
    :limit="FILE_MAX_LENGTH"
    :on-exceed="handleExceed"
    :before-upload="beforeUpload"
    :http-request="handleUpload"
  >
    <el-button type="primary">点击上传</el-button>
    <template #tip>
      <div class="el-upload__tip">上传文件大小不能超过1GB</div>
    </template>
  </el-upload>
</template>

<script lang="ts">
export default {
  // Component name for Vue devtools / keep-alive matching.
  name: "Upload",
};

<script lang="ts" setup>
import { ref } from "vue";
import { ElMessage } from "element-plus";
import type {
  UploadProps,
  // UploadUserFile,
  UploadRequestOptions,
} from "element-plus";
import type {
  ChunkFileList,
  UploadCustomFile,
  OnProgress,
  RequestHandler,
} from "./type";

// List of files the user has selected for upload (bound to el-upload).
const fileList = ref<UploadCustomFile[]>([]);
// Maximum number of files that may be selected at once.
const FILE_MAX_LENGTH = 5;

// el-upload on-exceed hook: warn when the selection exceeds FILE_MAX_LENGTH.
const handleExceed: UploadProps["onExceed"] = () => {
  ElMessage.warning(`上传图片数量不能超过${FILE_MAX_LENGTH}个!`);
};

// Pre-upload hook: reject files larger than 1 GB.
const beforeUpload: UploadProps["beforeUpload"] = (rawFile) => {
  const oneGigabyte = 1024 * 1024 * 1024;
  if (rawFile.size > oneGigabyte) {
    ElMessage.error("上传大小不能超过1G!");
    return false;
  }
  return true;
};

// Compute the file hash in a web worker so the main thread stays responsive.
const createHash = (chunkFileList: Blob[]): Promise<string> => {
  return new Promise((resolve, reject) => {
    // BUG FIX: the constructor is `URL`, not `Url` — `new Url(...)`
    // throws a ReferenceError at runtime, so no hash was ever produced.
    const worker = new Worker(new URL("./createHashWorker.js", import.meta.url), {
      type: 'module' // run the worker as an ES module
    })
    // Hand the chunk list to the worker.
    worker.postMessage(chunkFileList);
    worker.onmessage = (e) => {
      const { hash } = e.data
      if (hash) {
        resolve(hash)
      } else {
        reject("生成hash失败")
      }
    }
    // A worker that fails to load never posts a message; without this
    // handler the promise would stay pending forever.
    worker.onerror = () => reject("生成hash失败")
  })
}

// Custom http-request handler for el-upload:
// slice → hash → verify → upload missing slices → merge.
const handleUpload = async (options: UploadRequestOptions) => {
  // 1. Split the raw file into fixed-size blob chunks.
  const chunkFileList = createChunkFileList(options.file);
  const name = options.file.name;

  // Compute the whole-file hash in a web worker (off the main thread).
  const hash = await createHash(chunkFileList);

  // 2. Pair each chunk with its slice name (`<file>-<hash>-<index>`).
  const transformFileList = transformChunkFileList(chunkFileList, name, hash);

  // Reset progress bookkeeping for this file before uploading.
  fileList.value.forEach((file) => {
    if (file.name === name) {
      file.loaded = 0;
      file.status = "uploading";
    }
  });

  // Ask the server which slices already exist (resume support).
  const { data } = await verifyRequest(name, hash);

  // NOTE(review): when the file is already complete the entry's status is
  // left as "uploading" here — presumably it should flip to "success";
  // confirm the intended UX.
  if (!data.needUpload) {
    console.log(data.url);
    return;
  }

  // 3. Upload only the slices the server is missing, concurrently.
  await uploadChunks(transformFileList, name, data.chunkFileList);
  // 4. Tell the server to merge the slices into the final file.
  const res = await mergeRequest(name, hash);

  console.log(res);
};

// Slice size used when chunking the file: 10 MB.
const CHUNK_SIZE = 10 * 1024 * 1024;// unit: bytes

// 1. Split the source file into fixed-size Blob chunks.
const createChunkFileList = (file: File, size = CHUNK_SIZE) => {
  const chunkFileList: Blob[] = [];
  // Walk the file in `size`-byte strides; Blob.slice clamps the end
  // past EOF, so the final chunk may be smaller than `size`.
  for (let offset = 0; offset < file.size; offset += size) {
    chunkFileList.push(file.slice(offset, offset + size));
  }
  return chunkFileList;
}

// 2. Wrap every chunk blob together with its slice name,
//    in the shape the upload endpoint expects.
const transformChunkFileList = (
  chunkFileList: Blob[],
  name: string,
  hash: string
) => {
  const result = [];
  let index = 0;
  for (const chunkFile of chunkFileList) {
    result.push({
      chunk: chunkFile, // the sliced blob payload
      hash: `${name}-${hash}-${index}`, // slice name: <file>-<hash>-<i>
    });
    index += 1;
  }
  return result;
}
// 3. Custom XHR helper used by every upload endpoint.
//    Resolves with the parsed JSON body on 2xx, rejects otherwise.
const request: RequestHandler = ({
  url,
  data,
  method = "post",
  headers = {},
  onProgress,
}) => {
  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();
    xhr.open(method, url);
    if (onProgress) {
      xhr.upload.onprogress = onProgress;
    }
    Object.keys(headers).forEach((key) =>
      xhr.setRequestHeader(key, headers[key])
    );
    xhr.addEventListener("load", (e) => {
      // BUG FIX: the original fell through to resolve() after reject(),
      // so a failed request would ALSO resolve with the parsed body.
      if (xhr.status < 200 || xhr.status >= 300) {
        reject("失败");
        return;
      }
      try {
        resolve(JSON.parse((e.target as XMLHttpRequest).responseText));
      } catch {
        // Non-JSON body (e.g. a proxy error page) — treat as failure
        // instead of throwing synchronously inside the handler.
        reject("失败");
      }
    });
    // Network-level errors never fire "load"; without this the
    // returned promise would stay pending forever.
    xhr.onerror = () => reject("失败");
    xhr.send(data);
  });
};

// Progress-callback factory for one named file: accumulates uploaded
// bytes per finished chunk and derives the overall percentage.
const handleProgress = (name: string): OnProgress => {
  return (e) => {
    for (const file of fileList.value) {
      if (file.name !== name) continue;
      // Only count a chunk once its request has fully uploaded.
      if (e.loaded !== e.total) continue;
      // accumulated bytes across all finished chunks
      file.loaded += e.loaded;
      // percentage of the whole file, two decimals
      file.percentage = Number(
        ((file.loaded / (file.size as number)) * 100).toFixed(2)
      );
      if (file.percentage === 100) {
        file.status = "success";
      }
    }
  };
};

// Ask the server whether this file (or some of its slices) already exists.
const verifyRequest = (filename: string, hash: string) => {
  const body = JSON.stringify({
    filename,
    fileHash: hash,
  });
  return request({
    url: "http://localhost:3000/verify/file",
    headers: { "content-type": "application/json" },
    data: body,
  });
};

// 4. Upload slices — skipping any the server reports it already has.
const uploadChunks = async (
  fileList: ChunkFileList,
  name: string,
  chunkFileList: string[] = [] // slice names already on the server
) => {
  // Resume support: filter out already-uploaded slices so we don't
  // waste bandwidth re-sending them.
  const pending = fileList.filter(
    (file) => !chunkFileList.some((uploaded) => file.hash.includes(uploaded))
  );
  const requestList = pending.map(({ chunk, hash }) => {
    const formData = new FormData();
    formData.append("chunk", chunk, hash); // slice payload + slice name
    formData.append("filename", name); // owning file's name
    return request({
      url: "http://localhost:3000/upload/file",
      data: formData,
      onProgress: handleProgress(name),
    });
  });
  // Fire all chunk uploads concurrently and wait for every one.
  await Promise.all(requestList);
}
// const uploadChunks = async (
//   fileList: ChunkFileList,
//   name: string,
//   chunkFileList: string[] = [] // 已经上传好的切片
// ) => {
//   const requestList = fileList
//     .filter((file) => {
//       return !chunkFileList.some((chunkFile) => file.hash.includes(chunkFile));
//     })
//     .map(({ chunk, hash }) => {
//       const formData = new FormData();
//       formData.append("chunk", chunk, hash);
//       formData.append("filename", name);
//       return formData;
//     })
//     .map((formData) => {
//       return request({
//         url: "http://localhost:3000/upload/file",
//         data: formData,
//         onProgress: handleProgress(name),
//       });
//     });

//   // 并发请求
//   await Promise.all(requestList);
// };

// 5. Ask the server to stitch the uploaded slices into the final file.
const mergeRequest = (name: string, hash: string) => {
  const payload = JSON.stringify({
    filename: name,
    fileHash: hash,
  });
  return request({
    url: "http://localhost:3000/merge/file",
    headers: { "content-type": "application/json" },
    data: payload,
  });
};
</script>

<style scoped></style>

type类型文件
import type { UploadUserFile } from "element-plus";

// One slice of the source file, ready for upload.
export interface ChunkFile {
  chunk: Blob; // the sliced blob payload
  hash: string; // slice name: `<filename>-<fileHash>-<index>`
}

// The full set of slices for one file.
export type ChunkFileList = ChunkFile[];

// Shape of the progress event consumed by OnProgress.
// FIX: the original named this `XMLHttpRequest`, shadowing the global DOM
// type with a structurally incompatible local one — renamed to avoid the
// collision. OnProgress callers are unaffected (same structure).
interface UploadProgressEvent {
  loaded: number; // bytes transferred so far in this request
  total: number; // total bytes for this request
}

// Callback invoked on each xhr.upload progress event.
export interface OnProgress {
  (e: UploadProgressEvent): any;
}

// Options accepted by the custom XHR `request` helper.
export interface RequestType {
  url: string;
  data: any; // FormData for chunk uploads, JSON string for verify/merge
  headers?: {
    [key: string]: any;
  };
  method?: "post"; // only POST is used by this uploader
  onProgress?: OnProgress;
}

// el-upload's file entry extended with the byte counter used for progress.
export interface UploadCustomFile extends UploadUserFile {
  loaded: number; // bytes confirmed uploaded so far
}

// Envelope every backend endpoint responds with.
export interface ResponseType<T = any> {
  code: number;
  message: string;
  data: T;
  success: boolean;
}

// Signature of the custom XHR request helper.
export interface RequestHandler<T = any> {
  (options: RequestType): Promise<ResponseType<T>>;
}

worker 文件
import SparkMD5 from "spark-md5";
// Worker entry point: receives an array of Blob chunks, folds each chunk
// into an incremental MD5, and posts back { hash } when all chunks are done.
self.onmessage = function(e) {
  const chunkFileList = e.data;
  let currentChunk = 0;
  const spark = new SparkMD5.ArrayBuffer();
  const loadNext = (index) => {
    const reader = new FileReader();
    reader.onload = function(e) {
      currentChunk++;
      spark.append(e.target.result);
      // BUG FIX: compare against the chunk COUNT, not the array object —
      // `currentChunk === chunkFileList` is never true, so the worker
      // originally never posted a result.
      if (currentChunk === chunkFileList.length) {
        self.postMessage({
          hash: spark.end()
        });
        self.close();
      } else {
        loadNext(currentChunk); // recurse into the next chunk
      }
    };
    // BUG FIX: the original never started a read, so `onload` never fired.
    reader.readAsArrayBuffer(chunkFileList[index]);
  }
  loadNext(0); // start with the first chunk
}