基于H5分片上传、断点续传,采用迅雷下载的方式的上传服务

807 阅读4分钟

前言

在使用迅雷的时候,一直对迅雷下载文件的方式很感兴趣,一个是temp.xltd文件,另外一个是 config.cfg文件,大致的猜想是迅雷是多线程下载,需要将每个线程的数据保存到temp.xltd文件中,然后在config.cfg标记此次数据块的位置和字节数,为此我将写个大文件上传来验证我的猜想。

主要阐述实现思路,需要看代码的请点击breakpoint-upload

前端实现

// Entry point: concurrent chunk upload driven by a queue.
// Returns five methods:
// start    - begin (or resume) uploading
// stop     - pause (closes the valve; chunks already in flight still finish)
// done     - register a callback fired once every chunk has completed
// next     - advance the queue after one chunk finishes
// execute  - register the per-chunk upload callback (business logic)
export function queueUpload(
  file: File,
  options: UploadOptions = {
    chunkSize: 2 * 1024 * 1024,
    concurrent: 4,
    fieldname: "file",
  }
) {
  // Default every field individually: the signature default only applies
  // when options is omitted entirely — a caller passing a partial object
  // would otherwise leave chunkSize undefined.
  let {
    chunkSize = 2 * 1024 * 1024,
    concurrent = 1,
    fieldname = "file",
  } = options;
  // Split the file into fixed-size chunks.
  let chunks = blobSlice(file, chunkSize);
  // Build one FormData payload per chunk.
  let formDatas = generateFormData(chunks, file.name, fieldname);
  let queue: Item[] = formDatas.map((data) => ({ data }));

  let concurrentQueue: Item[] = []; // chunks currently in flight
  let valve = false; // open = uploading allowed
  let isDone = false;

  // Per-chunk upload callback (registered via active.execute).
  let execute = (item: FormData) => {};
  // Completion callback (registered via active.done).
  let done = () => {};

  const active = {
    stop: function stop() {
      valve = false;
    },
    start: function start() {
      valve = true;
      // Fire the completion callback only when nothing is queued AND
      // nothing is still in flight — the original fired `done` as soon
      // as the queue drained, while uploads were still running.
      if (queue.length === 0 && concurrentQueue.length === 0 && !isDone) {
        isDone = true;
        done.call(null);
        return;
      }
      // Top up the in-flight set until the concurrency limit is reached.
      while (concurrentQueue.length < concurrent) {
        let item = queue.shift();
        if (!item) return;
        concurrentQueue.push(item);
        execute.call(null, item.data);
      }
    },
    // Called by the consumer after a chunk finishes; schedules the next one.
    next: function next() {
      concurrentQueue.shift();
      if (!valve) return; // paused — do not schedule more work
      active.start();
    },
    execute: function (fn: (item?: FormData) => void) {
      execute = fn;
    },
    done: function (fn: () => void) {
      done = fn;
    },
  };
  return {
    ...active,
  };
}

服务端的实现

后端服务暴露两个接口

  • 断点续传接口 /renewal,检查文件是否上传完成,是就直接返回文件地址,否就返回已完成的分片标识
  • 文件上传接口 /upload,主要由两个中间件来实现的,第一个处理请求FormData字段和数据流,第二个处理上传逻辑
// Middleware that parses multipart/form-data: each file part is spooled
// into a uniquely named temp file, plain fields are appended to req.body.
export function makeField() {
  return function Field(req: Request, _: Response, next: NextFunction) {
    let busboy = new Busboy({
      headers: req.headers,
    });
    busboy.on("file", (fieldname, fileStream, filename, encoding, mimetype) => {
      // A part without a filename carries no file — drain and ignore it.
      if (!filename) return fileStream.resume();
      // uuid prefix avoids collisions; path.basename strips any
      // client-supplied directory components (path-traversal guard,
      // since the filename is untrusted input).
      let finalPath = path.join(
        os.tmpdir(),
        `${uuid.v4()}-${path.basename(filename)}`
      );

      let file = {
        finalPath: finalPath,
        fieldname: fieldname,
        originalname: filename,
        encoding: encoding,
        mimetype: mimetype,
      };
      appendField(req.body, fieldname, file);
      // Spool the part to the OS temp dir — trades speed for memory.
      // Alternative: pipe into a Duplex stream handed to the next middleware.
      let outStream = fs.createWriteStream(finalPath);
      fileStream.pipe(outStream);
      fileStream.on("end", () => {
        outStream.close();
      });
    });
    busboy.on("field", (fieldname, value) => {
      appendField(req.body, fieldname, value);
    });
    busboy.on("finish", () => {
      req.unpipe(busboy);
      next();
    });
    req.pipe(busboy);
  };
}
// Middleware that prepares the assembled-file target plus the per-chunk
// status (.cfg) file, then delegates to readCacheFile to write this chunk.
export function makeUpload() {
  return function Upload(req: Request, res: Response, next: NextFunction) {
    // Destructure only the fields this middleware actually uses — the
    // remaining body fields are consumed downstream by readCacheFile.
    let { chunkCount, md5, fieldname } = req.body;
    let { originalname } = req.body[fieldname];

    if (!fs.existsSync(getNowTempPath())) {
      mkdirsSync(getNowTempPath());
    }

    // Target file the chunks are assembled into (created empty once).
    let pathTmp = `${getNowTempPath()}/${md5}.${nameSuffix(originalname)}`;
    if (!fs.existsSync(pathTmp)) {
      fs.writeFileSync(pathTmp, new Uint8Array());
    }

    // Per-chunk status file: 12 bytes per chunk record; the index is
    // written through a uint32 view and the completion flag through a
    // uint8 view (layout defined by uintCodeCfg).
    let pathCfgTmp = `${getNowTempPath()}/${md5}.${nameSuffix(originalname)}.cfg`;
    if (!fs.existsSync(pathCfgTmp)) {
      let arr = new ArrayBuffer(+chunkCount * 12);
      uintCodeCfg(arr, (uint32, uint8, index) => {
        uint32[0] = index;
        uint8[0] = 0; // 0 = chunk not yet uploaded
      });
      fs.writeFileSync(pathCfgTmp, new Uint8Array(arr));
    }
    // readCacheFile is async; forward failures to Express instead of
    // leaving an unhandled rejection and a hung request (the original
    // ignored the returned promise).
    readCacheFile(req, next, pathTmp, pathCfgTmp).catch(next);
  };
}

// Streams the spooled chunk file into its absolute offset within the
// assembled temp file, marks the chunk complete in the .cfg file, and
// finalizes the upload once every chunk has arrived.
async function readCacheFile(
  req: Request,
  next: NextFunction,
  pathTmp: string,
  pathCfgTmp: string
) {
  let { position, chunkIndex, chunkCount, md5, fieldname } = req.body;
  let { originalname, finalPath } = req.body[fieldname];

  let fileHandle = await fs.promises.open(pathTmp, "r+");
  // Read the chunk previously spooled by the Field middleware.
  let rs = fs.createReadStream(finalPath);
  let _position = +position;
  // Track every in-flight write so the handle is never closed while a
  // write is still pending — the original used fire-and-forget fs.write
  // callbacks, which raced with fileHandle.close() in the end handler.
  let pendingWrites: Promise<unknown>[] = [];
  rs.on("data", (chunk) => {
    if (chunk instanceof Uint8Array) {
      let writeAt = _position;
      _position += chunk.length;
      // Write this piece at its absolute offset in the assembled file.
      pendingWrites.push(fileHandle.write(chunk, 0, chunk.length, writeAt));
    }
  });
  rs.on("end", async () => {
    // Wait for all positioned writes to land before closing the handle.
    await Promise.all(pendingWrites);
    await fileHandle.close();

    // The spooled cache file is no longer needed.
    await fs.promises.unlink(finalPath);

    let fi = await chunkFinish(pathCfgTmp, +chunkIndex);
    // All chunks present: move the file to the uploads folder and
    // remove the chunk-status file.
    if (fi.filter((f) => f.complete === 1).length === +chunkCount) {
      await fs.promises.unlink(pathCfgTmp);
      let _path = `${getUploadsPath()}/${md5}.${nameSuffix(originalname)}`;
      await fs.promises.rename(pathTmp, _path);
      appendField(req.body[fieldname], "path", _path);
    }

    rs.close();
    next();
  });
}

// Persist the completion flag for one chunk and return the full list of
// per-chunk states read from the .cfg file.
async function chunkFinish(pathCfgTmp: string, chunkIndex: number) {
  // Synchronous read/modify/write keeps the status file consistent:
  // interleaved async updates could clobber each other's flags.
  const cfgBuffer = fs.readFileSync(pathCfgTmp);
  const rawBuffer = pickTypedArrayBuffer(cfgBuffer);
  const states: any[] = [];
  // Walk every record, flipping the flag for the chunk that just finished.
  uintCodeCfg(rawBuffer, (uint32, uint8, index) => {
    if (index === +chunkIndex) uint8[0] = 1;
    states.push({ index: uint32[0], complete: uint8[0] });
  });
  // Flush the updated records back to disk.
  fs.writeFileSync(pathCfgTmp, new Uint8Array(rawBuffer));
  return states;
}

测试代码

// Demo driver: wires queueUpload to the /renewal and /upload endpoints.
function uploadFile(file: File) {
  const { execute, start, stop, next, done } = queueUpload(file, {
    chunkSize: 2 * 1024 * 1024,
    concurrent: 6,
    fieldname: "file",
  });
  const task = async () => {
    // md5 is computed from only the head and tail chunks, for speed.
    const md5 = await fileMd5(file);
    done(async () => {
      console.info("finish");
    });
    // Resume check: the server either returns the finished file's path
    // or the list of chunk indices it already holds.
    const res = await HTTP.post(baseUrl + "/file/renewal", {
      md5,
      filename: file.name,
    }).resule<Result<{ path: string; list: number[] }>>();
    if (res.code !== 200) return;
    if (res.data.path) {
      // Fully uploaded already — nothing to do.
      console.info(res.data.path);
      return;
    }
    const renewalArr = res.data.list;
    execute((item) => {
      if (!item) return;
      item.set("md5", md5);
      const chunkIndex = item.get("chunkIndex");
      // Skip chunks the server already has (breakpoint resume).
      if (chunkIndex && renewalArr.includes(+chunkIndex)) {
        next();
        return;
      }
      (async () => {
        const res = await HTTP.post(baseUrl + "/file/upload", item).resule<any>();
        console.info(res);
        next();
      })();
    });
    start();
    console.info("start");
  };
  task();
}

由于fetch的限制,上传进度无法实现,下载功能可以通过 Response.body.getReader()处理下载进度问题

小结

这次服务采用了不同的方式处理分片,去除了分片的合并操作和多次分片的转移,不过这次服务也带来了对临时文件多次的打开和关闭操作,以及记录分片详情的文件的多次读取和写入(可以通过写入内存来实现,去除同步读取操作,带来更高的性能)。代码上还有很多可优化的地方,比如边界的处理和性能的提升等,欢迎大家在评论区多多评论交流!