前言
在使用迅雷的时候,一直对迅雷下载文件的方式很感兴趣,一个是temp.xltd文件,另外一个是 config.cfg文件,大致的猜想是迅雷是多线程下载,需要将每个线程的数据保存到temp.xltd文件中,然后在config.cfg标记此次数据块的位置和字节数,为此我将写个大文件上传来验证我的猜想。
主要阐述实现思路,需要看代码的请点击breakpoint-upload
前端实现
// Entry point: concurrent chunk upload driven by a simple task queue.
// Returns five methods:
//   start   - begin/resume uploading
//   stop    - pause (in-flight tasks finish; no new tasks are started)
//   done    - register the callback fired once every task has completed
//   next    - signal that one task finished; schedules the next from the queue
//   execute - register the callback that performs a single upload
export function queueUpload(
  file: File,
  options: UploadOptions = {
    chunkSize: 2 * 1024 * 1024,
    concurrent: 4,
    fieldname: "file",
  }
) {
  let { chunkSize, concurrent = 1, fieldname = "file" } = options;
  // Split the file into fixed-size chunks.
  let chunks = blobSlice(file, chunkSize);
  // Build one FormData per chunk.
  let formDatas = generateFormData(chunks, file.name, fieldname);
  let queue: Item[] = [];
  for (let index = 0; index < formDatas.length; index++) {
    const element = formDatas[index];
    queue.push({
      data: element,
    });
  }
  let concurrentQueue: Item[] = []; // tasks currently in flight
  let valve = false; // gate: false = paused
  let isDone = false;
  // Callback performing a single upload (registered via `execute`).
  let execute = (item: FormData) => {};
  // Callback fired once the whole queue has drained (registered via `done`).
  let done = () => {};
  const active = {
    stop: function stop() {
      valve = false;
    },
    start: function start() {
      valve = true;
      // FIX: only report completion when no task is still in flight.
      // Previously `done` fired as soon as the pending queue emptied,
      // even though up to `concurrent - 1` uploads were still running.
      if (queue.length === 0 && concurrentQueue.length === 0 && !isDone) {
        done.call(null);
        isDone = true;
        return;
      }
      // Top up the in-flight set to the concurrency limit.
      while (concurrentQueue.length < concurrent) {
        let item = queue.shift();
        if (!item) return;
        concurrentQueue.push(item);
        execute.call(null, item.data);
      }
    },
    // Called by the consumer when one upload finishes.
    next: function next() {
      concurrentQueue.shift();
      if (!valve) return; // paused: don't schedule more work
      active.start();
    },
    execute: function (fn: (item?: FormData) => void) {
      execute = fn;
    },
    done: function (fn: () => void) {
      done = fn;
    },
  };
  return {
    ...active,
  };
}
服务端的实现
后端服务暴露两个接口
- 断点续传接口
/renewal,检查文件是否上传完成,是就直接返回文件地址,否就返回已完成的分片标识
- 文件上传接口
/upload,主要由两个中间件来实现的,第一个处理请求FormData字段和数据流,第二个处理上传逻辑
// Middleware that parses multipart FormData: plain fields go to req.body,
// file parts are streamed into a per-request temp file (trading speed for
// memory; an alternative would be handing a Duplex stream to the next
// middleware).
export function makeField() {
  return function Field(req: Request, _: Response, next: NextFunction) {
    let busboy = new Busboy({
      headers: req.headers,
    });
    let pendingWrites = 0; // file streams still flushing to disk
    let parsingDone = false;
    // FIX: busboy's "finish" fires when *parsing* ends, which can precede
    // the WriteStream flushing its last bytes. Only call next() once parsing
    // is done AND every temp-file write has finished, so the next middleware
    // never reads a partially-written cache file.
    const maybeNext = () => {
      if (parsingDone && pendingWrites === 0) next();
    };
    busboy.on("file", (fieldname, fileStream, filename, encoding, mimetype) => {
      if (!filename) return fileStream.resume(); // drain unnamed parts
      // FIX: interpolate the actual filename (the original template had a
      // garbled placeholder here).
      let finalPath = path.join(os.tmpdir(), `${uuid.v4()}-${filename}`);
      let file = {
        finalPath: finalPath,
        fieldname: fieldname,
        originalname: filename,
        encoding: encoding,
        mimetype: mimetype,
      };
      appendField(req.body, fieldname, file);
      let outStream = fs.createWriteStream(finalPath);
      pendingWrites++;
      // "finish" on the WriteStream means the bytes are flushed; pipe() ends
      // the write stream automatically, so no manual close() is needed.
      outStream.on("finish", () => {
        pendingWrites--;
        maybeNext();
      });
      fileStream.pipe(outStream);
    });
    busboy.on("field", (fieldname, value) => {
      appendField(req.body, fieldname, value);
    });
    busboy.on("finish", () => {
      req.unpipe(busboy);
      parsingDone = true;
      maybeNext();
    });
    req.pipe(busboy);
  };
}
// Middleware that creates the temp target file plus the per-chunk status
// (.cfg) file, then hands off to readCacheFile to copy this chunk into place.
export function makeUpload() {
  return function Upload(req: Request, res: Response, next: NextFunction) {
    let { chunkCount, md5, fieldname } = req.body;
    let { originalname } = req.body[fieldname];
    if (!fs.existsSync(getNowTempPath())) {
      mkdirsSync(getNowTempPath());
    }
    // Temp file every chunk is written into at its own byte offset.
    let pathTmp = `${getNowTempPath()}/${md5}.${nameSuffix(originalname)}`;
    if (!fs.existsSync(pathTmp)) {
      fs.writeFileSync(pathTmp, new Uint8Array());
    }
    // Chunk status file: 12 bytes per chunk, initialised to
    // {index, complete=0} via the uintCodeCfg views.
    let pathCfgTmp = `${getNowTempPath()}/${md5}.${nameSuffix(originalname)}.cfg`;
    if (!fs.existsSync(pathCfgTmp)) {
      let arr = new ArrayBuffer(+chunkCount * 12);
      uintCodeCfg(arr, (uint32, uint8, index) => {
        uint32[0] = index;
        uint8[0] = 0; // 0 = not uploaded yet
      });
      fs.writeFileSync(pathCfgTmp, new Uint8Array(arr));
    }
    // FIX: readCacheFile is async — forward rejections to Express instead of
    // leaving an unhandled promise rejection that hangs the request.
    readCacheFile(req, next, pathTmp, pathCfgTmp).catch(next);
  };
}
// Copies the buffered chunk (finalPath, written by makeField) into the temp
// target file (pathTmp) at the chunk's byte offset, marks the chunk complete
// in the .cfg file, and — once every chunk is present — moves the file into
// the uploads folder and records its final path on req.body.
async function readCacheFile(
  req: Request,
  next: NextFunction,
  pathTmp: string,
  pathCfgTmp: string
) {
  let { position, chunkIndex, chunkCount, md5, fieldname } = req.body;
  let { originalname, finalPath } = req.body[fieldname];
  let fileHandle = await fs.promises.open(pathTmp, "r+");
  try {
    // FIX: await each write before closing the handle. The original issued
    // fire-and-forget fs.write() callbacks and closed the handle on stream
    // "end", so trailing writes could race the close and be lost, and write
    // errors were silently dropped.
    let offset = +position;
    for await (const chunk of fs.createReadStream(finalPath)) {
      if (chunk instanceof Uint8Array) {
        await fileHandle.write(chunk, 0, chunk.length, offset);
        offset += chunk.length;
      }
    }
  } finally {
    await fileHandle.close();
  }
  // Cache file is fully copied; remove it (awaited so failures surface).
  await fs.promises.unlink(finalPath);
  let fi = await chunkFinish(pathCfgTmp, +chunkIndex);
  // All chunks complete: promote the temp file and drop the status file.
  if (fi.filter((f) => f.complete === 1).length === +chunkCount) {
    await fs.promises.unlink(pathCfgTmp);
    let _path = `${getUploadsPath()}/${md5}.${nameSuffix(originalname)}`;
    await fs.promises.rename(pathTmp, _path);
    appendField(req.body[fieldname], "path", _path);
  }
  next();
}
// Persist the completion state of the current chunk and return the full
// per-chunk status list ({index, complete}) read from the .cfg file.
async function chunkFinish(pathCfgTmp: string, chunkIndex: number) {
  // Read the status file synchronously so the read-modify-write cycle cannot
  // interleave with another request (a stale value would otherwise overwrite
  // a newer one).
  let buf = fs.readFileSync(pathCfgTmp);
  let arr = pickTypedArrayBuffer(buf);
  let fi: any[] = [];
  // Walk every record; uintCodeCfg presumably exposes each record as a
  // uint32 view (chunk index) plus a uint8 view (complete flag) — confirm
  // against its definition.
  uintCodeCfg(arr, (uint32, uint8, index) => {
    if (index === +chunkIndex) {
      uint8[0] = 1; // mark this chunk as uploaded
    }
    fi.push({
      index: uint32[0],
      complete: uint8[0],
    });
  });
  // Write the updated state back to disk.
  fs.writeFileSync(pathCfgTmp, new Uint8Array(arr));
  return fi;
}
测试代码
// Demo driver: uploads `file` in 2 MB chunks, six at a time, skipping
// chunks the server already has (resumable upload).
function uploadFile(file: File) {
  const uploader = queueUpload(file, {
    chunkSize: 2 * 1024 * 1024,
    concurrent: 6,
    fieldname: "file",
  });
  const task = async () => {
    // Hash only head/tail chunks so the md5 stays cheap to compute.
    const md5 = await fileMd5(file);
    uploader.done(async () => {
      console.info("finish");
    });
    const renewal = await HTTP.post(baseUrl + "/file/renewal", {
      md5,
      filename: file.name,
    }).resule<Result<{ path: string; list: number[] }>>();
    if (renewal.code !== 200) return;
    // File already fully uploaded: the server returns its final path.
    if (renewal.data.path) {
      console.info(renewal.data.path);
      return;
    }
    const alreadyUploaded = renewal.data.list;
    uploader.execute((item) => {
      if (!item) return;
      item.set("md5", md5);
      const chunkIndex = item.get("chunkIndex");
      // Skip chunks the server reports as complete.
      if (chunkIndex && alreadyUploaded.indexOf(+chunkIndex) !== -1) {
        uploader.next();
        return;
      }
      (async () => {
        const res = await HTTP.post(baseUrl + "/file/upload", item).resule<any>();
        console.info(res);
        uploader.next();
      })();
    });
    uploader.start();
    console.info("start");
  };
  task();
}
由于fetch的限制,上传进度无法实现,下载功能可以通过 Response.body.getReader()处理下载进度问题
小结
这次服务采用了不同的方式处理分片,去除了分片的合并操作和多次分片的转移,不过这次服务也带来了对临时文件多次的打开和关闭操作,记入分片详情的文件多次读取和写入(可以通过写入内存来实现,去除同步读取操作带来更高的性能),代码上还有很多优化的地方,比如边界的处理和性能的提升等,欢迎大家在评论区,多多评论交流!