需求
一个上传组件,需要具备的功能:
- 需要校验文件格式
- 可以上传任何文件,包括超大的视频文件(切片)
- 上传期间断网后,再次联网可以继续上传(断点续传)
- 要有进度条提示
- 已经上传过同一个文件后,直接上传完成(秒传)
前后端分工:
前端:
文件格式校验;文件切片、md5计算;发起检查请求,把当前文件的hash发送给服务端,检查是否有相同hash的文件;上传进度计算;上传完成后通知后端合并切片。
后端:
检查接收到的hash是否有相同的文件,并通知前端当前hash是否有未完成的上传;接收切片;合并所有切片。
具体实现
一、格式校验
对于上传的文件,一般来说,我们要校验其格式,仅需要获取文件的后缀(扩展名),即可判断其是否符合我们的上传限制:
// Path of the file whose extension we want to inspect.
const filePath = "file://upload/test.png";
// Position of the last "." in the path; -1 when the path contains no dot.
const index = filePath.lastIndexOf(".");
// Everything after the last dot is the extension. Without the -1 guard a
// dot-less path would yield the whole path (index + 1 === 0). `slice` is used
// instead of the deprecated `substr`.
const ext = index === -1 ? "" : filePath.slice(index + 1);
// Print the result.
console.log(ext);
// Output: png
但是,这种方式有个弊端,那就是我们可以随便篡改文件的后缀名,比如:test.mp4 ,我们可以通过修改其后缀名:test.mp4 -> test.png ,这样即可绕过限制进行上传。那有没有更严格的限制方式呢?当然是有的。 那就是通过查看文件的二进制数据来识别其真实的文件类型,因为计算机识别文件类型时,并不是真的通过文件的后缀名来识别的,而是通过 “魔数”(Magic Number)来区分,对于某一些类型的文件,起始的几个字节内容都是固定的,根据这几个字节的内容就可以判断文件的类型。借助十六进制编辑器,可以查看一下图片的二进制数据,我们还是以test.png为例:
由上图可知,PNG 类型的图片前 8 个字节是 0x89 50 4E 47 0D 0A 1A 0A。基于这个结果,我们可以据此来做文件的格式校验,以vue项目为例:
<template>
<div>
<input
type="file"
id="inputFile"
@change="handleChange"
/>
</div>
</template>
<script>
/**
 * File-input component that checks whether the selected file is a PNG by
 * comparing its leading bytes with the PNG signature instead of trusting the
 * file extension.
 */
export default {
  name: "fileUpload",
  methods: {
    /**
     * Builds a matcher for a fixed byte signature.
     * @param {number[]} headers - Expected signature bytes, in order.
     * @returns {Function} Predicate `(buffers, options = { offset: 0 })` that
     *   is true when `buffers` holds `headers` starting at `options.offset`.
     */
    check(headers) {
      return (buffers, options = { offset: 0 }) =>
        headers.every(
          (header, index) => header === buffers[options.offset + index]
        );
    },
    /**
     * Change handler of the file input: reads the first 8 bytes of the
     * selected file and logs whether they equal the PNG signature.
     * @param {Event} event - Change event of the file input.
     */
    async handleChange(event) {
      const file = event.target.files[0];
      // files[0] is undefined when the selection is empty; without this guard
      // readBuffer would throw on `file.slice`.
      if (!file) return;
      // For PNG the first 8 bytes are enough to identify the type.
      const buffers = await this.readBuffer(file, 0, 8);
      const uint8Array = new Uint8Array(buffers);
      const isPNG = this.check([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
      // Logs true after selecting test.png.
      console.log(isPNG(uint8Array));
    },
    /**
     * Reads the byte range [start, end) of a file. Only the requested slice is
     * read, so large files are not loaded into memory.
     * @param {Blob} file - File to read from.
     * @param {number} [start=0] - First byte offset (inclusive).
     * @param {number} [end=2] - Last byte offset (exclusive).
     * @returns {Promise<ArrayBuffer>} Resolves with the bytes read; rejects
     *   with the reader's error when reading fails.
     */
    readBuffer(file, start = 0, end = 2) {
      return new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.onload = () => resolve(reader.result);
        // Reject with the actual error rather than the raw event object.
        reader.onerror = () => reject(reader.error);
        reader.readAsArrayBuffer(file.slice(start, end));
      });
    },
  },
};
</script>
以上为校验文件类型的方法,对于其他类型的文件,比如mp4,xls等,大家感兴趣的话,也可以通过工具查看其二进制数据,以此来做格式校验。
以下为汇总的一些文件的二进制标识:
1.JPEG/JPG - 文件头标识 (2 bytes): ff, d8 文件结束标识 (2 bytes): ff, d9
2.TGA - 未压缩的前 5 字节 00 00 02 00 00 - RLE 压缩的前 5 字节 00 00 10 00 00
3.PNG - 文件头标识 (8 bytes) 89 50 4E 47 0D 0A 1A 0A
4.GIF - 文件头标识 (6 bytes) 47 49 46 38 39(37) 61
5.BMP - 文件头标识 (2 bytes) 42 4D B M
6.PCX - 文件头标识 (1 bytes) 0A
7.TIFF - 文件头标识 (2 bytes) 4D 4D 或 49 49
8.ICO - 文件头标识 (8 bytes) 00 00 01 00 01 00 20 20
9.CUR - 文件头标识 (8 bytes) 00 00 02 00 01 00 20 20
10.IFF - 文件头标识 (4 bytes) 46 4F 52 4D
11.ANI - 文件头标识 (4 bytes) 52 49 46 46
文件切片
<script>
import request from "../utils/request";
import hashWorker from "../utils/hash-worker";
import WorkerBuilder from "../utils/worker-build";
// Size of a single chunk in bytes (2 MB). The same value is sent to the
// server with the merge request so it can compute each chunk's offset.
const CHUNK_SIZE = 2 * 1024 * 1024;

/**
 * Returns the extension of a file name, i.e. the text after its last ".".
 * @param {string} fileName - File name to inspect.
 * @returns {string} Extension without the dot, or "" when there is no dot.
 */
const extractFileSuffix = (fileName) => {
  const dotIndex = fileName.lastIndexOf(".");
  return dotIndex === -1 ? "" : fileName.slice(dotIndex + 1);
};

/**
 * Cuts a file into consecutive chunks of at most `size` bytes.
 * An empty file still yields one (empty) chunk so that it can be uploaded;
 * a file whose size is an exact multiple of `size` gets no trailing empty chunk.
 * @param {Blob} file - Anything exposing `size` and `slice(start, end)`.
 * @param {number} [size=CHUNK_SIZE] - Chunk size in bytes, must be positive.
 * @returns {{chunk: Blob}[]} Chunks in file order.
 * @throws {RangeError} When `size` is not a positive number.
 */
const sliceIntoChunks = (file, size = CHUNK_SIZE) => {
  if (!(size > 0)) {
    throw new RangeError("chunk size must be a positive number");
  }
  const fileChunkList = [];
  let offset = 0;
  do {
    fileChunkList.push({ chunk: file.slice(offset, offset + size) });
    offset += size;
  } while (offset < file.size);
  return fileChunkList;
};

/**
 * Average of the `progress` values (0-100) of a list of chunks.
 * @param {{progress?: number}[]} [chunkList=[]] - Chunks being uploaded.
 * @returns {number} Mean progress, or 0 for an empty list.
 */
const averageProgress = (chunkList = []) => {
  if (chunkList.length === 0) return 0;
  const total = chunkList.reduce((sum, { progress = 0 }) => sum + progress, 0);
  return total / chunkList.length;
};

/**
 * Upload component: splits the selected file into chunks, hashes it in a
 * worker, asks the server which chunks it already has, uploads the missing
 * ones and finally requests the merge.
 */
export default {
  name: "fileUpload",
  data() {
    return {
      fileName: "",
      fileHash: "",
      // Slices of the selected file. Never overwritten while uploading, so a
      // retry hashes exactly the same data and can resume.
      chunkList: [],
      // Chunks still to be uploaded in the current run, with their progress.
      uploadingChunks: [],
      hashPercentage: 0,
    };
  },
  computed: {
    // Overall upload progress (0-100) of the chunks uploaded in this run.
    sumProgress() {
      return averageProgress(this.uploadingChunks);
    },
  },
  methods: {
    /** Returns the extension of `fileName` without the dot ("" if none). */
    getFileSuffix(fileName) {
      return extractFileSuffix(fileName);
    },
    /** Splits `file` into chunks of at most `size` bytes. */
    splitFile(file, size = CHUNK_SIZE) {
      return sliceIntoChunks(file, size);
    },
    /** Change handler of the file input: stores the name and the chunks. */
    handleFileChange(e) {
      const { files } = e.target;
      if (files.length === 0) return;
      // Remember the file name.
      this.fileName = files[0].name;
      // Slice the file.
      this.chunkList = this.splitFile(files[0]);
      this.uploadingChunks = [];
    },
    /**
     * Asks the server to merge the uploaded chunks of `hash`.
     * @param {string} hash - Hash of the whole file.
     * @returns {Promise<{data: *}>} The server response.
     */
    mergeRequest(hash) {
      return request({
        url: "http://localhost:3001/merge",
        method: "post",
        headers: {
          "content-type": "application/json",
        },
        data: JSON.stringify({
          // The server stores the file as hash + "." + suffix.
          fileHash: hash,
          suffix: this.getFileSuffix(this.fileName),
          // Used by the server to position each chunk while merging.
          size: CHUNK_SIZE,
        }),
      });
    },
    /**
     * Uploads all chunks in parallel, then requests the merge.
     * @param {{chunk: Blob, hash: string, progress: number}[]} chunksData
     * @param {string} hash - Hash of the whole file.
     * @returns {Promise<void>} Settles after the merge request completes.
     */
    async uploadChunks(chunksData, hash) {
      const suffix = this.getFileSuffix(this.fileName);
      const requestList = chunksData.map((item) => {
        const formData = new FormData();
        formData.append("chunk", item.chunk);
        formData.append("hash", item.hash);
        formData.append("suffix", suffix);
        return request({
          url: "http://localhost:3001/upload",
          data: formData,
          onprogress: (e) => {
            if (e.lengthComputable) {
              item.progress = Math.floor((e.loaded / e.total) * 100);
            }
            // Re-assign a fresh array so the progress display updates.
            this.uploadingChunks = [...chunksData];
          },
        });
      });
      await Promise.all(requestList);
      // Delay the merge request so the merge step is easy to observe on the server.
      await new Promise((resolve) => setTimeout(resolve, 1000));
      await this.mergeRequest(hash);
    },
    /**
     * Computes the file hash in a worker and tracks `hashPercentage`.
     * @param {{chunk: Blob}[]} chunkList - Slices of the file, in order.
     * @returns {Promise<string>} Resolves with the hash; rejects if the worker fails.
     */
    calculateHash(chunkList) {
      return new Promise((resolve, reject) => {
        const worker = new WorkerBuilder(hashWorker);
        worker.onmessage = (e) => {
          const { percentage, hash } = e.data;
          this.hashPercentage = percentage;
          // The hash is only present in the final message.
          if (hash) {
            resolve(hash);
          }
        };
        worker.onerror = (event) => {
          reject(new Error(event.message || "hash worker failed"));
        };
        // Post plain objects: the component's reactive wrappers around the
        // list may not be structured-cloneable.
        worker.postMessage({ chunkList: chunkList.map(({ chunk }) => ({ chunk })) });
      });
    },
    /** Click handler of the upload button: hash, verify, upload, merge. */
    async handleUpload() {
      if (!this.fileName) {
        alert("请先选择文件");
        return;
      }
      if (this.chunkList.length === 0) {
        alert("文件拆分中,请稍后...");
        return;
      }
      try {
        const hash = await this.calculateHash(this.chunkList);
        console.log("文件的hash为:", hash);
        this.fileHash = hash;
        const { shouldUpload, uploadedChunkList } = await this.verfileIsExist(
          hash,
          this.getFileSuffix(this.fileName)
        );
        if (!shouldUpload) {
          alert("文件已存在,无需重复上传");
          return;
        }
        // Chunk names look like "<hash>-<index>"; keep only the index.
        const uploadedIndexes = new Set(
          (uploadedChunkList || []).map((chunkName) =>
            Number.parseInt(chunkName.slice(chunkName.lastIndexOf("-") + 1), 10)
          )
        );
        if (uploadedIndexes.size > 0) {
          alert("已上传的区块号:" + [...uploadedIndexes].toString());
        }
        // Skip the chunks the server already has.
        const chunksData = [];
        this.chunkList.forEach(({ chunk }, index) => {
          if (!uploadedIndexes.has(index)) {
            chunksData.push({ chunk, hash: `${hash}-${index}`, progress: 0 });
          }
        });
        this.uploadingChunks = chunksData;
        await this.uploadChunks(chunksData, hash);
      } catch (err) {
        // Surface failures instead of leaving a rejected promise unhandled;
        // pressing upload again resumes with the chunks that are still missing.
        console.error(err);
        alert("上传失败,请重试");
      }
    },
    /**
     * Instant upload check: asks the server whether the file already exists.
     * @param {string} fileHash - Hash of the whole file.
     * @param {string} suffix - File extension without the dot.
     * @returns {Promise<{shouldUpload: boolean, uploadedChunkList?: string[]}>}
     */
    async verfileIsExist(fileHash, suffix) {
      const { data } = await request({
        url: "http://localhost:3001/verFileIsExist",
        headers: {
          "content-type": "application/json",
        },
        data: JSON.stringify({
          fileHash,
          suffix,
        }),
      });
      return JSON.parse(data);
    },
    /**
     * Overall progress (0-100) of the given chunks.
     * `size` is accepted for compatibility and is not used here.
     */
    ProgressBox({ chunkList = [], size = 40 }) {
      return averageProgress(chunkList);
    },
  },
};
</script>
hash-worker.js
/**
 * Body of the hashing worker. The function is meant to run inside a Web
 * Worker, so it only relies on worker globals (`self`, `FileReader`) and on
 * SparkMD5, which it loads with `importScripts`.
 *
 * Incoming message: `{ chunkList: [{ chunk: Blob }, ...] }`.
 * Outgoing messages: `{ percentage }` after each chunk except the last, then
 * `{ percentage: 100, hash }` once, after which the worker closes itself.
 */
const hashWorker = () => {
  self.importScripts("http://localhost:3000/spark-md5.min.js");
  self.onmessage = (e) => {
    const { chunkList } = e.data;
    const spark = new self.SparkMD5.ArrayBuffer();
    const total = chunkList.length;

    // Report the final hash and shut the worker down.
    const finish = () => {
      self.postMessage({
        percentage: 100,
        hash: spark.end(),
      });
      self.close();
    };

    // Nothing to read: report the hash of empty input instead of failing on
    // chunkList[0].
    if (total === 0) {
      finish();
      return;
    }

    // Chunks are read one after another so they are appended in file order.
    const loadNext = (index) => {
      const reader = new FileReader();
      reader.onload = (event) => {
        spark.append(event.target.result);
        const done = index + 1;
        if (done === total) {
          finish();
          return;
        }
        // Derived from the count rather than accumulated, to avoid
        // floating-point drift.
        self.postMessage({
          percentage: (done / total) * 100,
        });
        loadNext(done);
      };
      // Surface read failures instead of silently never finishing.
      reader.onerror = () => {
        throw reader.error;
      };
      reader.readAsArrayBuffer(chunkList[index].chunk);
    };
    loadNext(0);
  };
};
export default hashWorker
worker-build.js
/**
 * Creates a Web Worker from a function instead of a separate script file.
 * The function's source text is wrapped in an immediately-invoked expression,
 * placed in a Blob, and the Blob's object URL is used as the worker script.
 * The constructor returns that Worker directly.
 */
export default class WorkerBuilder extends Worker {
  constructor(worker) {
    const source = `(${worker.toString()})()`;
    const scriptUrl = URL.createObjectURL(new Blob([source]));
    return new Worker(scriptUrl);
  }
}
request.js
/**
 * Minimal promise wrapper around XMLHttpRequest.
 *
 * @param {Object} options
 * @param {string} options.url - Request URL.
 * @param {string} [options.method="post"] - HTTP method.
 * @param {*} [options.data] - Request body passed to `xhr.send`.
 * @param {Object} [options.headers={}] - Request headers to set.
 * @param {Function} [options.onprogress] - Upload progress listener.
 * @returns {Promise<{data: *}>} Resolves with the response once the request
 *   has loaded (whatever the HTTP status); rejects with an Error when the
 *   request fails at network level, is aborted, or times out.
 */
const request = ({
  url,
  method = "post",
  data,
  headers = {},
  onprogress
}) => {
  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();
    xhr.open(method, url);
    Object.keys(headers).forEach(key =>
      xhr.setRequestHeader(key, headers[key])
    );
    if (typeof onprogress === "function") {
      xhr.upload.onprogress = onprogress;
    }
    xhr.onload = e => {
      resolve({
        data: e.target.response
      });
    };
    // Without these handlers the promise would stay pending forever when the
    // connection drops, and callers could never react or retry.
    xhr.onerror = () => reject(new Error(`Network error while requesting ${url}`));
    xhr.onabort = () => reject(new Error(`Request to ${url} was aborted`));
    xhr.ontimeout = () => reject(new Error(`Request to ${url} timed out`));
    xhr.send(data);
  });
}
export default request;
服务端
import express from 'express'
import path from "path";
import fse from "fs-extra";
import multiparty from "multiparty";
import bodyParser from "body-parser";
// Express application that all routes below are registered on.
let app = express();
// path.dirname('') is ".", so this resolves to the current working directory.
const __dirname = path.resolve(path.dirname(''));
// Directory that holds the per-file chunk folders and the merged files.
const UPLOAD_FILES_DIR = path.resolve(__dirname, "./filelist");
// Parser for routes that receive a JSON request body.
const jsonParser = bodyParser.json({ extended: false });
// Cross-origin setup: allow any origin and any request header.
app.use((req, res, next) => {
  [
    ["Access-Control-Allow-Origin", "*"],
    ["Access-Control-Allow-Headers", "*"],
  ].forEach(([headerName, headerValue]) => {
    res.setHeader(headerName, headerValue);
  });
  next();
});
/**
 * Lists the chunks already stored for a file.
 * @param {string} fileHash - Hash of the whole file; also the name of its
 *   chunk directory inside UPLOAD_FILES_DIR.
 * @returns {Promise<string[]>} Names of the stored chunks, or [] when the
 *   chunk directory does not exist.
 */
const getUploadedChunkList = async (fileHash) => {
  try {
    // Read directly instead of checking for existence first: the directory
    // may be removed by a merge between the check and the read.
    return await fse.readdir(path.resolve(UPLOAD_FILES_DIR, fileHash));
  } catch (err) {
    if (err.code === "ENOENT") {
      return [];
    }
    throw err;
  }
};
// Instant-upload / resume check.
// Body: { fileHash, suffix }.
// Responds { shouldUpload: false } when the merged file already exists,
// otherwise { shouldUpload: true, uploadedChunkList } with the names of the
// chunks that are already stored (possibly empty).
app.post('/verFileIsExist', jsonParser, async (req, res) => {
  const { fileHash, suffix } = req.body || {};
  // Both values become part of a file-system path, so refuse anything that
  // could point outside UPLOAD_FILES_DIR.
  const isValid =
    typeof fileHash === "string" &&
    /^[0-9a-zA-Z]+$/.test(fileHash) &&
    typeof suffix === "string" &&
    !/[\\/\0]/.test(suffix);
  if (!isValid) {
    res.status(400).send({
      code: 400,
      message: "invalid fileHash or suffix"
    });
    return;
  }
  try {
    const filePath = path.resolve(UPLOAD_FILES_DIR, fileHash + "." + suffix);
    if (fse.existsSync(filePath)) {
      res.send({
        code: 200,
        shouldUpload: false
      });
      return;
    }
    const uploadedChunkList = await getUploadedChunkList(fileHash);
    res.send({
      code: 200,
      shouldUpload: true,
      uploadedChunkList
    });
  } catch (err) {
    // Answer the request instead of leaving it hanging on an unhandled rejection.
    console.error(err);
    res.status(500).send({
      code: 500,
      message: "failed to check file"
    });
  }
});
// Receives one chunk as multipart form data (fields: chunk, hash, suffix) and
// stores it as UPLOAD_FILES_DIR/<fileHash>/<fileHash>-<index>.
// The response is sent only after the chunk is in place, so the client cannot
// request a merge before every chunk has actually been stored.
app.post('/upload', (req, res) => {
  const multipart = new multiparty.Form();
  multipart.parse(req, async (err, fields, files) => {
    if (err) {
      console.error(err);
      res.status(400).send("failed to parse file chunk");
      return;
    }
    try {
      const [chunk] = (files && files.chunk) || [];
      const [hash] = (fields && fields.hash) || [];
      // `hash` is "<fileHash>-<chunkIndex>" and becomes part of a path, so
      // only accept exactly that shape.
      if (!chunk || typeof hash !== "string" || !/^[0-9a-zA-Z]+-\d+$/.test(hash)) {
        res.status(400).send("invalid file chunk");
        return;
      }
      const chunksDir = path.resolve(UPLOAD_FILES_DIR, hash.split("-")[0]);
      await fse.mkdirs(chunksDir);
      // Overwrite so that re-sending a chunk after an interrupted upload
      // does not fail because the destination already exists.
      await fse.move(chunk.path, path.resolve(chunksDir, hash), { overwrite: true });
      res.status(200).send("received file chunk");
    } catch (storeErr) {
      console.error(storeErr);
      res.status(500).send("failed to store file chunk");
    }
  });
});
/**
 * Copies the file at `path` into `writeStream`, then deletes the source file.
 * @param {string} path - Path of the chunk file to read.
 * @param {import('stream').Writable} writeStream - Destination stream; it is
 *   ended once the source has been fully piped.
 * @returns {Promise<void>} Resolves after the destination has finished
 *   writing and the source has been deleted; rejects on any read, write or
 *   delete error.
 */
const pipeStream = (path, writeStream) =>
  new Promise((resolve, reject) => {
    const readStream = fse.createReadStream(path);
    readStream.on("error", (err) => {
      writeStream.destroy();
      reject(err);
    });
    writeStream.on("error", (err) => {
      readStream.destroy();
      reject(err);
    });
    // Wait for "finish" on the destination: the source's "end" only means the
    // data was read, not that it has been written.
    writeStream.on("finish", () => {
      fse.unlink(path, (err) => (err ? reject(err) : resolve()));
    });
    readStream.pipe(writeStream);
  });
/**
 * Merges all stored chunks of a file into `filePath` and removes the chunk
 * directory afterwards.
 * @param {string} filePath - Path of the merged file to create.
 * @param {string} fileHash - Hash of the file; name of its chunk directory.
 * @param {number} size - Chunk size in bytes used by the client; chunk N is
 *   written at offset N * size.
 * @returns {Promise<void>}
 * @throws {RangeError} When `size` is not a positive integer.
 */
const mergeFileChunk = async (filePath, fileHash, size) => {
  if (!Number.isInteger(size) || size <= 0) {
    throw new RangeError("size must be a positive integer");
  }
  const chunksDir = path.resolve(UPLOAD_FILES_DIR, fileHash);
  const chunkPaths = await fse.readdir(chunksDir);
  // Chunk files are named "<fileHash>-<index>"; the index decides the offset.
  const chunkIndexOf = (chunkName) =>
    Number.parseInt(chunkName.slice(chunkName.lastIndexOf("-") + 1), 10);
  console.log("指定位置创建可写流", filePath);
  // Create (or empty) the target once, up front. The per-chunk streams below
  // open it with "r+", which modifies the file in place; the default "w"
  // flag would truncate it every time another stream is opened.
  await fse.writeFile(filePath, "");
  await Promise.all(
    chunkPaths.map((chunkPath) =>
      pipeStream(
        path.resolve(chunksDir, chunkPath),
        // Write stream positioned at this chunk's offset in the merged file.
        fse.createWriteStream(filePath, {
          flags: "r+",
          start: chunkIndexOf(chunkPath) * size
        })
      )
    )
  );
  // Remove the chunk directory after merging, including anything left in it.
  await fse.remove(chunksDir);
};
// Merges the uploaded chunks of a file.
// Body: { fileHash, suffix, size } where `size` is the chunk size in bytes.
// The merged file is stored as UPLOAD_FILES_DIR/<fileHash>.<suffix>.
app.post('/merge', jsonParser, async (req, res) => {
  const { fileHash, suffix, size } = req.body || {};
  // fileHash and suffix become part of a file-system path, and size is used
  // to compute write offsets, so validate all three before touching the disk.
  const isValid =
    typeof fileHash === "string" &&
    /^[0-9a-zA-Z]+$/.test(fileHash) &&
    typeof suffix === "string" &&
    !/[\\/\0]/.test(suffix) &&
    Number.isInteger(size) &&
    size > 0;
  if (!isValid) {
    res.status(400).send({
      code: 400,
      message: "invalid merge request"
    });
    return;
  }
  try {
    const filePath = path.resolve(UPLOAD_FILES_DIR, fileHash + "." + suffix);
    await mergeFileChunk(filePath, fileHash, size);
    res.send({
      code: 200,
      message: "success"
    });
  } catch (err) {
    // Answer the request instead of leaving it hanging on an unhandled rejection.
    console.error(err);
    res.status(500).send({
      code: 500,
      message: "merge failed"
    });
  }
});
// Start the HTTP server on port 3001 and log once it is listening.
app.listen(3001, function onListening() {
  console.log('listen:3001');
});