fileupload

234 阅读4分钟
    /**
    文件上传:
        1. 拖拽,粘贴
        2. 分片,断点续传
        3. 如何限制只能上传png格式的文件
            split不ok,因为pdf文件重命名就越过了
            文件头信息
        4. 如何判断文件的唯一性
            算hash,比如md5
            文件过大,1G,怎么算md5不卡顿
                web-worker
                时间切片(学了react fiber架构get到的知识点,requestIdleCallback
                抽样
        5. 
            文件切片数量过多,上传请求过多,导致卡顿,怎么处理
                控制异步任务并发数量
        6. 场景的深挖
            如果有报错,怎么重试,报错三次怎么统一终止所有切片
        7. 每个切片的大小如何控制,移动端和pc端网速不一致
            应该根据网速来动态调整包的大小
        8. 并发数 + 包控制如何实现
    */

    function blobToString(blob) {
        return new Promise(function(resolve) {
            const reader = new FileReader();
            reader.onload = function() {
                const ret = reader.result.split('')
                    .map(v=>v.charCodeAt())
                    .map(v=>v.toString(16).toUpperCase())
                    .map(v=>v.padStart(2,'0'))
                    .join(' ');
                resolve(ret);
            };
            reader.readAsBinaryString(blob);
        });
    }

    document.getElementById("file-upload").addEventListener("change", async function(e){
        const file = e.target.files[0];
        const isJpgType = await isJpg(file);
        const isPngType = await isPng(file);
        const isGifType = await isGif(file);
        if(isJpgType) {
            console.log("file type is JPG");
        }
        if(isPngType) {
            console.log("file type is PNG");
        }
        if(isGifType) {
            console.log("file type is GIF");
        }
    });


    // https://blog.csdn.net/adzcsx2/article/details/50503506 
    async function isJpg(file) {
        // jpg开头两个仕 FF D8
        // 结尾两个仕 FF D9
        const len = file.size
        const start = await blobToString(file.slice(0,2))
        const tail = await blobToString(file.slice(-2, len))
        const isjpg = start==='FF D8' && tail==='FF D9';
        if(isjpg){
            const heightStart = parseInt('A3',16)
            const widthStart = parseInt('A5',16)
            const {w,h} = await getRectByOffset(file,[widthStart,widthStart+2],[heightStart,heightStart+2])
            console.log('jpg大小',w, h)
        }
        return isjpg;
    }

    async function isGif(file) {
        const ret = await blobToString(file.slice(0,6))
        const isgif = (ret==='47 49 46 38 39 61') || (ret==='47 49 46 38 37 61');
        if(isgif) {
            const {w,h} = await getRectByOffset(file,[6,8],[8,10],true)
            console.log('gif宽高',w,h)
        } 
        return isgif;
         // 文件头16进制 47 49 46 38 39 61 或者47 49 46 38 37 61
        // 分别仕89年和87年的规范
        // const tmp = '47 49 46 38 39 61'.split(' ')
        //               .map(v=>parseInt(v,16))
        //               .map(v=>String.fromCharCode(v))
        // console.log('gif头信息',tmp)
        // // 或者把字符串转为16进制 两个方法用那个都行
        // const tmp1 = 'GIF89a'.split('')
        //                 .map(v=>v.charCodeAt())
        //                 .map(v=>v.toString(16))
        // console.log('gif头信息',tmp1)
        
        // return ret ==='GIF89a' || ret==='GIF87a'
        // 文件头标识 (6 bytes) 47 49 46 38 39(37) 61
    }

    async function isPng(file) {
        const ret = await blobToString(file.slice(0,8))
        const ispng = ret==='89 50 4E 47 0D 0A 1A 0A';
        if(ispng) {
            const {w,h} = await getRectByOffset(file,[18,20],[22,24])
            console.log('png宽高',w,h)
        } 
        return ispng;
    }

    async function getRectByOffset(file,widthOffset,heightOffset,reverse){
        let width = await blobToString(file.slice(...widthOffset))
        let height = await blobToString(file.slice(...heightOffset))

        if(reverse){
            // 比如gif 的宽,6和7 是反着排的 大小端存储
            // 比如6位仕89,7位仕02, gif就是 0289 而不是8920的值 切分后翻转一下
            width = [width.slice(3,5),width.slice(0,2)].join(' ')
            height = [height.slice(3,5),height.slice(0,2)].join(' ')
        }
        const w = parseInt(width.replace(' ', ''),16)
        const h = parseInt(height.replace(' ', ''),16)
        return {w,h}
    }


    //按size切片
    function createFileChunk(file, size) {
        const chunks = [];
        let cur = 0;
        while(cur < file.size) {
            chunks.push({
                index: cur,
                file: file.slice(cur, cur + size)
            });
            cur += size;
        }
        return chunks;
    }

    function ext(filename){
        // 返回文件后缀名
        return filename.split('.').pop()
    }

    async function handleUpload() {
        let chunks = createFileChunk(file);

        // 1.计算hash 文件指纹标识
        // this.hash = await this.calculateHash(this.file)
        // 2.web-worker
        // this.hash = await this.calculateHashWorker(chunks)

        // 3.requestIdleCallback
        // this.hash = await this.calculateHashIdle(chunks)

        //4.抽样哈希,牺牲一定的准确率 换来效率,hash一样的不一定是同一个文件, 但是不一样的一定不是 
        // 所以可以考虑用来预判
        this.hash = await this.calculateHashSample()


        //检查文件是否已经上传
        //发送check请求,uploaded表示文件是否全部上传完毕,uploadedList表示已经上传成功的切片
        const { uploaded, uploadedList } = await this.$axios.post('/check',{
            ext:this.ext(this.file.name),
            hash:this.hash
            }
        )
        if(uploaded){
            console.log("上传成功");
            return;
        }

        //切片
        this.chunks = chunks.map((chunk,index)=>{
            // 每一个切片的名字
            const chunkName = this.hash+'-'+index
            return {
                hash:this.hash,
                chunk:chunk.file,
                name:chunkName,
                index,
                // 设置进度条
                progress: uploadedList.indexOf(chunkName) > -1 ? 100 : 0,
            }
        });

        await uploadChunks(uploadedList);
    }

    async function uploadChunks(uploadedList = []) {
        const list = this.chunks
            .filter(chunk => uploadedList.indexOf(chunk.name) == -1)
            .map(({ chunk,name, hash, index }, i) => {
            const form = new FormData();
            form.append("chunkname", name)
            form.append("ext", this.ext(this.file.name))
            form.append("hash", hash)
            // form.append("file", new File([chunk],name,{hash,type:'png'}))
            form.append("file",chunk)

            return { form, index,error:0}
        });
        //   .map(({ form, index }) =>this.$axios.post('/upload',form, {
        //     onUploadProgress: progress => {
        //       this.chunks[index].progress = Number(((progress.loaded / progress.total) * 100).toFixed(2));
        //     }
        //   }))
        // await Promise.all(list);
        try {
            await sendRequest(list, 4);
            if(uploadedList.length + list.length === this.chunks.length){
                await this.mergeRequest();
            }
        }catch(e) {
            alert('上传似乎除了点小问题,重试试试哈');
        }
    }

    function sendRequest(chunks, limit = 4) {
        return new Promise(function(resolve, reject) {
            const len = chunks.length;
            let counter = 0;
            // 全局开关
            let isStop = false ;

            const start = async function () {
                if(isStop) {
                    return;
                }

                const task = chunks.shift();
                if(task) {
                    const {form,index} = task
                    try {
                        await this.$axios.post('/upload',form, {
                            onUploadProgress: progress => {
                            this.chunks[index].progress = Number(((progress.loaded / progress.total) * 100).toFixed(2));
                            }
                        });
                        if(counter === len - 1){
                            resolve();
                        }else {
                            counter ++;
                            start();
                        }
                    }catch(e) {
                        // 当前切片报错了
                        // 尝试3次重试机制,重新push到数组中
                        console.log('出错了')
                        this.chunks[index].progress = -1;
                        if(task.error < 3) {
                            task.error ++;
                            // 队首进去 准备重试
                            chunks.unshift(task)
                            start()
                        }else {
                            isStop = true;
                            reject();
                        }
                    }
                }
            }


            while(limit>0){
                setTimeout(()=>{
                    // 模拟延迟
                    start()
                }, Math.random()*2000)

                limit-=1
            }
        });
    }

    async function mergeRequest(){
        await this.$axios.post("/merge", {
            ext: this.ext(this.file.name),
            size: CHUNK_SIZE,
            hash: this.hash
        });
    }

    function calculateHashSample() {
        return new Promise(function(resolve) {
            const spark = new sparkMd5.ArrayBuffer();
            const reader = new FileReader();
            const file = this.file;
            // 文件大小
            const size = this.file.size;
            let offset = 2 * 1024 * 1024;

            let chunks = [file.slice(0, offset)];

            // 前面100K

            let cur = offset;
            while (cur < size) {
                // 最后一块全部加进来
                if (cur + offset >= size) {
                    chunks.push(file.slice(cur, cur + offset));
                } else {
                    // 中间的 前中后去两个字节
                    const mid = cur + offset / 2;
                    const end = cur + offset;
                    chunks.push(file.slice(cur, cur + 2));
                    chunks.push(file.slice(mid, mid + 2));
                    chunks.push(file.slice(end - 2, end));
                }
                // 前取两个字节
                cur += offset;
            }
            // 拼接
            reader.readAsArrayBuffer(new Blob(chunks));

            // 最后100K
            reader.onload = e => {
                spark.append(e.target.result);
                this.hashProgress = 100
                resolve(spark.end());
            };
        });
    }
    
    async function calculateHash(file) {
        // 直接计算md5 大文件会卡顿
        const ret = await this.blobToData(file)
        return sparkMd5.hash(ret)
    }

    function blobToData(blob) {
        return new Promise(function(resolve) {
            const reader = new FileReader()
            reader.onload = function () {
            resolve(reader.result)
            }
            reader.readAsBinaryString(blob);
        });
    }

    // web-worker 防止卡顿主线程
    function calculateHashWorker(chunks) {
        return new Promise(function(resolve) {
            // web-worker 防止卡顿主线程
            this.worker = new Worker("/hash.js");
            this.worker.postMessage({ chunks });
            this.worker.onmessage = e => {
            const { progress, hash } = e.data;
            this.hashProgress = Number(progress.toFixed(2));
            if (hash) {
                resolve(hash);
            }
            };
        });
    }

    function calculateHashIdle() {
        return new Promise(function(resolve) {
            const spark = new sparkMd5.ArrayBuffer();
            let count = 0;
            const appendToSpark = async file => {
                return new Promise(resolve => {
                    const reader = new FileReader();
                    reader.readAsArrayBuffer(file);
                    reader.onload = e => {
                    spark.append(e.target.result);
                    resolve();
                    };
                });
            };
            const workLoop = async deadline => {
            // 有任务,并且当前帧还没结束
            while (count < chunks.length && deadline.timeRemaining() > 1) {
                await appendToSpark(chunks[count].file);
                count++;
                // 没有了 计算完毕
                if (count < chunks.length) {
                // 计算中
                this.hashProgress = Number(
                    ((100 * count) / chunks.length).toFixed(2)
                );
                // console.log(this.hashProgress)
                } else {
                // 计算完毕
                this.hashProgress = 100;
                resolve(spark.end());
                }
            }
            console.log(`浏览器有任务拉,开始计算${count}个,等待下次浏览器空闲`)

            window.requestIdleCallback(workLoop);
            };
            window.requestIdleCallback(workLoop);
        });
    }

    function handleUploadByRate() {
        const fileSize = file.size
        let offset = 0.1*1024*1024 

        let cur = 0 
        let count =0
        this.hash = await this.calculateHashSample();

        while(cur < fileSize) {
            const chunk = file.slice(cur, cur + offset);
            cur += offset;
            const chunkName = this.hash + "-" + count;

            const form = new FormData();

            form.append("chunkname", chunkName)
            form.append("ext", this.ext(this.file.name))
            form.append("hash", this.hash)
            // form.append("file", new File([chunk],name,{hash,type:'png'}))

            let start = new Date().getTime()
            await this.$axios.post( '/upload', form)
            const now = new Date().getTime()

            const time = ((now -start)/1000).toFixed(4)

            // 期望10秒一个切片
            let rate = time/10
            // 速率有最大和最小 可以考虑更平滑的过滤 比如1/tan 
            if(rate<0.5) rate=0.5
            if(rate>2) rate=2
            // 新的切片大小等比变化
            console.log(`切片${count}大小是${this.format(offset)},耗时${time}秒,是30秒的${rate}倍,修正大小为${this.format(offset/rate)}`)
            offset = parseInt(offset/rate)
            // if(time)
            count++
        }
    }