Compressed Download of Large Files: Download While Compressing

Background

Because of the company's network architecture, the environments are network-isolated and exchange data through MQ. Customers need to download files in batches, and when a batch is large, compressing it takes a long time; if the wait is too long the web connection tends to drop. Hence the idea explored here: use several threads to process the files, add each one to a large archive as it finishes, and return the archive's path right away so the web side can start downloading while compression is still in progress.

Dependencies

  • Spring Boot
  • Hutool
  • zip4j (the calls used from these libraries are sketched just below)
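
For orientation, here is a minimal sketch of the only calls from these libraries that the rest of the article relies on. It assumes Hutool 5.x and zip4j 2.x (where ZipFile is closeable); the class name and file paths are made up for illustration.

import cn.hutool.core.io.FileUtil;
import net.lingala.zip4j.ZipFile;

import java.io.File;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

public class DependencySketch {                       // hypothetical class, for illustration only
    public static void main(String[] args) throws Exception {
        File input = new File(System.getProperty("java.io.tmpdir"), "dep-sketch-input.txt");
        File archive = new File(System.getProperty("java.io.tmpdir"), "dep-sketch.zip");
        Files.deleteIfExists(archive.toPath());

        // Hutool: write (or overwrite) a small text file in a single call
        FileUtil.writeString("hello", input, StandardCharsets.UTF_8);

        // zip4j: append one file to the archive (the archive is created on first use)
        try (ZipFile zip = new ZipFile(archive)) {
            zip.addFile(input);
        }
        System.out.println("archive size: " + archive.length() + " bytes");
    }
}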

Thread pool configuration

import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;

import java.util.concurrent.ThreadPoolExecutor;

@Configuration
public class BeanConfig {
    @Bean
    ThreadPoolTaskExecutor parallelTaskServiceExecutor() {
        ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
        executor.setCorePoolSize(1);        // one worker thread is kept alive
        executor.setMaxPoolSize(3);         // up to three workers; extra threads beyond the core one
                                            // are only created once the queue below is full
        executor.setQueueCapacity(200);     // pending file tasks wait here
        executor.setKeepAliveSeconds(300);  // idle extra workers are reclaimed after 5 minutes
        executor.setThreadNamePrefix("task-parallel-");
        // When the queue is full, the submitting thread runs the task itself instead of rejecting it
        executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
        executor.initialize();
        return executor;
    }

}
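
A small, hypothetical driver (not part of the original code) makes the pool's behaviour visible. Because ThreadPoolTaskExecutor delegates to a standard ThreadPoolExecutor, threads beyond the core one are only created once the 200-slot queue is full, so the five tasks below all run sequentially on task-parallel-1. The class assumes it sits in the same package as BeanConfig.

import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;

public class PoolBehaviourDemo {                      // hypothetical class, for illustration only
    public static void main(String[] args) throws Exception {
        ThreadPoolTaskExecutor pool = new BeanConfig().parallelTaskServiceExecutor();
        for (int i = 0; i < 5; i++) {
            int n = i;
            pool.submit(() -> System.out.println("task " + n + " on " + Thread.currentThread().getName()));
        }
        Thread.sleep(1000);   // give the queued tasks time to run before shutting down
        pool.shutdown();
    }
}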

Processing the files with multiple threads

import cn.hutool.core.io.FileUtil;
import com.alibaba.fastjson.JSONObject;
import net.lingala.zip4j.ZipFile;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Semaphore;

@Service
public class BigFileService {

    private static final Logger logger = LoggerFactory.getLogger(BigFileService.class);

    @Autowired
    ThreadPoolTaskExecutor parallelTaskServiceExecutor;

    /**
     * Compresses the given files into one archive and returns the archive's path immediately,
     * while the actual compression keeps running in the background.
     *
     * @param filePathList paths of the files to process
     * @return absolute path of the (still growing) zip file
     * @throws IOException if the temporary files cannot be created
     */
    public String processBigFile(List<String> filePathList) throws IOException {
        // Counts down once per file that has been handled
        CountDownLatch countDownLatch = new CountDownLatch(filePathList.size());
        // Only one thread may write to the archive at a time
        Semaphore zipSemaphore = new Semaphore(1);
        // The archive lives in the temp directory
        File tempFile = File.createTempFile("bigfile", ".zip");

        // The final archive, managed by zip4j
        try (final ZipFile zipFile = new ZipFile(tempFile)) {
            // Sync file describing the compression progress (polled by the download endpoint)
            File syncFile = new File(tempFile.getAbsolutePath() + ".txt");
            // Initialise the sync state before any worker starts
            this.writeSync(syncFile, false, zipFile.getFile().length());

            new Thread(() -> {
                logger.info("start processing files");
                filePathList.forEach(path -> {
                    parallelTaskServiceExecutor.submit(() -> {
                        try {
                            File zip = processSingleFile(path);
                            if (zip != null && zip.length() > 0) {
                                try {
                                    logger.info(" {} 抢占压缩资源", zip.getName());
                                    zipSemaphore.acquire();
                                    zipFile.addFile(zip);
                                    Thread.sleep(3000);
                                    logger.info(" {} 完成压缩", zip.getName());
                                } catch (Exception e) {
                                    logger.error(zip.getName() + " 压缩文件异常", e);
                                } finally {
                                    this.writeSync(syncFile, true, zipFile.getFile().length());
                                    zipSemaphore.release();
                                }
                            }
                        } catch (Exception e) {
                            logger.error("处理文件过程异常 " + path, e);
                        } finally {
                            countDownLatch.countDown();
                        }
                    });
                });

                try {
                    // Wait until every file has been handled
                    countDownLatch.await();
                } catch (Exception e) {
                    logger.error("interrupted while waiting for all files to finish", e);
                } finally {
                    logger.info("all files processed: {} ({} bytes)", zipFile.getFile().getAbsolutePath(), zipFile.getFile().length());
                    // Mark the archive as complete so the download loop can drain the rest and stop
                    this.writeSync(syncFile, true, zipFile.getFile().length());
                }
            }, tempFile.getName()).start();

            // Return the absolute path of the archive right away; compression continues in the background
            return zipFile.getFile().getAbsolutePath();
        }
    }

    /**
     * Writes the sync file, a small JSON document such as {"end":false,"total":1048576} (illustrative values),
     * which the download endpoint polls to learn whether compression has finished and how large the archive is.
     */
    private void writeSync(File file, boolean end, long total) {
        JSONObject obj = new JSONObject();
        obj.put("end", end);
        obj.put("total", total);
        FileUtil.writeString(obj.toJSONString(), file, StandardCharsets.UTF_8);
    }

    private File processSingleFile(String path) {
        // Placeholder: fetch / prepare the single file here (e.g. pull it through MQ)

        // Return the file that should go into the archive
        return new File(path);
    }

}
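
A hedged usage sketch of the service outside a web request: the call returns the archive path immediately, and the caller can poll the companion .txt sync file until end becomes true. The class, bean wiring and input paths are assumptions for illustration.

import cn.hutool.core.io.FileUtil;
import com.alibaba.fastjson.JSONObject;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.List;

@Component
public class BigFileUsageSketch {                 // hypothetical class, for illustration only

    @Autowired
    BigFileService bigFileService;

    public void buildArchiveAndWait() throws Exception {
        List<String> demoPaths = List.of("/data/a.pdf", "/data/b.pdf");   // assumed input paths
        String zipPath = bigFileService.processBigFile(demoPaths);        // returns immediately

        // Poll the companion sync file until the background thread marks the archive as complete
        File syncFile = new File(zipPath + ".txt");
        while (true) {
            JSONObject sync = JSONObject.parseObject(FileUtil.readString(syncFile, StandardCharsets.UTF_8));
            if (sync != null && sync.getBooleanValue("end")) {
                break;
            }
            Thread.sleep(500);   // relaxed polling interval
        }
        System.out.println("archive ready: " + zipPath + " (" + new File(zipPath).length() + " bytes)");
    }
}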

Controller endpoint


import cn.hutool.core.io.FileUtil;
import com.alibaba.fastjson.JSONObject;
import jakarta.servlet.ServletOutputStream;
import jakarta.servlet.http.HttpServletResponse;
import lombok.extern.slf4j.Slf4j;
import net.lingala.zip4j.model.enums.RandomAccessFileMode;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

@Slf4j
@RestController
@RequestMapping("/down")
public class DownController {

    @Autowired
    BigFileService bigFileService;

    @GetMapping("/download")
    public void download(HttpServletResponse response) {
        List<String> filePathList = new ArrayList<>();   // populate with the files the client requested (omitted here)
        try {
            String filePath = bigFileService.processBigFile(filePathList);
            File file = new File(filePath);
            File syncFile = new File(file.getAbsolutePath() + ".txt");
            long start = 0L;
            byte[] body = new byte[51200];   // stream in 50 KB chunks
        
            response.setHeader("Accept-Ranges", "bytes");

            ServletOutputStream outputStream = response.getOutputStream();
            while (true) {
                JSONObject syncObj = JSONObject.parseObject(FileUtil.readString(syncFile, StandardCharsets.UTF_8));
                if (syncObj == null) {
                    continue;
                }

                boolean end = syncObj.getBooleanValue("end");
                long total = syncObj.getLongValue("total");

                try (RandomAccessFile randomAccessFile = new RandomAccessFile(filePath, RandomAccessFileMode.READ.getValue())) {
                    long curLength = randomAccessFile.length();
                    if (end) {
                        if (start >= curLength) {
                            break;
                        }
                    } else {
                        // Keep a safety margin behind the writer, otherwise the tail of the archive
                        // may be read while it is still being rewritten; also only read while no file
                        // is in the middle of being added (curLength == total)
                        if (start > (curLength - body.length * 2) || curLength != total) {
                            continue;
                        }
                    }

                    // Seek to where the previous read stopped
                    randomAccessFile.seek(start);
                    int read = randomAccessFile.read(body);

                    // Remember the new read position
                    start += read;

                    outputStream.write(body, 0, read);
                    outputStream.flush();

                } catch (Exception e) {
                    log.error("failed to stream a chunk of the archive", e);
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
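
For completeness, a hypothetical client that consumes the endpoint with the JDK 11 HttpClient and streams the response straight to disk; the URL and target file name are assumptions.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.file.Files;
import java.nio.file.Path;

public class DownloadClientSketch {               // hypothetical class, for illustration only
    public static void main(String[] args) throws Exception {
        HttpClient client = HttpClient.newHttpClient();
        HttpRequest request = HttpRequest.newBuilder(URI.create("http://localhost:8080/down/download")).GET().build();
        // The body is written to disk as it arrives, while the server is still compressing
        HttpResponse<Path> response = client.send(request, HttpResponse.BodyHandlers.ofFile(Path.of("batch.zip")));
        System.out.println("saved " + Files.size(response.body()) + " bytes to " + response.body());
    }
}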

Drawbacks

  1. The browser cannot know the total size of the file: the archive is still growing when the response starts, so no Content-Length header can be sent and no progress percentage is shown.
  2. Resumable (range-based) downloads cannot be offered.