EasyExcel停止当前Sheet的读取,且不影响主线程及其他Sheet读取的方法

0 阅读3分钟

有一个这样的需求,读取一个包含多个sheet的excel表,某个sheet校验不通过,就停止读取当前sheet,但是不能影响其他sheet的读取

百度了半天没看到有什么相关的回答,问ai也是胡言乱语,然后转向了google,搜半天也没搜到,但是在搜索结果里面搜到了一个更新日志,还得是google啊

image.png

首先升级版本到3.3.4,然后有两种场景,

  • 第一种,校验表头标题行,如果不通过就直接停止读取
  • 第二种,校验表格数据,如果不通过就停止读取

针对这两种场景进行测试,抛出ExcelAnalysisStopSheetException异常之后依然会调用doAfterAllAnalysed方法,在第一种场景下,里面的数据集为空,第二种场景下,会包含异常行之前的已解析数据

两种场景下都可以正常的停止当前sheet的读取并不影响其他sheet

完整的测试代码如下

import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.ExcelReader;
import com.alibaba.excel.annotation.ExcelProperty;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.exception.ExcelAnalysisStopSheetException;
import com.alibaba.excel.read.metadata.ReadSheet;
import com.alibaba.excel.read.metadata.holder.ReadSheetHolder;
import com.alibaba.excel.util.ListUtils;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.ss.formula.functions.T;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

@Slf4j
@SpringBootTest
class ExcelHeaderReaderTest {

    @Test
    void readExcelHeaders2() {
        long l = System.currentTimeMillis();
        String filePath = "excel所在的文件路径";
        int startRow = 1;
        try (ExcelReader excelReader = EasyExcel.read(filePath).build()) {
            List<ReadSheet> sheets = excelReader.excelExecutor().sheetList();
            ArrayList<ReadSheet> readSheets = new ArrayList<>();
            log.info("共找到 {} 个 sheet:", sheets.size());
            for (int i = 0; i < sheets.size(); i++) {
                ReadSheet sheet = sheets.get(i);
                log.info("Sheet {}: {}", i + 1, sheet.getSheetName());
                readSheets.add(EasyExcel.readSheet(i).head(TestDTO.class).registerReadListener(new AbstractExcelImportListener<TestDTO>(
                        (total, data, isLast) -> rowHandle(total, data, isLast, sheet.getSheetName())
                ) {

                    @Override
                    public void invokeHeadMap(Map<Integer, String> headMap, AnalysisContext context) {
                        log.info("解析到一条头数据: {}", headMap);
                        // 执行表头校验
                        if (!validateHeaders(headMap, context.readSheetHolder().getSheetNo())) {
                            // 标记校验失败
                            throw new ExcelAnalysisStopSheetException();
                        }
                    }

                    private boolean validateHeaders(Map<Integer, String> headMap, Integer sheetNo) {
                        // 假设第二个sheet校验不通过
                        return sheetNo != 1;
                    }
                }).headRowNumber(startRow).build());

            }
            // 这里注意 一定要把sheet1 sheet2 一起传进去,不然有个问题就是03版的excel 会读取多次,浪费性能
            excelReader.read(readSheets);
        }
        log.info("耗时{}", System.currentTimeMillis() - l);
    }

    @Test
    void readExcelHeaders3() {
        long l = System.currentTimeMillis();
        String filePath = "excel所在的文件路径";
        int startRow = 1;
        try (ExcelReader excelReader = EasyExcel.read(filePath).build()) {
            List<ReadSheet> sheets = excelReader.excelExecutor().sheetList();
            ArrayList<ReadSheet> readSheets = new ArrayList<>();
            log.info("共找到 {} 个 sheet:", sheets.size());
            for (int i = 0; i < sheets.size(); i++) {
                ReadSheet sheet = sheets.get(i);
                log.info("Sheet {}: {}", i + 1, sheet.getSheetName());
                readSheets.add(EasyExcel.readSheet(i).head(TestDTO.class).registerReadListener(new AbstractExcelImportListener<TestDTO>(
                        (total, data, isLast) -> rowHandle(total, data, isLast, sheet.getSheetName())
                ) {

                    @Override
                    public void invoke(TestDTO data, AnalysisContext context) {
                        // 假设第二个sheet的第二行校验不通过
                        if (context.readRowHolder().getRowIndex() == 2 && context.readSheetHolder().getSheetNo() == 1) {
                            throw new ExcelAnalysisStopSheetException();
                        }
                        count.incrementAndGet();
                        data.setRowNum(context.readRowHolder().getRowIndex() + 1);
                        cachedDataList.add(data);
                        // 达到BATCH_COUNT了,需要去存储一次数据库,防止数据几万条数据在内存,容易OOM
                        if (cachedDataList.size() >= BATCH_COUNT) {
                            excelImportService.saveData(count.get(), cachedDataList, false);
                            // 存储完成清理 list
                            cachedDataList = ListUtils.newArrayListWithExpectedSize(BATCH_COUNT);
                        }
                    }

                    @Override
                    public void invokeHeadMap(Map<Integer, String> headMap, AnalysisContext context) {
                        log.info("解析到一条头数据: {}", headMap);
                        // 执行表头校验
                        if (!validateHeaders(headMap, context.readSheetHolder().getSheetNo())) {
                            // 标记校验失败
                            throw new ExcelAnalysisStopSheetException("表头校验失败");
                        }
                    }

                    private boolean validateHeaders(Map<Integer, String> headMap, Integer sheetNo) {
                        return true;
                    }
                }).headRowNumber(startRow).build());

            }
            // 这里注意 一定要把sheet1 sheet2 一起传进去,不然有个问题就是03版的excel 会读取多次,浪费性能
            excelReader.read(readSheets);
        }
        log.info("耗时{}", System.currentTimeMillis() - l);
    }

    private void rowHandle(int total, List<TestDTO> data, boolean isLast, String sheetName) {
        log.info("读取{}{}行数据:{}", sheetName, total, data);
    }

    @Data
    public static class TestDTO extends ExcelImportDTO {
        @ExcelProperty(index = 0)
        private String name;
        @ExcelProperty(index = 1)
        private Integer age;
        @ExcelProperty(index = 2)
        private String address;
    }
}

监听类,要继承AnalysisEventListener


import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
import com.alibaba.excel.util.ListUtils;
import lombok.extern.slf4j.Slf4j;

import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

@Slf4j
public abstract class AbstractExcelImportListener<T extends ExcelImportDTO> extends AnalysisEventListener<T> {

    protected static final int BATCH_COUNT = 200;

    protected AtomicInteger count = new AtomicInteger(0);

    /**
     * 缓存的数据
     */
    protected List<T> cachedDataList = ListUtils.newArrayListWithExpectedSize(BATCH_COUNT);
    /**
     * 数据处理对象
     */
    protected final ExcelImportService<T> excelImportService;

    public AbstractExcelImportListener(ExcelImportService<T> excelImportService) {
        this.excelImportService = excelImportService;
        count = new AtomicInteger();
    }

    /**
     * 这个每一条数据解析都会来调用
     *
     * @param data    one row value. Is is same as {@link AnalysisContext#readRowHolder()}
     * @param context
     */
    @Override
    public void invoke(T data, AnalysisContext context) {
        count.incrementAndGet();
        data.setRowNum(context.readRowHolder().getRowIndex() + 1);
        cachedDataList.add(data);
        // 达到BATCH_COUNT了,需要去存储一次数据库,防止数据几万条数据在内存,容易OOM
        if (cachedDataList.size() >= BATCH_COUNT) {
            excelImportService.saveData(count.get(), cachedDataList, false);
            // 存储完成清理 list
            cachedDataList = ListUtils.newArrayListWithExpectedSize(BATCH_COUNT);
        }
    }

    /**
     * 所有数据解析完成了 都会来调用
     *
     * @param context
     */
    @Override
    public void doAfterAllAnalysed(AnalysisContext context) {
        // 这里也要保存数据,确保最后遗留的数据也存储到数据库
        excelImportService.saveData(count.get(), cachedDataList, true);
        log.info("所有数据解析完成!");
    }

}

excel示例