ExcelImportUtil(大批量数据导入)

1,140 阅读5分钟

1、依赖

<poi.version>4.1.0</poi.version>
<commons.io>2.6</commons.io>
<hutool.version>4.5.16</hutool.version>

<!-- 日志打印 -->
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
    <optional>true</optional>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>${poi.version}</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>${poi.version}</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>${poi.version}</version>
</dependency>

 <dependency>
     <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>${commons.io}</version>
</dependency>

<!-- entity 映射 -->
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>${hutool.version}</version>
</dependency>

2、Vo

package com.example.entity.vo;


import cn.hutool.core.collection.CollectionUtil;
import com.example.util.DataCheck;
import lombok.Data;
import org.apache.commons.lang3.StringUtils;

import javax.validation.constraints.NotNull;
import javax.validation.constraints.Size;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

@Data
public class StudentVo {

    /**
     * 姓名
     */
    @NotNull(message = "姓名不能为空")
    @Size(min = 1, message = "姓名格式不正确")
    private String name;

    /**
     * 年龄
     * regexp 传入正则表达式
     */
    //@Pattern(regexp = "",message = "年龄格式格不正确")
    private String age;

    /**
     * 钱
     */
    //@Pattern(regexp = "",message = "")
    private String mony;

    /**
     * 错误信息
     */
    private String message;

    /**
     * 校验数据是否符合规则
     *
     * @param studentVoList 传入的校验数据
     * @return 不符合规则的队列
     */
    public static List<StudentVo> check(List<StudentVo> studentVoList) {

        //创建失败队列
        List<StudentVo> failList = Collections.synchronizedList(new ArrayList<>());

        //使用javax进行数据格式校验,并对校验通过的队列进行分组操作
        Map<String, List<StudentVo>> collect = studentVoList.parallelStream().filter(studentVo -> {
            List<String> strings = DataCheck.validMessage(studentVo);
            if (CollectionUtil.isEmpty(strings)) {
                return true;
            }
            studentVo.setMessage(StringUtils.join(strings, ","));
            failList.add(studentVo);
            return false;
        }).collect(
                //进行分组操作,业务场景:如果多个excel行属于同一个数据,那么根据规则拆分为多个list
                Collectors.groupingBy(
                        studentVo -> studentVo.getName().hashCode() + ""
                )
        );


        //根据自定义的校验规则进行校验
        collect.forEach(
                (s, List) -> {
                    //校验规则
                    //加入失败队列
                }
        );

        return failList;
    }
}

3、解析

package com.example.util;

import cn.hutool.core.bean.BeanUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.ooxml.util.SAXHelper;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.model.Styles;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

import javax.xml.parsers.ParserConfigurationException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;


@Slf4j
public class ExcelImportUtil {
    private OPCPackage xlsxPackage;
    /**
     * 头数组(从左到右)
     */
    private String[] headers;
    /**
     * 处理行数据(k->头)
     */
    private Consumer<Map<String, String>> consumer;
    /**
     * 是否读取第一行
     */
    private boolean readFirst;
    /**
     * 是否读取空行
     */
    private boolean readNullRaw;
    /**
     * 是否只读第一个sheet页
     */
    private boolean readFirstSheetOnly;

    /**
     * @param file     读取的文件
     * @param headers  头数组
     * @param consumer 消费者
     */
    public ExcelImportUtil(File file
            , String[] headers
            , Consumer<Map<String, String>> consumer) {
        this(file, headers, consumer
                , false, false, false);
    }

    /**
     * @param file               读取的文件
     * @param headers            头数组
     * @param consumer           消费者
     * @param readFirst          是否读取第一行
     * @param readNullRaw        readNullRaw
     * @param readFirstSheetOnly 是否只读第一个sheet页
     */
    public ExcelImportUtil(File file
            , String[] headers
            , Consumer<Map<String, String>> consumer
            , boolean readFirst
            , boolean readNullRaw
            , boolean readFirstSheetOnly) {
        try {
            //解析并读取文件
            this.xlsxPackage = OPCPackage.open(file, PackageAccess.READ);
        } catch (InvalidFormatException e) {
            log.error(e.getMessage(), e);
        }
        this.headers = headers;
        this.consumer = consumer;
        this.readFirst = readFirst;
        this.readNullRaw = readNullRaw;
        this.readFirstSheetOnly = readFirstSheetOnly;
    }


    /**
     * 导入Excel
     *
     * @param file   导入的文件
     * @param header Excel头转对象
     * @return 对象集合
     */
    public static List<Map<String, String>> doImport(File file
            , String[] header) {
        List<Map<String, String>> list = new ArrayList<>();
        ExcelImportUtil excelImportUtil = new ExcelImportUtil(
                file
                , header
                //遍历消费者中的数据,加入list
                , list::add);
        // 执行解析
        excelImportUtil.process();
        return list;
    }

    //转化为entity
    public static <T> List<T> doImportEntity(Class<T> tClass, File file
            , String[] header) {
        List<T> list = new ArrayList<>();
        ExcelImportUtil excelImportUtil = new ExcelImportUtil(
                file
                , header
                //遍历消费者中的数据,加入list
                , stringStringMap -> {
            list.add(BeanUtil.mapToBean(stringStringMap, tClass, true));
        });
        // 执行解析
        excelImportUtil.process();
        return list;
    }

    public static void doImport(File file
            , String[] headers
            , Consumer<Map<String, String>> consumer) {
        ExcelImportUtil excelImportUtil = new ExcelImportUtil(
                file
                , headers
                , consumer);
        // 执行解析
        excelImportUtil.process();
    }

    /**
     * 解析Excel,并关闭流
     */
    public void process() {
        try {
            // 只读字符表
            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
            // Xssf读取
            XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
            // 样式表
            StylesTable styles = xssfReader.getStylesTable();
            // 读取Excel
            XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
            while (iter.hasNext()) {
                try (InputStream stream = iter.next()) {
                    processSheet(styles
                            , strings
                            , new SimpleSheetContentsHandler(headers
                                    , consumer
                                    , readFirst
                                    , readNullRaw)
                            , stream);
                }
                // 当自读第一个sheet是结束
                if (readFirstSheetOnly) {
                    break;
                }
            }
        } catch (IOException | SAXException | OpenXML4JException e) {
            log.error(e.getMessage(), e);
        } finally {
            // 关流
            try {
                this.xlsxPackage.close();
            } catch (IOException e) {
                log.error(e.getMessage(), e);
            }
        }
    }

    /**
     * 解析一个表的内容使用指定的样式和共享字符串表
     *
     * @param styles           可以通过表中的单元格引用的样式表
     * @param strings          可以通过表中的单元格引用的字符串表
     * @param sheetInputStream 这条流读取了文件数据
     * @throws IOException  来自解析器的IO异常,可能来自字节流或字符流
     * @throws SAXException 如果解析XML数据失败
     */
    private void processSheet(
            Styles styles,
            SharedStrings strings,
            SheetContentsHandler sheetHandler,
            InputStream sheetInputStream) throws IOException, SAXException {
        // 数据格式化对象
        DataFormatter formatter = new DataFormatter();
        // short date 数据读取
        formatter.addFormat("m/d/yy", new SimpleDateFormat("yyyy/MM/dd"));
        InputSource sheetSource = new InputSource(sheetInputStream);
        try {
            // xml读取类
            XMLReader sheetParser = SAXHelper.newXMLReader();
            // xml处理方法
            ContentHandler handler = new XSSFSheetXMLHandler(
                    styles
                    , null
                    , strings
                    , sheetHandler
                    , formatter
                    , false);
            sheetParser.setContentHandler(handler);

            // 解析Excel
            sheetParser.parse(sheetSource);
        } catch (ParserConfigurationException e) {
            throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
        }
    }

    /**
     * 数据处理
     */
    private class SimpleSheetContentsHandler implements SheetContentsHandler {
        /**
         * 当前行号
         */
        private int currentRow = -1;
        /**
         * 当前列号
         */
        private int currentCol = -1;
        /**
         * 当前行数据
         */
        private Map<String, String> rowData;
        /**
         * 数据头
         */
        private String[] headers;
        /**
         * 数据处理方法
         */
        private Consumer<Map<String, String>> consumer;
        /**
         * 是否读取第一行
         */
        private boolean readFirst;
        /**
         * 是否读取空行
         */
        private boolean readNullRaw;

        /**
         * sheet页处理
         *
         * @param headers  表头数组
         * @param consumer 行消费
         */
        SimpleSheetContentsHandler(String[] headers
                , Consumer<Map<String, String>> consumer) {
            this(headers, consumer, false, false);
        }

        /**
         * sheet页处理
         *
         * @param headers     表头数组
         * @param consumer    行消费
         * @param readFirst   是否读取第一行
         * @param readNullRaw 是否读取空行
         */
        public SimpleSheetContentsHandler(String[] headers
                , Consumer<Map<String, String>> consumer
                , boolean readFirst
                , boolean readNullRaw) {
            this.headers = headers;
            this.consumer = consumer;
            this.readFirst = readFirst;
            this.readNullRaw = readNullRaw;
        }

        @Override
        public void startRow(int rowNum) {
            currentRow = rowNum;
            currentCol = -1;
            // 重置数据
            rowData = new HashMap<>(headers.length);
        }

        @Override
        public void endRow(int rowNum) {
            // 不处理第一行
            if (!readFirst && rowNum == 0) {
                return;
            }
            // 空行处理
            if (!readNullRaw && rowData.isEmpty()) {
                return;
            }
            // 处理数据
            consumer.accept(rowData);
        }

        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            // 如果为空则生成一个当前位置的单元格对象
            if (cellReference == null) {
                cellReference = new CellAddress(currentRow, currentCol).formatAsString();
            }

            // 读取的列号
            int thisCol = (new CellReference(cellReference)).getCol();
            // 大于头的列不处理
            if (thisCol >= this.headers.length) {
                return;
            }
            // 列为空的数量
            int missedCols = thisCol - currentCol - 1;
            for (int i = 0; i < missedCols; i++) {
                rowData.put(headers[currentCol + i + 1], null);
            }
            // 当前行等于读取的列
            currentCol = thisCol;
            // 大于头的列不处理
            if (currentCol > headers.length) {
                return;
            }
            // 设置值
            rowData.put(headers[currentCol], formattedValue);
        }
    }
}

4、FileUtil

package com.example.util;

import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;

import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Objects;

@Slf4j
public class FileUtil {

    /**
     * MultipartFile 转 File
     *
     * @param multipartFile 上传文件
     * @return 转换文件
     */
    public static File multipartFileToFile(@RequestParam MultipartFile multipartFile) {

        try {
            File file = null;
            if (multipartFile == null || multipartFile.getSize() <= 0) {
                throw new RuntimeException("转换文件异常");
            } else {
                InputStream ins = null;
                ins = multipartFile.getInputStream();
                file = new File(Objects.requireNonNull(multipartFile.getOriginalFilename()));
                inputStreamToFile(ins, file);
                ins.close();
                return file;
            }
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
        return null;
    }


    public static void inputStreamToFile(InputStream ins, File file) {
        try {
            OutputStream os = new FileOutputStream(file);
            int bytesRead = 0;
            byte[] buffer = new byte[8192];
            while ((bytesRead = ins.read(buffer, 0, 8192)) != -1) {
                os.write(buffer, 0, bytesRead);
            }
            os.close();
            ins.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

###5、校验

package com.example.util;

import cn.hutool.core.bean.BeanUtil;
import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.util.StrUtil;

import javax.validation.ConstraintViolation;
import javax.validation.Validation;
import javax.validation.Validator;
import javax.validation.groups.Default;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;


public class DataCheck {
    private static Validator validator = Validation.buildDefaultValidatorFactory().getValidator();

    private DataCheck() {
    }

    /**
     * 校验数据(抛出异常)
     *
     * @param t 数据
     */
    public static <T> void valid(T t) {
        List<String> list = validMessage(t);
        if (CollectionUtil.isNotEmpty(list)) {
            throw new RuntimeException(StrUtil.join(",", list));
        }
    }

    /**
     * 校验数据(抛出异常)
     *
     * @param t      数据
     * @param groups 校验分组
     */
    public static <T> void valid(T t, Class<?>... groups) {
        List<String> list = validMessage(t, groups);
        if (CollectionUtil.isNotEmpty(list)) {
            throw new RuntimeException(StrUtil.join(",", list));
        }
    }

    /**
     * 校验数据
     * 返回所有异常信息
     *
     * @param t 数据
     * @return 所有异常信息
     */
    public static <T> List<String> validMessage(T t) {
        List<String> list = new ArrayList<>();
        validator.validate(t, Default.class).forEach(v -> {
            list.add(v.getMessage());
        });
        return list;
    }

    /**
     * 校验数据
     * 返回所有异常信息
     *
     * @param t      数据
     * @param groups 校验分组
     * @return 所有异常信息
     */
    public static <T> List<String> validMessage(T t, Class<?>... groups) {
        List<String> list = new ArrayList<>();
        validator.validate(t, groups).forEach(v -> {
            list.add(v.getMessage());
        });
        return list;
    }

    /**
     * 是否校验通过
     *
     * @param t 数据
     * @return 是否通过
     */
    public static <T> boolean validBool(T t) {
        Set<ConstraintViolation<T>> violationSet = validator.validate(t, Default.class);
        return violationSet.size() == 0;
    }

    /**
     * 是否校验通过
     *
     * @param t      数据
     * @param groups 校验分组
     * @return 是否通过
     */
    public static <T> boolean validBool(T t, Class<?>... groups) {
        Set<ConstraintViolation<T>> violationSet = validator.validate(t, groups);
        return violationSet.size() == 0;
    }

    /**
     * 校验数据是否全部b不为空
     *
     * @param data 需要校验的对象
     * @return 是否都不为空
     */
    public static boolean checkNotNull(Object data) {
        AtomicLong total = new AtomicLong();
        Map<String, Object> map = BeanUtil.beanToMap(data);
        map.forEach((s, o) -> {
            if (o == null) {
                total.getAndIncrement();
            }
        });
        return total.get() == 0;
    }

    /**
     * 校验数据是否全部b不为空
     *
     * @param data 需要校验的对象
     * @param num  不为空的属性个数
     * @return 判断不为空的属性是否大于等于total
     */
    public static boolean check(Object data, int num) {
        AtomicLong total = new AtomicLong();
        Map<String, Object> map = BeanUtil.beanToMap(data);
        map.forEach((s, o) -> {
            if (o != null) {
                total.getAndIncrement();
            }
        });

        return total.get() >= num;
    }

}

6、使用

public class StudentServiceImpl extends ServiceImpl<StudentMapper, Student> implements IStudentService, ReportInFo {
    
    @Override
    public void doImportStudentExcel(MultipartFile multipartFile) {

        //需要转换的字段
        String[] fileds = {"name", "age", "mony"};
        File file = FileUtil.multipartFileToFile(multipartFile);
        List<StudentVo> studentVos = ExcelImportUtil.doImportEntity(StudentVo.class,
                file, fileds);

        //校验是否符合数据塞选规则,得到失败队列
        List<StudentVo> vos = StudentVo.check(studentVos);
        vos.forEach(
                //输出失败队列中的数据
                System.out::println
        );

        //删除零时文件
        cn.hutool.core.io.FileUtil.del(file);
    }
}