多线程处理Excel表格中的数据

60 阅读5分钟

多线程处理表格中的数据

需求内容介绍

本文多线程处理Excel表格中数据的需求情况:
1.读取给定表格中的数据
2.通过调用接口进行查询出每条数据材料信息
3.对查询出的材料信息生成文件到指定目录中

表格数据处理

表格的数据格式

image.png

多线程处理代码

package com.bb.admin.jackson;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import cn.hutool.http.HttpUtil;
import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.zjtzsw.biz.common.utils.ExcelUtil;
import com.zjtzsw.biz.common.utils.PoiExcelUtil;
import com.zjtzsw.biz.modules.sys.entity.Sc01Entity;
import com.zjtzsw.biz.modules.yth.model.BjxxInfo;
import com.zjtzsw.yth.core.api.rpc.HttpRpcUtil;
import com.zjtzsw.yth.core.jackjson.DictBeanSerializerModifier;
import com.zjtzsw.yth.core.jackjson.JsonUtil;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;

@Slf4j
public class CapTest {

   public static void main(String[] args) throws JsonProcessingException, FileNotFoundException, InterruptedException {
      //使用list存储读取出的数据。因为是多线程,使用CopyOnWriteArrayList实现
      List<String> excelReadList = new CopyOnWriteArrayList();
      //多线程读取:定义读取表格数据的线程池,设置核心线程数100
      ExecutorService executorService = Executors.newFixedThreadPool(100);
      //多线程生成文件:定义文件生成的线程池,设置核心线程数100
      ExecutorService downLoadFileService = Executors.newFixedThreadPool(100);

      //读取的文件路肩
      String excelFilePath = "C:\Users\THINKPAD\Desktop\rylist.xlsx";
      //文件流进行处理
      FileInputStream fis;
      fis = new FileInputStream(excelFilePath);

      //此处的poiExcelUtil 是封装的一个工具类,根据传入的文件后缀进行区分,使用的都是Easypoi
      Workbook wb = PoiExcelUtil.getWorkBook(fis, "rylist.xlsx");
      Sheet sheet = wb.getSheetAt(0);
      System.out.println("行数:" + sheet.getLastRowNum());

      //使用countDownLatch 保证都读取完了在进行下一步操作
      CountDownLatch countDownLatch = new CountDownLatch(sheet.getLastRowNum());
      for (int i = 1; i < sheet.getLastRowNum() + 1; i++) {
         int finalI = i;
         executorService.execute(() -> {
            try {
               //读取身份证号数据,列下标从0开始,所以columnNum为4
               String idCard = ExcelUtil.getCellValue(sheet, finalI, 4);
               excelReadList.add(idCard);
               System.out.println("线程:" + Thread.currentThread().getName() + "把数据" + " \t" + idCard + "  放入到list中去了");
            } finally {
               // 任务完成后,计数器减一
               countDownLatch.countDown();
            }
         });
      }
      countDownLatch.await();
      executorService.shutdown();
      //到此处多线程读取数据完成
      System.out.println("最终的list大小为:" + excelReadList.size());

      //对数据进行拆分,每1000条进行处理 split 是拆分的核心方法
      List<List<String>> splitList = split(excelReadList, 1000);
      //每1000条list作为一个元素存入 splitList
      //孙环splitList
      for (int i = 0; i < splitList.size(); i++) {
         Map<String, Object> map = Maps.newHashMap();
         List<String> idList = splitList.get(i);
         map.put("idList", JSONUtil.toJsonStr(idList));

         //调用接口获取当前的数据的详细信息
         String post = HttpUtil.post("http://ip:port/interfacepath/details", map);
         System.out.println(post);
         List<Map> detailsMap = JSONUtil.toList(JSONUtil.parseArray(post), Map.class);
         for (int j = 0; j < detailsMap.size(); j++) {
            Map<String, String> m = detailsMap.get(i);
            //下载文件的多线程处理
            downLoadFileService.execute(() -> {
               String businessId = m.get("ID");//业务id
               String name = m.get("name"); //姓名
               String idCard = m.get("idCard"); //身份证号
               String areaCode = m.get("areaCode");//业务地行政区划
               String areaName = m.get("areaName");// 业务地名称
               String serialno = m.get("serialno"); //业务流水号
               String time = m.get("time"); //业务时间

               dealDataAndDownloadFiles(businessId, areaCode.substring(0, 4) + "00", areaCode, areaName, idCard, name, serialno, time);
            });
         }
      }
   }


   /**
    * 分割list
    * @param resList
    * @param subListLength
    * @param <T>
    * @return
    */
   public static <T> List<List<T>> split(List<T> resList, int subListLength) {
      if (CollectionUtils.isEmpty(resList) || subListLength <= 0) {
         return Lists.newArrayList();
      }
      List<List<T>> ret = Lists.newArrayList();
      int size = resList.size();
      if (size <= subListLength) {
         // 数据量不足 subListLength 指定的大小
         ret.add(resList);
      } else {
         int pre = size / subListLength;
         int last = size % subListLength;
         // 前面pre个集合,每个大小都是 subListLength 个元素
         for (int i = 0; i < pre; i++) {
            List<T> itemList = Lists.newArrayList();
            for (int j = 0; j < subListLength; j++) {
               itemList.add(resList.get(i * subListLength + j));
            }
            ret.add(itemList);
         }
         // last的进行处理
         if (last > 0) {
            List<T> itemList = Lists.newArrayList();
            for (int i = 0; i < last; i++) {
               itemList.add(resList.get(pre * subListLength + i));
            }
            ret.add(itemList);
         }
      }
      return ret;
   }


   /**
    * 处理数据并且下载材料
    * @param businessId
    * @param cityAreaCode
    * @param areaCode
    * @param areaName
    * @param idCard
    * @param name
    * @param serialno
    * @param time
    * @param sbtj
    */
   private static void dealDataAndDownloadFiles(String businessId, String cityAreaCode, String areaCode, String areaName, String idCard, String name, String serialno, String time) {
      {
         HashMap<String, Object> param = Maps.newHashMap();

         param.put("businessId", businessId);//业务系统主键
         param.put("idCard", idCard);//身份证
         param.put("name", name);//姓名
         //查询材料信息
         String post = HttpRpcUtil.post("http://ip:port/caller/remote").form(param).execute().body();
         System.out.println(post);
         cn.hutool.json.JSONObject jsonObject = JSONUtil.parseObj(post);
         cn.hutool.json.JSONObject dataObj = jsonObject.getJSONObject("data");

         JSONArray childrenArrayOut = dataObj.getJSONArray("children");
         //对获取到的材料数据进行处理,返回的是业务材料地址
         if (childrenArrayOut.size() != 0) {
            for (int i = 0; i < childrenArrayOut.size(); i++) {
               JSONObject innerData = childrenArrayOut.getJSONObject(i);
               JSONArray innerChildrenArray = innerData.getJSONArray("children");
               String bizTitle = innerData.getStr("bizTitle");
               if (innerChildrenArray.size() != 0) {
                  for (int j = 0; j < innerChildrenArray.size(); j++) {
                     String url = innerChildrenArray.getJSONObject(j).getStr("file");
                     System.out.println("下载文件的地址是:" + url);
                     String fileDest = "F:\downloadFileByExcelData\" + cityAreaCode + "\" + areaCode + "(" + areaName + ")" + "\" + idCard + "(" + name + ")" + "\" + serialno + "(" + time + ")" + "\" + bizTitle + "\";
                     String fileName = innerChildrenArray.getJSONObject(j).getStr("filename");
                     fileDest = fileDest + fileName;
                     System.out.println("文件放置的目录是:" + fileDest);
                     HttpUtil.downloadFile(url, fileDest);
                  }
               }
            }
         }
      }
   }
}


工具类 PoiExcelUtil 代码

public class PoiExcelUtil {

   //这里是上面代码里获取excel的方法
   public static Workbook getWorkBook(InputStream is, String fileName) {
      boolean isE2007 = false;
        //判断是否是excel2007格式
        if(fileName.endsWith("xlsx")){
            isE2007 = true;
        }
        try {
            //根据文件格式(2003或者2007)来初始化
            if(isE2007){
                return new XSSFWorkbook(is);
            }else{
               return new HSSFWorkbook(is);
            }
        }catch(Exception e) {
           throw new WordExportException("导入模板不符合规范");
        }
   }
   
   
   
   /**
    * 获取单元格的值
    * 
    * @param cell
    * @return
    */
   public static String getCellValue(Cell cell) {
      if (cell == null)
         return "";
      return cell.getStringCellValue();
   }

   /**
    * 合并单元格处理,获取合并行
    * 
    * @param sheet
    * @return List<CellRangeAddress>
    */
   public static List<CellRangeAddress> getCombineCell(Sheet sheet) {
      List<CellRangeAddress> list = new ArrayList<>();
      // 获得一个 sheet 中合并单元格的数量
      int sheetmergerCount = sheet.getNumMergedRegions();
      // 遍历所有的合并单元格
      for (int i = 0; i < sheetmergerCount; i++) {
         // 获得合并单元格保存进list中
         CellRangeAddress ca = sheet.getMergedRegion(i);
         list.add(ca);
      }
      return list;
   }

   public static int getRowNum(List<CellRangeAddress> listCombineCell, Cell cell, Sheet sheet) {
      int xr = 0;
      int firstC = 0;
      int lastC = 0;
      int firstR = 0;
      int lastR = 0;
      for (CellRangeAddress ca : listCombineCell) {
         // 获得合并单元格的起始行, 结束行, 起始列, 结束列
         firstC = ca.getFirstColumn();
         lastC = ca.getLastColumn();
         firstR = ca.getFirstRow();
         lastR = ca.getLastRow();
         if (cell.getRowIndex() >= firstR && cell.getRowIndex() <= lastR) {
            if (cell.getColumnIndex() >= firstC && cell.getColumnIndex() <= lastC) {
               xr = lastR;
            }
         }

      }
      return xr;

   }

   /**
    * 判断单元格是否为合并单元格,是的话则将单元格的值返回
    * 
    * @param listCombineCell
    *            存放合并单元格的list
    * @param cell
    *            需要判断的单元格
    * @param sheet
    *            sheet
    * @return
    */
   public static String isCombineCell(List<CellRangeAddress> listCombineCell, Cell cell, Sheet sheet) throws Exception {
      int firstC = 0;
      int lastC = 0;
      int firstR = 0;
      int lastR = 0;
      String cellValue = null;
      for (CellRangeAddress ca : listCombineCell) {
         // 获得合并单元格的起始行, 结束行, 起始列, 结束列
         firstC = ca.getFirstColumn();
         lastC = ca.getLastColumn();
         firstR = ca.getFirstRow();
         lastR = ca.getLastRow();
         if (cell.getRowIndex() >= firstR && cell.getRowIndex() <= lastR) {
            if (cell.getColumnIndex() >= firstC && cell.getColumnIndex() <= lastC) {
               Row fRow = sheet.getRow(firstR);
               Cell fCell = fRow.getCell(firstC);
               cellValue = getCellValue(fCell);
               break;
            }
         } else {
            cellValue = "";
         }
      }
      return cellValue;
   }

   /**
    * 获取合并单元格的值
    * 
    * @param sheet
    * @param row
    * @param column
    * @return
    */
   public static String getMergedRegionValue(Sheet sheet, int row, int column) {
      int sheetMergeCount = sheet.getNumMergedRegions();

      for (int i = 0; i < sheetMergeCount; i++) {
         CellRangeAddress ca = sheet.getMergedRegion(i);
         int firstColumn = ca.getFirstColumn();
         int lastColumn = ca.getLastColumn();
         int firstRow = ca.getFirstRow();
         int lastRow = ca.getLastRow();

         if (row >= firstRow && row <= lastRow) {
            if (column >= firstColumn && column <= lastColumn) {
               Row fRow = sheet.getRow(firstRow);
               Cell fCell = fRow.getCell(firstColumn);
               return getCellValue(fCell);
            }
         }
      }

      return null;
   }

   /**
    * 判断指定的单元格是否是合并单元格
    * 
    * @param sheet
    * @param row
    *            行下标
    * @param column
    *            列下标
    * @return
    */
   public static boolean isMergedRegion(Sheet sheet, int row, int column) {
      int sheetMergeCount = sheet.getNumMergedRegions();
      for (int i = 0; i < sheetMergeCount; i++) {
         CellRangeAddress range = sheet.getMergedRegion(i);
         int firstColumn = range.getFirstColumn();
         int lastColumn = range.getLastColumn();
         int firstRow = range.getFirstRow();
         int lastRow = range.getLastRow();
         if (row >= firstRow && row <= lastRow) {
            if (column >= firstColumn && column <= lastColumn) {
               return true;
            }
         }
      }
      return false;
   }

}
执行后的结果:

image.png

总结

通过多线程处理Excel数据,主要的核心是通过把数据拆分成固定的批次去处理。 需要注意的点:

  • 线程安全:确保你的数据处理逻辑是线程安全的,尤其是在合并数据时。
  • 异常处理:在实际的项目应用中,需要做好异常处理逻辑。
  • 性能考量:定义合适的线程数量。