多线程处理表格中的数据
需求内容介绍
本文多线程处理Excel表格中数据的需求情况:
1.读取给定表格中的数据
2.通过调用接口进行查询出每条数据材料信息
3.对查询出的材料信息生成文件到指定目录中
表格数据处理
表格的数据格式
多线程处理代码
package com.bb.admin.jackson;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import cn.hutool.http.HttpUtil;
import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.zjtzsw.biz.common.utils.ExcelUtil;
import com.zjtzsw.biz.common.utils.PoiExcelUtil;
import com.zjtzsw.biz.modules.sys.entity.Sc01Entity;
import com.zjtzsw.biz.modules.yth.model.BjxxInfo;
import com.zjtzsw.yth.core.api.rpc.HttpRpcUtil;
import com.zjtzsw.yth.core.jackjson.DictBeanSerializerModifier;
import com.zjtzsw.yth.core.jackjson.JsonUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
@Slf4j
public class CapTest {
public static void main(String[] args) throws JsonProcessingException, FileNotFoundException, InterruptedException {
//使用list存储读取出的数据。因为是多线程,使用CopyOnWriteArrayList实现
List<String> excelReadList = new CopyOnWriteArrayList();
//多线程读取:定义读取表格数据的线程池,设置核心线程数100
ExecutorService executorService = Executors.newFixedThreadPool(100);
//多线程生成文件:定义文件生成的线程池,设置核心线程数100
ExecutorService downLoadFileService = Executors.newFixedThreadPool(100);
//读取的文件路肩
String excelFilePath = "C:\Users\THINKPAD\Desktop\rylist.xlsx";
//文件流进行处理
FileInputStream fis;
fis = new FileInputStream(excelFilePath);
//此处的poiExcelUtil 是封装的一个工具类,根据传入的文件后缀进行区分,使用的都是Easypoi
Workbook wb = PoiExcelUtil.getWorkBook(fis, "rylist.xlsx");
Sheet sheet = wb.getSheetAt(0);
System.out.println("行数:" + sheet.getLastRowNum());
//使用countDownLatch 保证都读取完了在进行下一步操作
CountDownLatch countDownLatch = new CountDownLatch(sheet.getLastRowNum());
for (int i = 1; i < sheet.getLastRowNum() + 1; i++) {
int finalI = i;
executorService.execute(() -> {
try {
//读取身份证号数据,列下标从0开始,所以columnNum为4
String idCard = ExcelUtil.getCellValue(sheet, finalI, 4);
excelReadList.add(idCard);
System.out.println("线程:" + Thread.currentThread().getName() + "把数据" + " \t" + idCard + " 放入到list中去了");
} finally {
// 任务完成后,计数器减一
countDownLatch.countDown();
}
});
}
countDownLatch.await();
executorService.shutdown();
//到此处多线程读取数据完成
System.out.println("最终的list大小为:" + excelReadList.size());
//对数据进行拆分,每1000条进行处理 split 是拆分的核心方法
List<List<String>> splitList = split(excelReadList, 1000);
//每1000条list作为一个元素存入 splitList
//孙环splitList
for (int i = 0; i < splitList.size(); i++) {
Map<String, Object> map = Maps.newHashMap();
List<String> idList = splitList.get(i);
map.put("idList", JSONUtil.toJsonStr(idList));
//调用接口获取当前的数据的详细信息
String post = HttpUtil.post("http://ip:port/interfacepath/details", map);
System.out.println(post);
List<Map> detailsMap = JSONUtil.toList(JSONUtil.parseArray(post), Map.class);
for (int j = 0; j < detailsMap.size(); j++) {
Map<String, String> m = detailsMap.get(i);
//下载文件的多线程处理
downLoadFileService.execute(() -> {
String businessId = m.get("ID");//业务id
String name = m.get("name"); //姓名
String idCard = m.get("idCard"); //身份证号
String areaCode = m.get("areaCode");//业务地行政区划
String areaName = m.get("areaName");// 业务地名称
String serialno = m.get("serialno"); //业务流水号
String time = m.get("time"); //业务时间
dealDataAndDownloadFiles(businessId, areaCode.substring(0, 4) + "00", areaCode, areaName, idCard, name, serialno, time);
});
}
}
}
/**
* 分割list
* @param resList
* @param subListLength
* @param <T>
* @return
*/
public static <T> List<List<T>> split(List<T> resList, int subListLength) {
if (CollectionUtils.isEmpty(resList) || subListLength <= 0) {
return Lists.newArrayList();
}
List<List<T>> ret = Lists.newArrayList();
int size = resList.size();
if (size <= subListLength) {
// 数据量不足 subListLength 指定的大小
ret.add(resList);
} else {
int pre = size / subListLength;
int last = size % subListLength;
// 前面pre个集合,每个大小都是 subListLength 个元素
for (int i = 0; i < pre; i++) {
List<T> itemList = Lists.newArrayList();
for (int j = 0; j < subListLength; j++) {
itemList.add(resList.get(i * subListLength + j));
}
ret.add(itemList);
}
// last的进行处理
if (last > 0) {
List<T> itemList = Lists.newArrayList();
for (int i = 0; i < last; i++) {
itemList.add(resList.get(pre * subListLength + i));
}
ret.add(itemList);
}
}
return ret;
}
/**
* 处理数据并且下载材料
* @param businessId
* @param cityAreaCode
* @param areaCode
* @param areaName
* @param idCard
* @param name
* @param serialno
* @param time
* @param sbtj
*/
private static void dealDataAndDownloadFiles(String businessId, String cityAreaCode, String areaCode, String areaName, String idCard, String name, String serialno, String time) {
{
HashMap<String, Object> param = Maps.newHashMap();
param.put("businessId", businessId);//业务系统主键
param.put("idCard", idCard);//身份证
param.put("name", name);//姓名
//查询材料信息
String post = HttpRpcUtil.post("http://ip:port/caller/remote").form(param).execute().body();
System.out.println(post);
cn.hutool.json.JSONObject jsonObject = JSONUtil.parseObj(post);
cn.hutool.json.JSONObject dataObj = jsonObject.getJSONObject("data");
JSONArray childrenArrayOut = dataObj.getJSONArray("children");
//对获取到的材料数据进行处理,返回的是业务材料地址
if (childrenArrayOut.size() != 0) {
for (int i = 0; i < childrenArrayOut.size(); i++) {
JSONObject innerData = childrenArrayOut.getJSONObject(i);
JSONArray innerChildrenArray = innerData.getJSONArray("children");
String bizTitle = innerData.getStr("bizTitle");
if (innerChildrenArray.size() != 0) {
for (int j = 0; j < innerChildrenArray.size(); j++) {
String url = innerChildrenArray.getJSONObject(j).getStr("file");
System.out.println("下载文件的地址是:" + url);
String fileDest = "F:\downloadFileByExcelData\" + cityAreaCode + "\" + areaCode + "(" + areaName + ")" + "\" + idCard + "(" + name + ")" + "\" + serialno + "(" + time + ")" + "\" + bizTitle + "\";
String fileName = innerChildrenArray.getJSONObject(j).getStr("filename");
fileDest = fileDest + fileName;
System.out.println("文件放置的目录是:" + fileDest);
HttpUtil.downloadFile(url, fileDest);
}
}
}
}
}
}
}
工具类 PoiExcelUtil 代码
public class PoiExcelUtil {
//这里是上面代码里获取excel的方法
public static Workbook getWorkBook(InputStream is, String fileName) {
boolean isE2007 = false;
//判断是否是excel2007格式
if(fileName.endsWith("xlsx")){
isE2007 = true;
}
try {
//根据文件格式(2003或者2007)来初始化
if(isE2007){
return new XSSFWorkbook(is);
}else{
return new HSSFWorkbook(is);
}
}catch(Exception e) {
throw new WordExportException("导入模板不符合规范");
}
}
/**
* 获取单元格的值
*
* @param cell
* @return
*/
public static String getCellValue(Cell cell) {
if (cell == null)
return "";
return cell.getStringCellValue();
}
/**
* 合并单元格处理,获取合并行
*
* @param sheet
* @return List<CellRangeAddress>
*/
public static List<CellRangeAddress> getCombineCell(Sheet sheet) {
List<CellRangeAddress> list = new ArrayList<>();
// 获得一个 sheet 中合并单元格的数量
int sheetmergerCount = sheet.getNumMergedRegions();
// 遍历所有的合并单元格
for (int i = 0; i < sheetmergerCount; i++) {
// 获得合并单元格保存进list中
CellRangeAddress ca = sheet.getMergedRegion(i);
list.add(ca);
}
return list;
}
public static int getRowNum(List<CellRangeAddress> listCombineCell, Cell cell, Sheet sheet) {
int xr = 0;
int firstC = 0;
int lastC = 0;
int firstR = 0;
int lastR = 0;
for (CellRangeAddress ca : listCombineCell) {
// 获得合并单元格的起始行, 结束行, 起始列, 结束列
firstC = ca.getFirstColumn();
lastC = ca.getLastColumn();
firstR = ca.getFirstRow();
lastR = ca.getLastRow();
if (cell.getRowIndex() >= firstR && cell.getRowIndex() <= lastR) {
if (cell.getColumnIndex() >= firstC && cell.getColumnIndex() <= lastC) {
xr = lastR;
}
}
}
return xr;
}
/**
* 判断单元格是否为合并单元格,是的话则将单元格的值返回
*
* @param listCombineCell
* 存放合并单元格的list
* @param cell
* 需要判断的单元格
* @param sheet
* sheet
* @return
*/
public static String isCombineCell(List<CellRangeAddress> listCombineCell, Cell cell, Sheet sheet) throws Exception {
int firstC = 0;
int lastC = 0;
int firstR = 0;
int lastR = 0;
String cellValue = null;
for (CellRangeAddress ca : listCombineCell) {
// 获得合并单元格的起始行, 结束行, 起始列, 结束列
firstC = ca.getFirstColumn();
lastC = ca.getLastColumn();
firstR = ca.getFirstRow();
lastR = ca.getLastRow();
if (cell.getRowIndex() >= firstR && cell.getRowIndex() <= lastR) {
if (cell.getColumnIndex() >= firstC && cell.getColumnIndex() <= lastC) {
Row fRow = sheet.getRow(firstR);
Cell fCell = fRow.getCell(firstC);
cellValue = getCellValue(fCell);
break;
}
} else {
cellValue = "";
}
}
return cellValue;
}
/**
* 获取合并单元格的值
*
* @param sheet
* @param row
* @param column
* @return
*/
public static String getMergedRegionValue(Sheet sheet, int row, int column) {
int sheetMergeCount = sheet.getNumMergedRegions();
for (int i = 0; i < sheetMergeCount; i++) {
CellRangeAddress ca = sheet.getMergedRegion(i);
int firstColumn = ca.getFirstColumn();
int lastColumn = ca.getLastColumn();
int firstRow = ca.getFirstRow();
int lastRow = ca.getLastRow();
if (row >= firstRow && row <= lastRow) {
if (column >= firstColumn && column <= lastColumn) {
Row fRow = sheet.getRow(firstRow);
Cell fCell = fRow.getCell(firstColumn);
return getCellValue(fCell);
}
}
}
return null;
}
/**
* 判断指定的单元格是否是合并单元格
*
* @param sheet
* @param row
* 行下标
* @param column
* 列下标
* @return
*/
public static boolean isMergedRegion(Sheet sheet, int row, int column) {
int sheetMergeCount = sheet.getNumMergedRegions();
for (int i = 0; i < sheetMergeCount; i++) {
CellRangeAddress range = sheet.getMergedRegion(i);
int firstColumn = range.getFirstColumn();
int lastColumn = range.getLastColumn();
int firstRow = range.getFirstRow();
int lastRow = range.getLastRow();
if (row >= firstRow && row <= lastRow) {
if (column >= firstColumn && column <= lastColumn) {
return true;
}
}
}
return false;
}
}
执行后的结果:
总结
通过多线程处理Excel数据,主要的核心是通过把数据拆分成固定的批次去处理。 需要注意的点:
- 线程安全:确保你的数据处理逻辑是线程安全的,尤其是在合并数据时。
- 异常处理:在实际的项目应用中,需要做好异常处理逻辑。
- 性能考量:定义合适的线程数量。