JAVA操作Word合并、替换占位符、Word插入富文本、生成水印

6,047 阅读10分钟

文章概览

引入POI类库及注意事项

Java操作Word用到的工具类库基于POI 4.1.0版本,可参考POI官方API文档(英文文档可以使用Google浏览器自带的全文翻译,很方便)。注意文章中操作的Word都是docx后缀的,即Word 2007及以上版本的格式,如果需要操作Word 2003版本(doc后缀)的文件,还需自行转换为docx。

后续将更新从Excel读取表格数据写入到Word、从另一个Word读取模板表格到当前Word等功能。项目代码中每一个功能都提供了test类,你只需要拉下代码、修改文件目录即可执行,一步到位。

下面开始进入主题,文章中只贴关键代码,全部代码请通过传送门去GitHub拉取,如果感觉对你有帮助请在GitHub上点亮你尊贵的小星星,码砖不易,转载请说明出处,谢谢。

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Maven build descriptor for the wordtools project: declares the POI, docx4j, jsoup,
         Spring core, commons-io and servlet-api dependencies listed below. -->
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.corey</groupId>
    <artifactId>wordtools</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <!-- !! Apache POI dependencies (all pinned to 4.1.0) -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>4.1.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>4.1.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>4.1.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>4.1.0</version>
        </dependency>
        <!-- Apache POI dependencies !! -->

        <!-- out net begin: docx4j (3.3.6) and jsoup dependencies -->
        <dependency>
            <groupId>org.docx4j</groupId>
            <artifactId>docx4j</artifactId>
            <version>3.3.6</version>
        </dependency>
        <dependency>
            <groupId>org.docx4j</groupId>
            <artifactId>docx4j-ImportXHTML</artifactId>
            <version>3.3.6</version>
        </dependency>
        <dependency>
            <groupId>org.docx4j</groupId>
            <artifactId>docx4j-export-fo</artifactId>
            <version>3.3.6</version>
        </dependency>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.11.2</version>
        </dependency>
        <!-- out net end -->
        <!-- https://mvnrepository.com/artifact/org.springframework/spring-core -->
        <!-- Only Spring's utility classes are used from this dependency -->
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-core</artifactId>
            <version>5.2.1.RELEASE</version>
        </dependency>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.5</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/javax.servlet/javax.servlet-api -->
        <dependency>
            <groupId>javax.servlet</groupId>
            <artifactId>javax.servlet-api</artifactId>
            <version>4.0.1</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>
</project>

多个Word文档合并

POI合并文档的基本思路:docx文档本质上是一个包含多个xml文件的zip压缩包,正文内容保存在其中的xml里。合并时先取出每个文档正文的xml,把各自的xmlns声明去重合并,添加固定的格式标签,然后把不同xml里面的元素都拼接到一起,组成一个新的正文xml,最后输出成为一个新的Word。更多代码请查看项目的magerword目录。

package magerword;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.xmlbeans.XmlOptions;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
import org.springframework.util.ObjectUtils;

import java.io.*;
import java.util.*;

/**
* @program: 合并多份word文件
* @description:
* @author: corey
* @create: 2020-04-29 19:04
**/
public class MagerUtil {

   /**
    * Output path used by {@link #mergeDoc(String...)}.
    * Replace it with a path that exists on your machine, or call
    * {@link #mergeDocTo(String, String...)} with an explicit destination.
    */
   private static final String DEFAULT_DEST_PATH = "/Users/corey/Desktop/temp/wordtools/合并文档3.docx";

   /** Marker every namespace declaration starts with. */
   private static final String XMLNS = "xmlns";

   /**
    * Merges several docx files into one document written to the default output path.
    *
    * @param filepaths paths of the docx files to merge, in the order they should appear
    * @throws Exception if a source file cannot be read or parsed, or the result cannot be written
    */
   public static void mergeDoc(String... filepaths) throws Exception {
       mergeDocTo(DEFAULT_DEST_PATH, filepaths);
   }

   /**
    * Merges several docx files into one document written to {@code destPath}.
    * The body content of every document is appended to the first document, which is
    * then saved. Nothing is written when no source path is given.
    *
    * @param destPath  path of the merged docx file to create
    * @param filepaths paths of the docx files to merge, in the order they should appear
    * @throws Exception if a source file cannot be read or parsed, or the result cannot be written
    */
   public static void mergeDocTo(String destPath, String... filepaths) throws Exception {
       Objects.requireNonNull(destPath, "destPath");
       if (filepaths == null || filepaths.length == 0) {
           return;
       }
       List<CTBody> ctBodyList = new ArrayList<>();
       List<XWPFDocument> srcDocuments = new ArrayList<>();
       try {
           for (String filepath : filepaths) {
               XWPFDocument srcDocument;
               // A failed open now propagates with its real cause instead of being
               // swallowed and resurfacing as a NullPointerException later on.
               try (InputStream in = new FileInputStream(filepath)) {
                   srcDocument = new XWPFDocument(OPCPackage.open(in));
               }
               srcDocuments.add(srcDocument);
               ctBodyList.add(srcDocument.getDocument().getBody());
           }
           appendBody(ctBodyList);
           // Open the destination only once there is something to write, and always close it.
           try (OutputStream dest = new FileOutputStream(destPath)) {
               srcDocuments.get(0).write(dest);
           }
       } finally {
           for (XWPFDocument srcDocument : srcDocuments) {
               closeQuietly(srcDocument);
           }
       }
   }

   /**
    * Concatenates the inner XML of all bodies and stores the result in the first body.
    *
    * @param ctBodyList bodies to merge; the first one receives the merged content
    * @throws Exception if the combined XML cannot be parsed
    */
   private static void appendBody(List<CTBody> ctBodyList) throws Exception {
       // Namespace declarations of every document, separated by blanks
       StringBuilder allXmlns = new StringBuilder();
       // Inner elements of every document, in order
       StringBuilder allElements = new StringBuilder();
       for (CTBody body : ctBodyList) {
           // Full XML of this body, wrapped in an <xml-fragment ...> element
           String xml = body.xmlText();
           int tagEnd = xml.indexOf('>');
           int contentEnd = xml.lastIndexOf("</");
           if (tagEnd < 0 || contentEnd <= tagEnd) {
               // No closing tag after the opening one: the body has no content to append.
               continue;
           }
           int nsStart = xml.indexOf(XMLNS);
           if (nsStart >= 0 && nsStart < tagEnd) {
               allXmlns.append(xml, nsStart, tagEnd).append(' ');
           }
           allElements.append(xml, tagEnd + 1, contentEnd);
       }
       // De-duplicate the namespace declarations and rebuild the opening tag
       String distinctPrefix = distinctXmlns(allXmlns.toString());
       // NOTE(review): every source body keeps its own trailing section properties in the
       // concatenated XML — confirm the resulting page layout is what you expect.
       CTBody makeBody = CTBody.Factory.parse(distinctPrefix + allElements + "</xml-fragment>");
       ctBodyList.get(0).set(makeBody);
   }

   /**
    * Builds an {@code <xml-fragment ...>} opening tag holding each namespace prefix once.
    *
    * @param prefix concatenated {@code xmlns...="..."} declarations
    * @return the opening tag, including the closing {@code >}
    */
   public static String distinctXmlns(String prefix) {
       Set<String> declarations = new LinkedHashSet<>();
       int start = prefix.indexOf(XMLNS);
       while (start >= 0) {
           int end = prefix.indexOf(XMLNS, start + 1);
           // The final declaration has no successor; take it up to the end of the string
           // (the original loop stopped one declaration early and lost it).
           String declaration = (end < 0 ? prefix.substring(start) : prefix.substring(start, end)).trim();
           if (!declaration.isEmpty()) {
               declarations.add(declaration);
           }
           start = end;
       }
       StringBuilder sb = new StringBuilder("<xml-fragment ");
       for (Map.Entry<String, String> entry : distinctXmlns(declarations).entrySet()) {
           sb.append(" ");
           sb.append(entry.getKey());
           sb.append("=");
           sb.append(entry.getValue());
       }
       sb.append(">");
       return sb.toString();
   }

   /**
    * Splits declarations into a prefix-to-value map.
    * The same prefix may be bound to different addresses in different documents; the
    * first declaration in iteration order wins, so the result is deterministic for
    * ordered sets.
    *
    * @param set declarations of the form {@code xmlns:prefix="uri"}; elements must be strings
    * @return map from the text before {@code =} to the text after it, in insertion order
    */
   public static Map<String, String> distinctXmlns(Set<?> set) {
       Map<String, String> map = new LinkedHashMap<>();
       for (Object item : set) {
           String xmlns = (String) item;
           int eq = xmlns.indexOf('=');
           if (eq < 0) {
               // Not a name="value" pair; nothing to record.
               continue;
           }
           map.putIfAbsent(xmlns.substring(0, eq), xmlns.substring(eq + 1));
       }
       return map;
   }

   /**
    * Closes the given input streams, ignoring {@code null} entries.
    * Close failures are printed and do not stop the remaining streams from being closed.
    *
    * @param inputStream streams to close
    */
   public static void closeStream(InputStream... inputStream) {
       for (InputStream i : inputStream) {
           closeQuietly(i);
       }
   }

   /** Closes one resource on a best-effort basis, printing any close failure. */
   private static void closeQuietly(Closeable closeable) {
       if (closeable == null) {
           return;
       }
       try {
           closeable.close();
       } catch (IOException e) {
           e.printStackTrace();
       }
   }
}

替换文档中的占位符,包含段落占位符、表格占位符

替换占位符的思路,首先需要遍历文档中所有的段落和表格,再去一个个匹配占位符与你需要替换的参数,Word中段落是XWPFParagraph对象,表格是XWPFTable对象。更多代码请查看项目的replacemark目录。

package replacemark;

import org.apache.poi.xwpf.usermodel.*;
import org.springframework.util.StringUtils;

import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 *  替换文档中的段落和表格占位符
 * @author corey
 * @version 1.0
 * @date 2020/5/9 9:14 上午
 */
public class ReplaceUtil {

    /** Matches a {@code ${...}} placeholder; compiled once instead of on every call. */
    private static final Pattern PLACEHOLDER_PATTERN =
            Pattern.compile("\\$\\{(.+?)\\}", Pattern.CASE_INSENSITIVE);

    /**
     * Replaces placeholders in every non-empty body paragraph of the document.
     *
     * @param doc    document to process; ignored when {@code null}
     * @param params replacement values, key = placeholder text, value = actual value
     */
    public static void replaceInPara(XWPFDocument doc, Map<String,Object> params)  {
        if (doc == null || params == null || params.isEmpty()) {
            return;
        }
        Iterator<XWPFParagraph> iterator = doc.getParagraphsIterator();
        while (iterator.hasNext()) {
            XWPFParagraph para = iterator.next();
            if (!StringUtils.isEmpty(para.getParagraphText())) {
                replaceInPara(para, params);
            }
        }
    }

    /**
     * Replaces placeholders in a single paragraph.
     * Only {@code String} values are substituted; entries with other value types are skipped.
     * When at least one placeholder is replaced, all runs of the paragraph are removed and
     * one new run holding the replaced text is created, so the previous run formatting is lost.
     *
     * @param para   paragraph to process; ignored when {@code null}
     * @param params replacement values, key = placeholder text, value = actual value
     */
    public static void replaceInPara(XWPFParagraph para, Map<String,Object> params)  {
        if (para == null || params == null || params.isEmpty()) {
            return;
        }
        // Text of the whole paragraph
        String sourceText = para.getParagraphText();
        boolean replaced = false;
        for (Map.Entry<String, Object> entry : params.entrySet()) {
            String key = entry.getKey();
            // An empty key would match everywhere and inject the value between all characters.
            if (key == null || key.isEmpty()) {
                continue;
            }
            Object value = entry.getValue();
            if (value instanceof String && sourceText.contains(key)) {
                sourceText = sourceText.replace(key, (String) value);
                replaced = true;
            }
        }
        if (!replaced) {
            return;
        }
        // Remove existing runs from last to first so the remaining indexes stay valid.
        for (int i = para.getRuns().size() - 1; i >= 0; i--) {
            para.removeRun(i);
        }
        para.createRun().setText(sourceText);
    }

    /**
     * Replaces placeholders inside the tables of the document.
     * A table is processed only when it has more than one row and its text contains
     * a {@code ${...}} placeholder.
     *
     * @param doc    document to process; ignored when {@code null}
     * @param params replacement values, key = placeholder text, value = actual value
     */
    public static void replaceTable(XWPFDocument doc,Map<String,Object> params){
        if (doc == null || params == null || params.isEmpty()) {
            return;
        }
        Iterator<XWPFTable> iterator = doc.getTablesIterator();
        while (iterator.hasNext()) {
            XWPFTable table = iterator.next();
            // A ${ placeholder marks a table that needs replacing rather than row insertion.
            if (table.getRows().size() <= 1 || !matcher(table.getText()).find()) {
                continue;
            }
            for (XWPFTableRow row : table.getRows()) {
                for (XWPFTableCell cell : row.getTableCells()) {
                    for (XWPFParagraph para : cell.getParagraphs()) {
                        replaceInPara(para, params);
                    }
                }
            }
        }
    }

    /**
     * Creates a matcher that looks for {@code ${...}} placeholders in the given text.
     *
     * @param str text to scan
     * @return matcher positioned before the first match attempt
     */
    private static Matcher matcher(String str) {
        return PLACEHOLDER_PATTERN.matcher(str);
    }
}

富文本转Word及注意事项

富文本转成Word的思路,富文本本身就是一段HTML字符串,可以直接把这段字符串当做一个段落写入到Word中,但这样会丢失HTML样式,所以需要将识别到的HTML标签替换成Word标签,这也是难点所在,所以需要设计一个大而全的样式替换工具,目前笔者的项目中只做H1\H2\H3\段落\表格\img的src是url的图片转换(base64流放在富文本中太大了,不易识别),再提一句这些替换的工具可以设计为责任链模式,笔者也还没有这样做。更多代码在项目的insertword目录。

package insertword;


import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.xmlbeans.XmlCursor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.util.ObjectUtils;
import org.springframework.util.StringUtils;

import java.io.*;

/**
 * Html工具类
 * @author corey
 * @version 1.0
 * @date 2020/5/5 9:36 下午
 */
public class HtmlUtil {

    /**
     * Tags every element of the parsed HTML whose tag name is known to {@code ElementEnum}
     * with the common marker attribute, so it can be selected later.
     *
     * @param document parsed HTML document
     * @throws NullPointerException if {@code document} is {@code null}
     */
    public static void addElement(Document document){
        if(ObjectUtils.isEmpty(document)){
            throw new NullPointerException("不允许为空的对象添加元素");
        }
        for (Element e : document.getAllElements()) {
            String attrName = ElementEnum.getValueByCode(e.tag().getName());
            if (!StringUtils.isEmpty(attrName)) {
                e.attr(CommonConStant.COMMONATTR, attrName);
            }
        }
    }

    /**
     * Writes rich-text (HTML) content into a Word document, inserting the generated
     * content before the given anchor paragraph.
     * Handled element kinds: h1, h2, h3, paragraph, image and table; other marked
     * elements are ignored. Failures are printed and stop the conversion; they are not
     * rethrown.
     *
     * @param ritchText rich-text content (HTML fragment)
     * @param doc       document receiving the content; needed to insert paragraphs, images and tables
     * @param paragraph anchor paragraph; new content is inserted at its position
     */
    public static void resolveHtml(String ritchText, XWPFDocument doc, XWPFParagraph paragraph){
        // The second argument of the two-argument overload is a base URI, not a charset,
        // so the one-argument form is the correct call here.
        Document document = Jsoup.parseBodyFragment(ritchText);
        try {
            // Mark the elements that can be converted
            HtmlUtil.addElement(document);
            Elements elements = document.select("["+CommonConStant.COMMONATTR+"]");
            for (Element em : elements) {
                XmlCursor xmlCursor = paragraph.getCTP().newCursor();
                try {
                    insertElement(em, doc, xmlCursor);
                } finally {
                    // Cursors hold resources on the underlying XML store until disposed.
                    xmlCursor.dispose();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Inserts the Word counterpart of one marked HTML element at the cursor position. */
    private static void insertElement(Element em, XWPFDocument doc, XmlCursor xmlCursor) throws Exception {
        switch (em.attr(CommonConStant.COMMONATTR)) {
            case "imgurl":
                insertImage(em, doc, xmlCursor);
                break;
            case "table":
                XWPFTable xwpfTable = doc.insertNewTbl(xmlCursor);
                addTable(xwpfTable, em);
                // Center the table itself
                ParagraphStyleUtil.setTableLocation(xwpfTable,"center");
                // Center the cell content
                ParagraphStyleUtil.setCellLocation(xwpfTable,"CENTER","center");
                break;
            case "h1":
                insertHeading(em, doc, xmlCursor, TitleFontEnum.H1);
                break;
            case "h2":
                insertHeading(em, doc, xmlCursor, TitleFontEnum.H2);
                break;
            case "h3":
                insertHeading(em, doc, xmlCursor, TitleFontEnum.H3);
                break;
            case "paragraph":
                XWPFParagraph textParagraph = doc.insertNewParagraph(xmlCursor);
                // Indent the paragraph with 4 spaces
                textParagraph.createRun().setText("    "+em.text());
                break;
            case "title":
            case "subtitle":
            case "imgbase64":
            default:
                // Not converted yet
                break;
        }
    }

    /** Inserts a heading paragraph carrying the element text, styled for the given level. */
    private static void insertHeading(Element em, XWPFDocument doc, XmlCursor xmlCursor, TitleFontEnum level) {
        XWPFParagraph headingParagraph = doc.insertNewParagraph(xmlCursor);
        XWPFRun run = headingParagraph.createRun();
        run.setText(em.text());
        ParagraphStyleUtil.setTitle(run, level.getTitle());
    }

    /**
     * Inserts a centered 200x200 picture read from the element's {@code src} attribute,
     * which is opened as a local file path.
     */
    private static void insertImage(Element em, XWPFDocument doc, XmlCursor xmlCursor) throws Exception {
        String url = em.attr("src");
        // try-with-resources closes the stream even when inserting the picture fails
        try (InputStream inputStream = new FileInputStream(url)) {
            XWPFParagraph imageParagraph = doc.insertNewParagraph(xmlCursor);
            ParagraphStyleUtil.setImageCenter(imageParagraph);
            imageParagraph.createRun().addPicture(inputStream, pictureTypeOf(url), "图片.jpeg",
                    Units.toEMU(200), Units.toEMU(200));
        }
    }

    /** Chooses the POI picture type from the file extension, falling back to PNG. */
    private static int pictureTypeOf(String path) {
        String lower = path.toLowerCase(java.util.Locale.ROOT);
        if (lower.endsWith(".jpg") || lower.endsWith(".jpeg")) {
            return XWPFDocument.PICTURE_TYPE_JPEG;
        }
        if (lower.endsWith(".gif")) {
            return XWPFDocument.PICTURE_TYPE_GIF;
        }
        return XWPFDocument.PICTURE_TYPE_PNG;
    }

    /**
     * Reads a text file line by line.
     * Every line, including the first, is preceded by the platform line separator.
     * Read failures are printed and whatever was read so far is returned.
     *
     * @param file file to read
     * @return file content
     */
    public static String txt2String(File file) {
        StringBuilder result = new StringBuilder();
        // NOTE(review): FileReader decodes with the platform default charset — confirm
        // that matches the encoding of the files being read.
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = br.readLine()) != null) {
                result.append(System.lineSeparator()).append(line);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return result.toString();
    }

    /**
     * Copies the rows of an HTML table into a Word table.
     *
     * @param xwpfTable target Word table
     * @param table     HTML table element
     */
    private static void addTable(XWPFTable xwpfTable,Element table) {
        int rownum = 0;
        for (Element tr : table.getElementsByTag("tr")) {
            addTableTr(xwpfTable, tr, rownum);
            rownum++;
        }
    }

    /**
     * Copies the cells of one HTML row into the Word table.
     * Row 0 reuses the first row of the Word table; later rows are appended.
     * Header and data cells are both copied, in document order.
     *
     * @param xwpfTable target Word table
     * @param tr        HTML row element
     * @param rownum    zero-based index of the row within the HTML table
     */
    private static void addTableTr(XWPFTable xwpfTable,Element tr,int rownum) {
        Elements tds = tr.select("th, td");
        if (tds.isEmpty()) {
            return;
        }
        XWPFTableRow row = (rownum == 0) ? xwpfTable.getRow(0) : xwpfTable.createRow();
        for (int i = 0, j = tds.size(); i < j; i++) {
            XWPFTableCell cell = row.getCell(i);
            if (cell == null) {
                // The row has fewer cells than this HTML row: grow it instead of failing.
                cell = row.addNewTableCell();
            }
            cell.setText(tds.get(i).text());
        }
    }

    /**
     * Closes the given resources, ignoring {@code null} entries.
     * Close failures are printed and do not stop the remaining resources from being closed.
     *
     * @param closeables resources to close
     */
    public static void closeStream(Closeable... closeables) {
        for (Closeable c : closeables) {
            if (c == null) {
                continue;
            }
            try {
                c.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

给Word生成水印

Word添加水印的思路,利用XWPFHeader对象创建页眉,给页眉添加文字,设置字体、大小、颜色、旋转角度即可。代码在项目的insertword目录

package insertword;

import com.microsoft.schemas.office.office.CTLock;
import com.microsoft.schemas.vml.*;
import org.apache.poi.wp.usermodel.HeaderFooterType;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFHeader;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;

import java.util.stream.Stream;

/**
 * @desc 添加水印
 * @author corey
 * @version 1.0
 * @date 2020/5/5 10:07 下午
 */
public class WatermarkUtil {
    // Watermark font
    private static final String FONT_NAME = "宋体";
    // Font size
    private static final String FONT_SIZE = "0.2pt";
    // Font color
    private static final String FONT_COLOR = "#d0d0d0";
    // Average width of one character in pt; text width = character count * this value
    private static final int WIDTH_PER_WORD = 10;
    // Top margin used when a single watermark line is added on its own
    private static final int DEFAULT_STYLE_TOP = 0;
    // Text rotation angle
    private static final String STYLE_ROTATION = "45";
    // Vertical distance between two watermark lines
    private static final int LINE_SPACING = 100;
    // Range of line indexes used to cover the page
    private static final int FIRST_LINE_INDEX = -5;
    private static final int END_LINE_INDEX = 20;

    /**
     * Covers the pages of the document with watermark lines.
     * Can be used on its own.
     *
     * @param doc        docx document to process
     * @param customText watermark text
     */
    public static void waterMarkDocXDocument(XWPFDocument doc,String customText){
        // The offset is passed as an argument instead of through shared static state,
        // so concurrent calls cannot interfere with each other.
        for (int lineIndex = FIRST_LINE_INDEX; lineIndex < END_LINE_INDEX; lineIndex++) {
            addWatermarkLine(doc, customText, LINE_SPACING * lineIndex);
        }
    }

    /**
     * Adds one watermark line at the default top margin.
     *
     * @param doc        docx document to process
     * @param customText watermark text
     */
    public static void waterMarkDocXDocument_0(XWPFDocument doc,String customText) {
        addWatermarkLine(doc, customText, DEFAULT_STYLE_TOP);
    }

    /**
     * Adds one watermark line to the default header of the document.
     *
     * @param doc        docx document to process
     * @param customText watermark text
     * @param styleTop   top margin of the line
     */
    private static void addWatermarkLine(XWPFDocument doc, String customText, int styleTop) {
        // Separate repetitions with 8 spaces and repeat the text 10 times per line
        String lineText = repeatString(customText + repeatString(" ", 8), 10);
        // An existing DEFAULT header is reused
        XWPFHeader header = doc.createHeader(HeaderFooterType.DEFAULT);
        if (header.getParagraphs().isEmpty()) {
            header.createParagraph();
        }
        CTP ctp = header.getParagraphArray(0).getCTP();
        // Copy the revision ids of the first body paragraph when there is one
        CTBody body = doc.getDocument().getBody();
        if (body.sizeOfPArray() > 0) {
            CTP firstParagraph = body.getPArray(0);
            byte[] rsidr = firstParagraph.getRsidR();
            byte[] rsidrdefault = firstParagraph.getRsidRDefault();
            if (rsidr != null) {
                ctp.setRsidP(rsidr);
            }
            if (rsidrdefault != null) {
                ctp.setRsidRDefault(rsidrdefault);
            }
        }
        // Reuse existing paragraph properties: this method runs once per watermark line
        // and must not add a new properties element to the same paragraph each time.
        CTPPr ppr = ctp.isSetPPr() ? ctp.getPPr() : ctp.addNewPPr();
        if (ppr.isSetPStyle()) {
            ppr.getPStyle().setVal("Header");
        } else {
            ppr.addNewPStyle().setVal("Header");
        }
        // Build the watermark shape
        CTR ctr = ctp.addNewR();
        CTRPr ctrpr = ctr.addNewRPr();
        ctrpr.addNewNoProof();
        CTGroup group = CTGroup.Factory.newInstance();
        CTShapetype shapetype = group.addNewShapetype();
        CTTextPath shapeTypeTextPath = shapetype.addNewTextpath();
        shapeTypeTextPath.setOn(STTrueFalse.T);
        shapeTypeTextPath.setFitshape(STTrueFalse.T);
        CTLock lock = shapetype.addNewLock();
        lock.setExt(STExt.VIEW);
        CTShape shape = group.addNewShape();
        shape.setId("PowerPlusWaterMarkObject");
        shape.setSpid("_x0000_s102");
        shape.setType("#_x0000_t136");
        // Shape style: rotation, position, size
        shape.setStyle(getShapeStyle(lineText, styleTop));
        shape.setFillcolor(FONT_COLOR);
        // No outline, so the text is drawn solid
        shape.setStroked(STTrueFalse.FALSE);
        // Text path carrying the watermark text, font and size
        CTTextPath shapeTextPath = shape.addNewTextpath();
        shapeTextPath.setStyle("font-family:" + FONT_NAME + ";font-size:" + FONT_SIZE);
        shapeTextPath.setString(lineText);
        CTPicture pict = ctr.addNewPict();
        pict.set(group);
    }

    /**
     * Builds the style string of the watermark shape.
     *
     * @param customText full text of the watermark line, used to compute the width
     * @param styleTop   top margin of the line
     * @return style string
     */
    private static String getShapeStyle(String customText, int styleTop) {
        StringBuilder sb = new StringBuilder();
        // Positioning mode of the text path
        sb.append("position: ").append("absolute");
        // Width = character count * width per character
        sb.append(";width: ").append(customText.length() * WIDTH_PER_WORD).append("pt");
        // Text height
        sb.append(";height: ").append("20pt");
        sb.append(";z-index: ").append("-251654144");
        sb.append(";mso-wrap-edited: ").append("f");
        // Line spacing has to be set with margin-top; top does not work here
        sb.append(";margin-top: ").append(styleTop);
        sb.append(";mso-position-horizontal-relative: ").append("page");
        sb.append(";mso-position-vertical-relative: ").append("page");
        sb.append(";mso-position-vertical: ").append("left");
        sb.append(";mso-position-horizontal: ").append("center");
        sb.append(";rotation: ").append(STYLE_ROTATION);
        return sb.toString();
    }

    /**
     * Repeats a string.
     *
     * @param pattern string to repeat
     * @param repeats number of repetitions
     * @return {@code pattern} concatenated {@code repeats} times
     */
    private static String repeatString(String pattern, int repeats) {
        StringBuilder buffer = new StringBuilder(pattern.length() * repeats);
        for (int i = 0; i < repeats; i++) {
            buffer.append(pattern);
        }
        return buffer.toString();
    }
}

传送门

Github地址

GitEE 码云

鸣谢

感谢项目中同事对Word操作提出的改善意见,让本代码得以顺利交付运行。感谢所有提供了源代码的博主。 感谢各位猿佬百忙之中抽空阅读、点赞、收藏,记得帮忙在GitHub上点亮你尊贵的小星星哦。