Word / PPT / Excel 预览工具

938 阅读3分钟

基础文档预览流程: 基础文档 -> html -> pdf -> 网页预览 pdf(ppt 先转换为图片,见第 3 节)

1、word to html

/**
所需依赖版本 包含 excel转html,word转html
 <dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.4</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>xdocreport</artifactId>
    <version>2.0.2</version>
</dependency>
*/


import com.zmj.plat.exception.BusinessException;
import fr.opensagres.poi.xwpf.converter.core.FileImageExtractor;
import fr.opensagres.poi.xwpf.converter.core.FileURIResolver;
import fr.opensagres.poi.xwpf.converter.core.IXWPFConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;

public class WordToHtml {

    public static void main(String[] args) {
        String filepath = "D:/resumeFile/";
        String fileName = "test.doc";
        String htmlName = "test.html";

        String suffix = fileName.substring(fileName.lastIndexOf(".") + 1);
        System.out.println(suffix);
       /* try {
            wordToHtml(filepath,fileName,htmlName);
        } catch (Exception exception) {
            exception.printStackTrace();
        }*/
    }

    public static void wordToHtml(String filepath,String fileName) throws IOException, TransformerException, ParserConfigurationException {
        String htmlName = fileName.substring(0,fileName.lastIndexOf(".")) + ".html";
        String file = filepath + fileName;
        File f = new File(file);
        if(f.exists()){
            if(f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")){
                word07ToHtml(filepath,fileName,htmlName);
            }else {
                word03ToHtml(filepath,fileName,htmlName);
            }
        }else{
            throw new BusinessException("文档不存在");
        }

    }

    public static void word07ToHtml(String filepath,String fileName,String htmlName)throws IOException {
        String file = filepath + fileName;
        File f = new File(file);
        if(f.exists()){
            if(f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")){
                // 1) 加载word文档生成 XWPFDocument对象
                InputStream in = new FileInputStream(f);
                XWPFDocument document = new XWPFDocument(in);
                // 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)
                File imageFolderFile = new File(filepath);
                XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));
                options.setExtractor(new FileImageExtractor(imageFolderFile));
                options.setIgnoreStylesIfUnused(false);
                options.setFragment(true);
                // 3) 将 XWPFDocument转换成XHTML
                OutputStream out = new FileOutputStream(new File(filepath + htmlName));
                IXWPFConverter<XHTMLOptions> instance = XHTMLConverter.getInstance();
                instance.convert(document, out, options);
            }
        }else{
            throw new BusinessException("文档不存在");
        }
    }

    public static void word03ToHtml(String filepath,String fileName,String htmlName) throws IOException, TransformerException, ParserConfigurationException {
        final String imagepath = "D:/resumeFile/image"; //解析时候如果doc文件中有图片  图片会保存在此路径
        String file = filepath + fileName;
        InputStream input = new FileInputStream(new File(file));
        HWPFDocument wordDocument = new HWPFDocument(input);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        //设置图片存放的位置
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
                File imgPath = new File(imagepath);
                if(!imgPath.exists()){//图片目录不存在则创建
                    imgPath.mkdirs();
                }
                File file = new File(imagepath + suggestedName);
                try {
                    OutputStream os = new FileOutputStream(file);
                    os.write(content);
                    os.close();
                } catch (FileNotFoundException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
                return imagepath + suggestedName;
            }
        });

        //解析word文档
        wordToHtmlConverter.processDocument(wordDocument);
        Document htmlDocument = wordToHtmlConverter.getDocument();

        File htmlFile = new File(filepath + htmlName);
        OutputStream outStream = new FileOutputStream(htmlFile);

        //也可以使用字符数组流获取解析的内容
//        ByteArrayOutputStream baos = new ByteArrayOutputStream();
//        OutputStream outStream = new BufferedOutputStream(baos);

        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(outStream);

        TransformerFactory factory = TransformerFactory.newInstance();
        Transformer serializer = factory.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");

        serializer.transform(domSource, streamResult);

        //也可以使用字符数组流获取解析的内容
//        String content = baos.toString();
//        System.out.println(content);
//        baos.close();
        outStream.close();
    }
}

2、excel to html

/**
所需依赖版本 包含 excel转html,word转html
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>

<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.0</version>
</dependency>

<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.0</version>
</dependency>

<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.4</version>
</dependency>

<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>xdocreport</artifactId>
<version>2.0.2</version>
</dependency>
*/

import java.io.*;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import com.zmj.plat.exception.BusinessException;
import org.apache.commons.io.FileUtils;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.w3c.dom.Document;

/**
 * Converts Excel workbooks to HTML files stored next to the source workbook.
 *
 * <p>{@code .xlsx} files are rendered as simple HTML tables, everything else is
 * treated as a legacy {@code .xls} workbook and converted with POI's
 * {@link ExcelToHtmlConverter}.
 */
public class ExcelToHtml {

    public static void main(String args[]) throws Exception {
        String rootPath = "D:\\";
        String fileUrl = "接口文档.xlsx";
        String htmlUrl = excelToHtml(rootPath, fileUrl);
        System.out.println(htmlUrl);
    }

    /**
     * Converts the workbook {@code filepath + fileName} to an HTML file with the
     * same base name in the same directory.
     *
     * @param filepath directory of the workbook, including the trailing separator
     * @param fileName file name of the workbook
     * @return path of the generated HTML file ({@code filepath} + HTML file name)
     * @throws BusinessException if the workbook does not exist
     * @throws Exception if reading the workbook or writing the HTML fails
     */
    public static String excelToHtml(String filepath, String fileName) throws Exception {
        File source = new File(filepath + fileName);
        if (!source.exists()) {
            throw new BusinessException("文档不存在");
        }
        String htmlName = stripExtension(fileName) + ".html";
        if (hasExtension(source.getName(), ".xlsx")) {
            return xlxsToHtml(filepath, fileName, htmlName);
        }
        return xlsToHtml(filepath, fileName, htmlName);
    }

    /**
     * Converts a legacy {@code .xls} workbook to HTML using POI's converter.
     * Pictures stored in the workbook are additionally dumped into
     * {@code filePath} as {@code picture<N>.<ext>}; the generated HTML does not
     * reference them.
     *
     * @param filePath directory of the workbook, including the trailing separator
     * @param fileName file name of the workbook
     * @param htmlName file name of the HTML file to create inside {@code filePath}
     * @return path of the generated HTML file ({@code filePath + htmlName})
     * @throws IOException if the workbook cannot be read, converted or written
     */
    public static String xlsToHtml(String filePath, String fileName, String htmlName) throws IOException {
        try (InputStream input = new FileInputStream(filePath + fileName);
             HSSFWorkbook excelBook = new HSSFWorkbook(input)) {
            ExcelToHtmlConverter converter;
            try {
                converter = new ExcelToHtmlConverter(
                        DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            } catch (ParserConfigurationException e) {
                throw new IOException("Unable to create the DOM document for " + fileName, e);
            }
            converter.processWorkbook(excelBook);
            exportPictures(excelBook, filePath);

            Document htmlDocument = (Document) converter.getDocument();
            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            try {
                Transformer serializer = TransformerFactory.newInstance().newTransformer();
                serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
                serializer.setOutputProperty(OutputKeys.INDENT, "yes");
                serializer.setOutputProperty(OutputKeys.METHOD, "html");
                serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
            } catch (TransformerException e) {
                throw new IOException("Unable to serialize the HTML for " + fileName, e);
            }

            String htmlUrl = filePath + htmlName;
            // The serializer already produced UTF-8 bytes; write them untouched instead of
            // round-tripping through a String decoded with the platform charset.
            FileUtils.writeByteArrayToFile(new File(htmlUrl), outStream.toByteArray());
            return htmlUrl;
        }
    }

    /**
     * Converts an {@code .xlsx} workbook to HTML. Every sheet becomes its name
     * followed by a table whose first row is rendered as header cells.
     *
     * @param filePath directory of the workbook, including the trailing separator
     * @param fileName file name of the workbook
     * @param htmlName file name of the HTML file to create inside {@code filePath}
     * @return path of the generated HTML file ({@code filePath + htmlName})
     * @throws Exception if the workbook cannot be read or the HTML cannot be written
     */
    public static String xlxsToHtml(String filePath, String fileName, String htmlName) throws Exception {
        StringBuilder html = new StringBuilder();
        try (InputStream is = new FileInputStream(filePath + fileName);
             Workbook workbook = new XSSFWorkbook(is)) {
            for (int numSheet = 0; numSheet < workbook.getNumberOfSheets(); numSheet++) {
                Sheet sheet = workbook.getSheetAt(numSheet);
                if (sheet != null) {
                    appendSheet(html, sheet);
                }
            }
        }
        String htmlUrl = filePath + htmlName;
        // Written once, after all sheets were rendered.
        FileUtils.writeStringToFile(new File(htmlUrl), html.toString(), "utf-8");
        return htmlUrl;
    }

    /** Dumps every picture stored in the workbook into {@code directory}. */
    private static void exportPictures(Workbook workbook, String directory) throws IOException {
        int index = 0;
        for (PictureData picture : workbook.getAllPictures()) {
            index++;
            String extension = picture.suggestFileExtension();
            String name = "picture" + index + (extension.isEmpty() ? "" : "." + extension);
            try (OutputStream out = new FileOutputStream(directory + name)) {
                out.write(picture.getData());
            }
        }
    }

    /** Appends the sheet name and, when the sheet has rows, its content as an HTML table. */
    private static void appendSheet(StringBuilder html, Sheet sheet) {
        html.append(escapeHtml(sheet.getSheetName())).append("<br><br>");

        int firstRowIndex = sheet.getFirstRowNum();
        int lastRowIndex = sheet.getLastRowNum();
        Row firstRow = sheet.getRow(firstRowIndex);
        if (firstRow == null) {
            // Empty sheet: nothing to tabulate, and no dangling <table> tag.
            return;
        }

        html.append("<table border='1' align='left'>");
        html.append("<tr>");
        appendCells(html, firstRow, "th");
        html.append("</tr>");

        for (int rowIndex = firstRowIndex + 1; rowIndex <= lastRowIndex; rowIndex++) {
            Row currentRow = sheet.getRow(rowIndex);
            html.append("<tr>");
            if (currentRow != null) {
                appendCells(html, currentRow, "td");
            } else {
                html.append(" ");
            }
            html.append("</tr>");
        }
        html.append("</table>");
    }

    /** Appends one {@code <tag>value</tag>} element per column of {@code row}. */
    private static void appendCells(StringBuilder html, Row row, String tag) {
        int first = row.getFirstCellNum();
        // getLastCellNum() is the index of the last cell PLUS ONE, hence the exclusive bound.
        int last = row.getLastCellNum();
        if (first < 0) {
            // Row without any cells (both bounds are -1).
            return;
        }
        for (int column = first; column < last; column++) {
            String value = getCellValue(row.getCell(column), true);
            html.append('<').append(tag).append('>')
                .append(escapeHtml(value))
                .append("</").append(tag).append('>');
        }
    }

    /** Escapes the characters that would otherwise be interpreted as HTML markup. */
    private static String escapeHtml(String text) {
        if (text == null) {
            return "";
        }
        return text.replace("&", "&amp;")
                   .replace("<", "&lt;")
                   .replace(">", "&gt;")
                   .replace("\"", "&quot;");
    }

    /** Returns true when {@code name} ends with {@code extension}, ignoring case. */
    private static boolean hasExtension(String name, String extension) {
        int offset = name.length() - extension.length();
        return offset >= 0 && name.regionMatches(true, offset, extension, 0, extension.length());
    }

    /** Returns {@code fileName} without its last extension; names without a dot are returned unchanged. */
    private static String stripExtension(String fileName) {
        int dot = fileName.lastIndexOf('.');
        return dot < 0 ? fileName : fileName.substring(0, dot);
    }

    /**
     * Reads the content of a cell as text.
     *
     * <p>NOTE(review): with {@code treatAsStr} the cell type is switched to STRING
     * on the in-memory workbook, i.e. the cell is modified; the workbook is never
     * saved by this class.
     *
     * @param cell       the cell to read, may be {@code null}
     * @param treatAsStr when {@code true} the cell is converted to a string cell
     *                   before it is read
     * @return the cell content, or an empty string for a {@code null} cell
     */
    private static String getCellValue(Cell cell, boolean treatAsStr) {
        if (cell == null) {
            return "";
        }
        if (treatAsStr) {
            cell.setCellType(CellType.STRING);
        }
        if (cell.getCellType() == CellType.BOOLEAN) {
            return String.valueOf(cell.getBooleanCellValue());
        } else if (cell.getCellType() == CellType.NUMERIC) {
            return String.valueOf(cell.getNumericCellValue());
        } else {
            return String.valueOf(cell.getStringCellValue());
        }
    }
}

3、ppt to image

/**
所需依赖版本 (与 word/excel 转 html 相同; ppt 转图片用到 poi-scratchpad 和 poi-ooxml)
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>

<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.0</version>
</dependency>

<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.0</version>
</dependency>

<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.4</version>
</dependency>

<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>xdocreport</artifactId>
<version>2.0.2</version>
</dependency>

*/


import java.awt.*;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;

import com.zmj.plat.exception.BusinessException;
import org.apache.poi.hslf.usermodel.*;
import org.apache.poi.xslf.usermodel.*;

import javax.imageio.ImageIO;

/**
 * Renders every slide of a PowerPoint presentation to a JPEG image stored in
 * {@code <filePath>image/<presentation name>/<slide number>.jpg}.
 */
public class PPTToImage {
    public static void main(String[] args) throws Exception {
        // Sample presentation to convert
        String filePath = "D:/resumeFile/";
        String fileName = "test.pptx";
        PPTtoImage(filePath, fileName);
    }

    /**
     * Renders the presentation {@code filePath + fileName}; {@code .pptx} files
     * are read with XSLF, everything else is treated as a legacy {@code .ppt}.
     *
     * @param filePath directory of the presentation, including the trailing separator
     * @param fileName file name of the presentation
     * @throws BusinessException if the presentation does not exist
     * @throws Exception if the presentation cannot be opened
     */
    public static void PPTtoImage(String filePath, String fileName) throws Exception {
        File source = new File(filePath + fileName);
        if (!source.exists()) {
            throw new BusinessException("文档不存在");
        }
        if (hasExtension(source.getName(), ".pptx")) {
            PPT07toImage(filePath, fileName);
        } else {
            PPT03toImage(filePath, fileName);
        }
    }

    /**
     * Renders all slides of a {@code .pptx} presentation. A slide that fails to
     * render is reported on stderr and skipped; the remaining slides are still
     * processed.
     *
     * @param filePath directory of the presentation, including the trailing separator
     * @param fileName file name of the presentation
     * @throws Exception if the presentation cannot be opened
     */
    public static void PPT07toImage(String filePath, String fileName) throws Exception {
        try (FileInputStream is = new FileInputStream(filePath + fileName);
             XMLSlideShow ppt = new XMLSlideShow(is)) {
            Dimension pgsize = ppt.getPageSize();
            System.out.println(pgsize.width + "--" + pgsize.height);
            File imageDir = prepareImageDir(filePath, fileName);

            for (int i = 0; i < ppt.getSlides().size(); i++) {
                try {
                    XSLFSlide slide = ppt.getSlides().get(i);
                    // Force a CJK-capable font so Chinese text is not rendered as garbage.
                    for (XSLFShape shape : slide.getShapes()) {
                        if (shape instanceof XSLFTextShape) {
                            for (XSLFTextParagraph paragraph : (XSLFTextShape) shape) {
                                for (XSLFTextRun run : paragraph) {
                                    run.setFontFamily("宋体");
                                }
                            }
                        }
                    }
                    BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB);
                    Graphics2D graphics = img.createGraphics();
                    try {
                        // clear the drawing area
                        graphics.setPaint(Color.white);
                        graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height));
                        // render
                        slide.draw(graphics);
                    } finally {
                        graphics.dispose();
                    }
                    writeSlideImage(img, imageDir, i + 1);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Renders all slides of a legacy {@code .ppt} presentation. Any failure is
     * reported on stderr and stops the conversion; no exception is propagated.
     *
     * @param filePath directory of the presentation, including the trailing separator
     * @param fileName file name of the presentation
     */
    public static void PPT03toImage(String filePath, String fileName) {
        try (HSLFSlideShow ppt = new HSLFSlideShow(new HSLFSlideShowImpl(filePath + fileName))) {
            Dimension pgsize = ppt.getPageSize();
            File imageDir = prepareImageDir(filePath, fileName);

            for (int i = 0; i < ppt.getSlides().size(); i++) {
                HSLFSlide slide = ppt.getSlides().get(i);
                // Force a CJK-capable font so Chinese text is not rendered as garbage.
                for (HSLFShape shape : slide.getShapes()) {
                    if (shape instanceof HSLFTextShape) {
                        for (HSLFTextParagraph paragraph : (HSLFTextShape) shape) {
                            for (HSLFTextRun run : paragraph) {
                                run.setFontFamily("宋体");
                            }
                        }
                    }
                }
                BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB);
                Graphics2D graphics = img.createGraphics();
                try {
                    // clear the drawing area
                    graphics.setPaint(Color.white);
                    graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height));
                    // render
                    slide.draw(graphics);
                } finally {
                    graphics.dispose();
                }
                writeSlideImage(img, imageDir, i + 1);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /***
     * Scales an image to the given size and stores the result as JPEG.
     *
     * @param srcImgPath  path of the source image
     * @param distImgPath path of the scaled image to write
     * @param width       width of the scaled image in pixels
     * @param height      height of the scaled image in pixels
     * @throws IOException if the source cannot be read or decoded, or the result cannot be written
     */
    public static void resizeImage(String srcImgPath, String distImgPath,
                                   int width, int height) throws IOException {
        Image srcImg = ImageIO.read(new File(srcImgPath));
        if (srcImg == null) {
            // ImageIO.read returns null when no registered reader understands the file.
            throw new IOException("Unsupported or corrupt image: " + srcImgPath);
        }
        BufferedImage buffImg = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
        Graphics graphics = buffImg.getGraphics();
        try {
            graphics.drawImage(srcImg.getScaledInstance(width, height, Image.SCALE_SMOOTH), 0, 0, null);
        } finally {
            graphics.dispose();
        }
        ImageIO.write(buffImg, "JPEG", new File(distImgPath));
    }

    /** Returns (creating it when missing) the directory {@code <filePath>image/<presentation name>/}. */
    private static File prepareImageDir(String filePath, String fileName) {
        int dot = fileName.lastIndexOf('.');
        String pptName = dot < 0 ? fileName : fileName.substring(0, dot);
        File imageDir = new File(filePath + "image/" + pptName + "/");
        if (!imageDir.exists()) {
            imageDir.mkdirs();
        }
        return imageDir;
    }

    /** Writes {@code img} as {@code <slideNumber>.jpg}, encoded as JPEG to match the file extension. */
    private static void writeSlideImage(BufferedImage img, File imageDir, int slideNumber) throws IOException {
        File target = new File(imageDir, slideNumber + ".jpg");
        try (FileOutputStream out = new FileOutputStream(target)) {
            if (!ImageIO.write(img, "jpg", out)) {
                throw new IOException("No JPEG writer available for " + target);
            }
        }
    }

    /** Returns true when {@code name} ends with {@code extension}, ignoring case. */
    private static boolean hasExtension(String name, String extension) {
        int offset = name.length() - extension.length();
        return offset >= 0 && name.regionMatches(true, offset, extension, 0, extension.length());
    }

}

4、html to pdf

html to pdf 需要系统字体,否则会出现中文乱码。

/*所需依赖
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>html2pdf</artifactId>
<version>2.0.2</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>font-asian</artifactId>
<version>7.1.2</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>barcodes</artifactId>
<version>7.1.2</version>
</dependency>
*/

import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.html2pdf.resolver.font.DefaultFontProvider;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.layout.Document;

import java.io.*;

/**
 * Converts an HTML file to an A4-sized PDF with iText html2pdf.
 *
 * <p>A font file must be supplied explicitly because the font provider is
 * created without any built-in or system fonts; without a suitable font,
 * Chinese text would come out garbled.
 */
public class HtmlToPdf {
    private static final String pdfPath = "D:/resumeFile/测试文档.pdf";
    private static final String SRC = "D:/resumeFile/测试文档.html";
    private static final String chineseFontPath = "D:/resumeFile/font.ttf";

    public static void main(String[] args) {
        try {
            toPdf(SRC, pdfPath, chineseFontPath);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Renders the HTML file {@code html} into the PDF file {@code pdfPath}.
     *
     * @param html    path of the HTML file to read
     * @param pdfPath path of the PDF file to create
     * @param font    path of the font file used to render the text
     * @throws Exception if the HTML cannot be read or the PDF cannot be written
     */
    public static void toPdf(String html, String pdfPath, String font) throws Exception {
        ConverterProperties props = new ConverterProperties();
        // (false, false, false): register neither standard PDF fonts, shipped fonts nor system fonts;
        // only the font added below is available to the converter.
        DefaultFontProvider defaultFontProvider = new DefaultFontProvider(false, false, false);
        defaultFontProvider.addFont(font);
        props.setFontProvider(defaultFontProvider);

        // try-with-resources: the HTML stream, the writer and the PDF are released even when conversion fails
        try (InputStream htmlStream = new FileInputStream(html);
             PdfWriter writer = new PdfWriter(pdfPath);
             PdfDocument pdf = new PdfDocument(writer)) {
            // 595 x 842 pt = A4
            pdf.setDefaultPageSize(new PageSize(595.0F, 842.0F));
            Document document = HtmlConverter.convertToDocument(htmlStream, pdf, props);
            // Closing the layout document flushes the content into the PDF.
            document.close();
        }
    }
}