基础文档预览: 基础文档->html->pdf ->网页预览pdf
1、word to html
/**
所需依赖版本 包含 excel转html,word转html
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>xdocreport</artifactId>
<version>2.0.2</version>
</dependency>
*/
import com.zmj.plat.exception.BusinessException
import fr.opensagres.poi.xwpf.converter.core.FileImageExtractor
import fr.opensagres.poi.xwpf.converter.core.FileURIResolver
import fr.opensagres.poi.xwpf.converter.core.IXWPFConverter
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions
import org.apache.poi.hwpf.HWPFDocument
import org.apache.poi.hwpf.converter.PicturesManager
import org.apache.poi.hwpf.converter.WordToHtmlConverter
import org.apache.poi.hwpf.usermodel.PictureType
import org.apache.poi.xwpf.usermodel.XWPFDocument
import org.w3c.dom.Document
import javax.xml.parsers.DocumentBuilderFactory
import javax.xml.parsers.ParserConfigurationException
import javax.xml.transform.OutputKeys
import javax.xml.transform.Transformer
import javax.xml.transform.TransformerException
import javax.xml.transform.TransformerFactory
import javax.xml.transform.dom.DOMSource
import javax.xml.transform.stream.StreamResult
import java.io.*
public class WordToHtml {
public static void main(String[] args) {
String filepath = "D:/resumeFile/"
String fileName = "test.doc"
String htmlName = "test.html"
String suffix = fileName.substring(fileName.lastIndexOf(".") + 1)
System.out.println(suffix)
/* try {
wordToHtml(filepath,fileName,htmlName)
} catch (Exception exception) {
exception.printStackTrace()
}*/
}
public static void wordToHtml(String filepath,String fileName) throws IOException, TransformerException, ParserConfigurationException {
String htmlName = fileName.substring(0,fileName.lastIndexOf(".")) + ".html"
String file = filepath + fileName
File f = new File(file)
if(f.exists()){
if(f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")){
word07ToHtml(filepath,fileName,htmlName)
}else {
word03ToHtml(filepath,fileName,htmlName)
}
}else{
throw new BusinessException("文档不存在")
}
}
public static void word07ToHtml(String filepath,String fileName,String htmlName)throws IOException {
String file = filepath + fileName
File f = new File(file)
if(f.exists()){
if(f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")){
// 1) 加载word文档生成 XWPFDocument对象
InputStream in = new FileInputStream(f)
XWPFDocument document = new XWPFDocument(in)
// 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)
File imageFolderFile = new File(filepath)
XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile))
options.setExtractor(new FileImageExtractor(imageFolderFile))
options.setIgnoreStylesIfUnused(false)
options.setFragment(true)
// 3) 将 XWPFDocument转换成XHTML
OutputStream out = new FileOutputStream(new File(filepath + htmlName))
IXWPFConverter<XHTMLOptions> instance = XHTMLConverter.getInstance()
instance.convert(document, out, options)
}
}else{
throw new BusinessException("文档不存在")
}
}
public static void word03ToHtml(String filepath,String fileName,String htmlName) throws IOException, TransformerException, ParserConfigurationException {
final String imagepath = "D:/resumeFile/image"
String file = filepath + fileName
InputStream input = new FileInputStream(new File(file))
HWPFDocument wordDocument = new HWPFDocument(input)
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument())
//设置图片存放的位置
wordToHtmlConverter.setPicturesManager(new PicturesManager() {
public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
File imgPath = new File(imagepath)
if(!imgPath.exists()){//图片目录不存在则创建
imgPath.mkdirs()
}
File file = new File(imagepath + suggestedName)
try {
OutputStream os = new FileOutputStream(file)
os.write(content)
os.close()
} catch (FileNotFoundException e) {
e.printStackTrace()
} catch (IOException e) {
e.printStackTrace()
}
return imagepath + suggestedName
}
})
//解析word文档
wordToHtmlConverter.processDocument(wordDocument)
Document htmlDocument = wordToHtmlConverter.getDocument()
File htmlFile = new File(filepath + htmlName)
OutputStream outStream = new FileOutputStream(htmlFile)
//也可以使用字符数组流获取解析的内容
// ByteArrayOutputStream baos = new ByteArrayOutputStream()
// OutputStream outStream = new BufferedOutputStream(baos)
DOMSource domSource = new DOMSource(htmlDocument)
StreamResult streamResult = new StreamResult(outStream)
TransformerFactory factory = TransformerFactory.newInstance()
Transformer serializer = factory.newTransformer()
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8")
serializer.setOutputProperty(OutputKeys.INDENT, "yes")
serializer.setOutputProperty(OutputKeys.METHOD, "html")
serializer.transform(domSource, streamResult)
//也可以使用字符数组流获取解析的内容
// String content = baos.toString()
// System.out.println(content)
// baos.close()
outStream.close()
}
}
2、excel to html
import java.io.*;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import com.zmj.plat.exception.BusinessException;
import org.apache.commons.io.FileUtils;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.w3c.dom.Document;
public class ExcelToHtml {
public static void main(String args[]) throws Exception {
String rootPath = "D:\\";
String fileUrl = "接口文档.xlsx";
try {
String htmlUrl = excelToHtml(rootPath, fileUrl);
} catch (IOException e) {
e.printStackTrace();
}
}
public static String excelToHtml(String filepath,String fileName)throws Exception{
String htmlName = fileName.substring(0,fileName.lastIndexOf(".")) + ".html";
String file = filepath + fileName;
File f = new File(file);
if(f.exists()){
if(f.getName().endsWith(".xlsx") || f.getName().endsWith(".XLSX")){
xlxsToHtml(filepath,fileName,htmlName);
}else {
xlsToHtml(filepath,fileName,htmlName);
}
}else{
throw new BusinessException("文档不存在");
}
return null;
}
public static String xlsToHtml(String filePath, String fileName,String htmlName) throws IOException {
InputStream input = null;
ExcelToHtmlConverter excelToHtmlConverter = null;
try {
input = new FileInputStream(filePath + fileName);
} catch (FileNotFoundException e1) {
e1.printStackTrace();
}
HSSFWorkbook excelBook = new HSSFWorkbook(input);
try {
excelToHtmlConverter = new ExcelToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
} catch (ParserConfigurationException e1) {
e1.printStackTrace();
}
excelToHtmlConverter.processWorkbook(excelBook);
List pics = excelBook.getAllPictures();
if (pics != null) {
for (int i = 0; i < pics.size(); i++) {
Picture pic = (Picture) pics.get(i);
try {
pic.writeImageContent(new FileOutputStream(filePath + pic.suggestFullFileName()));
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
}
Document htmlDocument = (Document) excelToHtmlConverter.getDocument();
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(outStream);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = null;
try {
serializer = tf.newTransformer();
} catch (TransformerConfigurationException e) {
e.printStackTrace();
}
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
try {
serializer.transform(domSource, streamResult);
} catch (TransformerException e) {
e.printStackTrace();
}
outStream.close();
String content = new String(outStream.toByteArray());
String htmlUrl = filePath + htmlName;
String Path = htmlUrl.replace("\\", "/");
FileUtils.writeStringToFile(new File(Path, htmlName), content, "utf-8");
return htmlUrl;
}
public static String xlxsToHtml(String filePath, String fileName,String htmlName) throws Exception {
Workbook workbook = null;
InputStream is = new FileInputStream(filePath + fileName);
String htmlUrl = "";
try {
String html = "";
workbook = new XSSFWorkbook(is);
for (int numSheet = 0; numSheet < workbook.getNumberOfSheets(); numSheet++) {
Sheet sheet = workbook.getSheetAt(numSheet);
if (sheet == null) {
continue;
}
html += sheet.getSheetName() + "<br><br>";
int firstRowIndex = sheet.getFirstRowNum();
int lastRowIndex = sheet.getLastRowNum();
html += "<table border='1' align='left'>";
Row firstRow = sheet.getRow(firstRowIndex);
if(firstRow == null){
continue;
}
for (int i = firstRow.getFirstCellNum(); i <= firstRow.getLastCellNum(); i++) {
Cell cell = firstRow.getCell(i);
String cellValue = getCellValue(cell, true);
html += "<th>" + cellValue + "</th>";
}
for (int rowIndex = firstRowIndex + 1; rowIndex <= lastRowIndex; rowIndex++) {
Row currentRow = sheet.getRow(rowIndex);
html += "<tr>";
if (currentRow != null) {
int firstColumnIndex = currentRow.getFirstCellNum();
int lastColumnIndex = currentRow.getLastCellNum();
for (int columnIndex = firstColumnIndex; columnIndex <= lastColumnIndex; columnIndex++) {
if(columnIndex < 0){
continue;
}
Cell currentCell = currentRow.getCell(columnIndex);
String currentCellValue = getCellValue (currentCell, true);
html += "<td>" + currentCellValue + "</td>";
}
} else {
html += " ";
}
html += "</tr>";
}
html += "</table>";
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource();
StreamResult streamResult = new StreamResult(outStream);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
outStream.close();
FileUtils.writeStringToFile(new File(filePath, htmlName), html, "utf-8");
}
} catch (Exception e) {
e.printStackTrace();
}
return htmlUrl;
}
private static String getCellValue(Cell cell, boolean treatAsStr) {
if (cell == null) {
return "";
}
if (treatAsStr) {
cell.setCellType(CellType.STRING);
}
if (cell.getCellType() == CellType.BOOLEAN) {
return String.valueOf(cell.getBooleanCellValue());
} else if (cell.getCellType() == CellType.NUMERIC) {
return String.valueOf(cell.getNumericCellValue());
} else {
return String.valueOf(cell.getStringCellValue());
}
}
}
3、ppt to image
import java.awt.*;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import com.zmj.plat.exception.BusinessException;
import org.apache.poi.hslf.usermodel.*;
import org.apache.poi.xslf.usermodel.*;
import javax.imageio.ImageIO;
/**
 * Renders every slide of a PowerPoint presentation to an image file.
 *
 * <p>Images are written to {@code <filePath>image/<presentation base name>/<n>.jpg}, where
 * {@code n} is the 1-based slide number. Paths are built as {@code filePath + fileName}, so
 * {@code filePath} is expected to end with a path separator.
 */
public class PPTToImage {

    /** Font forced onto every text run before rendering so that Chinese text is drawn correctly. */
    private static final String SLIDE_FONT = "宋体";

    /** Small demo: renders a sample presentation. */
    public static void main(String[] args) throws Exception {
        String filePath = "D:/resumeFile/";
        String fileName = "test.pptx";
        PPTtoImage(filePath, fileName);
    }

    /**
     * Renders a presentation, choosing the converter by file extension ({@code .pptx} in any
     * letter case uses the OOXML converter, everything else the legacy binary converter).
     *
     * @param filePath directory of the presentation, including the trailing separator
     * @param fileName file name of the presentation
     * @throws BusinessException if the presentation does not exist
     * @throws Exception if the presentation cannot be opened
     */
    public static void PPTtoImage(String filePath, String fileName) throws Exception {
        File f = new File(filePath + fileName);
        if (!f.exists()) {
            throw new BusinessException("文档不存在");
        }
        if (f.getName().toLowerCase(java.util.Locale.ROOT).endsWith(".pptx")) {
            PPT07toImage(filePath, fileName);
        } else {
            PPT03toImage(filePath, fileName);
        }
    }

    /**
     * Renders a {@code .pptx} presentation. A slide that fails to render is reported and
     * skipped; the remaining slides are still processed.
     *
     * @param filePath directory of the presentation, including the trailing separator
     * @param fileName file name of the presentation
     * @throws Exception if the presentation cannot be opened
     */
    public static void PPT07toImage(String filePath, String fileName) throws Exception {
        try (FileInputStream is = new FileInputStream(filePath + fileName);
             XMLSlideShow ppt = new XMLSlideShow(is)) {
            Dimension pgsize = ppt.getPageSize();
            System.out.println(pgsize.width + "--" + pgsize.height);

            File outputDir = prepareOutputDir(filePath, fileName);
            int slideNumber = 0;
            for (XSLFSlide slide : ppt.getSlides()) {
                slideNumber++;
                try {
                    for (XSLFShape shape : slide.getShapes()) {
                        if (shape instanceof XSLFTextShape) {
                            for (XSLFTextParagraph p : (XSLFTextShape) shape) {
                                for (XSLFTextRun r : p) {
                                    r.setFontFamily(SLIDE_FONT);
                                }
                            }
                        }
                    }
                    renderSlide(pgsize, new File(outputDir, slideNumber + ".jpg"), slide::draw);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Renders a legacy {@code .ppt} presentation. Any failure is reported on stderr and stops
     * the conversion; slides rendered before the failure are kept.
     *
     * @param filePath directory of the presentation, including the trailing separator
     * @param fileName file name of the presentation
     */
    public static void PPT03toImage(String filePath, String fileName) {
        try (HSLFSlideShow ppt = new HSLFSlideShow(new HSLFSlideShowImpl(filePath + fileName))) {
            Dimension pgsize = ppt.getPageSize();
            File outputDir = prepareOutputDir(filePath, fileName);
            int slideNumber = 0;
            for (HSLFSlide slide : ppt.getSlides()) {
                slideNumber++;
                for (HSLFShape shape : slide.getShapes()) {
                    if (shape instanceof HSLFTextShape) {
                        for (HSLFTextParagraph p : (HSLFTextShape) shape) {
                            for (HSLFTextRun r : p) {
                                r.setFontFamily(SLIDE_FONT);
                            }
                        }
                    }
                }
                renderSlide(pgsize, new File(outputDir, slideNumber + ".jpg"), slide::draw);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Scales an image to exactly {@code width} x {@code height} pixels and stores it as JPEG.
     *
     * @param srcImgPath path of the image to read
     * @param distImgPath path of the JPEG file to write
     * @param width target width in pixels
     * @param height target height in pixels
     * @throws IOException if the source cannot be read or decoded, or the target cannot be written
     */
    public static void resizeImage(String srcImgPath, String distImgPath,
                                   int width, int height) throws IOException {
        Image srcImg = ImageIO.read(new File(srcImgPath));
        if (srcImg == null) {
            // ImageIO.read returns null when no registered reader understands the file.
            throw new IOException("Unsupported or unreadable image: " + srcImgPath);
        }
        BufferedImage buffImg = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
        Graphics graphics = buffImg.getGraphics();
        try {
            graphics.drawImage(srcImg.getScaledInstance(width, height, Image.SCALE_SMOOTH), 0, 0, null);
        } finally {
            graphics.dispose();
        }
        ImageIO.write(buffImg, "JPEG", new File(distImgPath));
    }

    /** Returns (creating it when missing) the directory that receives the slide images. */
    private static File prepareOutputDir(String filePath, String fileName) {
        int dot = fileName.lastIndexOf('.');
        String pptname = dot < 0 ? fileName : fileName.substring(0, dot);
        File imgPath = new File(filePath + "image/" + pptname + "/");
        if (!imgPath.exists()) {
            imgPath.mkdirs();
        }
        return imgPath;
    }

    /**
     * Paints one slide onto a white canvas of the given size and writes it to {@code target}.
     * The encoding is JPEG so that the file content matches its {@code .jpg} extension.
     */
    private static void renderSlide(Dimension size, File target,
                                    java.util.function.Consumer<Graphics2D> painter) throws IOException {
        BufferedImage img = new BufferedImage(size.width, size.height, BufferedImage.TYPE_INT_RGB);
        Graphics2D graphics = img.createGraphics();
        try {
            graphics.setPaint(Color.white);
            graphics.fill(new Rectangle2D.Float(0, 0, size.width, size.height));
            painter.accept(graphics);
        } finally {
            graphics.dispose();
        }
        if (!ImageIO.write(img, "jpg", target)) {
            throw new IOException("No JPEG writer available for " + target);
        }
    }
}
4、html to pdf
html to pdf 需要提供支持中文的字体文件(例如系统自带的宋体 .ttf,路径通过参数传入),否则会出现中文乱码。
/*所需依赖
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>html2pdf</artifactId>
<version>2.0.2</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>font-asian</artifactId>
<version>7.1.2</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>barcodes</artifactId>
<version>7.1.2</version>
</dependency>
*/
import com.itextpdf.html2pdf.ConverterProperties
import com.itextpdf.html2pdf.HtmlConverter
import com.itextpdf.html2pdf.resolver.font.DefaultFontProvider
import com.itextpdf.kernel.geom.PageSize
import com.itextpdf.kernel.pdf.PdfDocument
import com.itextpdf.kernel.pdf.PdfWriter
import com.itextpdf.layout.Document
import java.io.*
public class HtmlToPdf {
private static String pdfPath = "D:/resumeFile/测试文档.pdf"
private static String SRC = "D:/resumeFile/测试文档.html"
private static String chineseFontPath = "D:/resumeFile/font.ttf"
public static void main(String[] args) {
try {
toPdf(SRC, pdfPath, chineseFontPath)
} catch (Exception e) {
e.printStackTrace()
}
}
public static void toPdf(String html, String pdfPath,String font) throws Exception {
ConverterProperties props = new ConverterProperties()
DefaultFontProvider defaultFontProvider = new DefaultFontProvider(false, false, false)
//DefaultFontProvider defaultFontProvider = new DefaultFontProvider(true, true, true)
defaultFontProvider.addFont(font)
props.setFontProvider(defaultFontProvider)
PdfWriter writer = new PdfWriter(pdfPath)
PdfDocument pdf = new PdfDocument(writer)
//pdf.setDefaultPageSize(new PageSize(595.0F, 842.0F))
pdf.setDefaultPageSize(new PageSize(595.0F, 842.0F))
Document document = HtmlConverter.convertToDocument(new FileInputStream(html), pdf, props)
document.close()
pdf.close()
}
}