以下代码需要用到 POI 和 iText 的 jar 包,实现读取各种文档(.txt、.docx、.doc、.xls、.xlsx、.pptx)的内容,并转换为 PDF 格式文档。
/**
 * Extracts the plain text of documents (.txt, .docx, .doc, .xls, .xlsx, .pptx)
 * and writes that text into PDF files.
 *
 * <p>Typical use: construct with a source and a destination path, call
 * {@link #getAllFilePath()} to collect the source files, then run the
 * conversion (see {@link #main(String[])}).
 */
public class ReadFiles {
    // Source to convert: either a single file or a directory (scanned recursively).
    private final String sourcePath;
    // Destination, of the same kind as sourcePath: an output directory when the
    // source is a directory, the output PDF file when the source is a single file.
    private final String destPath;
    // Files collected by the most recent getAllFilePath() call.
    private final List<File> files;

    /**
     * @param sourcePath file or directory to convert
     * @param destPath   output PDF file (single-file source) or output directory
     *                   (directory source)
     */
    public ReadFiles(String sourcePath, String destPath) {
        this.sourcePath = sourcePath;
        this.destPath = destPath;
        this.files = new ArrayList<>(12);
    }

    /**
     * Collects every regular file found at or below the source path and prints
     * the resulting list to standard output.
     *
     * @return the collected files (empty when the source path is {@code null}),
     *         or {@code null} when the source path does not exist
     */
    public List<File> getAllFilePath() {
        // Start from a clean list so repeated calls do not accumulate duplicates.
        files.clear();
        if (sourcePath != null) {
            File file = new File(sourcePath);
            if (!file.exists()) {
                System.out.println("原路径不存在,请重新输入");
                return null;
            }
            getFile(file);
        }
        System.out.println(files);
        return files;
    }

    /**
     * Recursively adds {@code file}, or every regular file below it when it is
     * a directory, to {@link #files}.
     */
    private void getFile(File file) {
        if (file.isFile()) {
            files.add(file);
        } else if (file.isDirectory()) {
            File[] children = file.listFiles();
            // listFiles() returns null when the directory cannot be read.
            if (children != null) {
                for (File child : children) {
                    getFile(child);
                }
            }
        }
    }

    /**
     * Converts every file collected by {@link #getAllFilePath()} to PDF.
     *
     * <p>A directory source is written into the destination directory (created
     * when missing); a single-file source is written to the destination file.
     * Mixing the two kinds prints a failure message and converts nothing.
     */
    private void convert2Pdf() {
        if (sourcePath == null || destPath == null) {
            System.out.println("转换失败");
            return;
        }
        File source = new File(sourcePath);
        File dest = new File(destPath);
        if ((source.isDirectory() && dest.isFile()) || (source.isFile() && dest.isDirectory())) {
            System.out.println("转换失败");
            return;
        }
        if (source.isDirectory()) {
            // The directory must exist before any output file can be opened in it.
            if (!dest.isDirectory() && !dest.mkdirs()) {
                System.out.println("转换失败");
                return;
            }
            for (File file : files) {
                convert2PdfOneByOne(file);
            }
        } else if (source.isFile()) {
            // Single-file mode: destPath names the output PDF itself.
            File parent = dest.getAbsoluteFile().getParentFile();
            if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
                System.out.println("转换失败");
                return;
            }
            for (File file : files) {
                convertFile(file, dest);
            }
        }
    }

    /**
     * Converts one file into {@code <destPath>/<base name>.pdf}, where the base
     * name is the file name without its last extension.
     */
    private void convert2PdfOneByOne(File file) {
        String name = file.getName();
        // Use the last dot so "a.v1.docx" keeps "a.v1"; names without an
        // extension (or starting with the only dot) are used unchanged.
        int dot = name.lastIndexOf('.');
        String prefix = dot > 0 ? name.substring(0, dot) : name;
        convertFile(file, new File(destPath, prefix + ".pdf"));
    }

    /**
     * Extracts the text of {@code file}, echoes it to standard output and writes
     * it as a single paragraph into a landscape A4 PDF at {@code target}.
     * Files that yield no text are skipped; failures are printed and do not
     * propagate.
     */
    private void convertFile(File file, File target) {
        String content = getText(file);
        System.out.println(content);
        if (content == null || content.isEmpty()) {
            // NOTE: iText is expected to fail when a document without any
            // content is closed, so skip instead of aborting the whole batch.
            System.out.println("跳过文件(不支持的格式或内容为空): " + file);
            return;
        }
        try {
            // Create the font first so a font failure does not leave an empty output file.
            BaseFont bfChinese = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
            Font fontChinese = new Font(bfChinese, 12, Font.NORMAL);
            // try-with-resources releases the file handle even when iText throws.
            try (FileOutputStream outputStream = new FileOutputStream(target)) {
                Rectangle rect = new Rectangle(PageSize.A4.rotate());
                Document doc = new Document(rect);
                PdfWriter.getInstance(doc, outputStream);
                doc.open();
                doc.add(new Paragraph(content, fontChinese));
                doc.close();
            }
        } catch (DocumentException | IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the plain text of {@code file}, chosen by its (case-insensitive)
     * extension.
     *
     * @return the extracted text, or {@code ""} when the extension is not
     *         supported or reading fails (the failure is printed)
     */
    private String getText(File file) {
        String name = file.getName().toLowerCase(java.util.Locale.ROOT);
        try {
            if (name.endsWith(".xlsx")) {
                try (XSSFWorkbook workbook = new XSSFWorkbook(file)) {
                    ExcelExtractor extractor = new XSSFExcelExtractor(workbook);
                    return extractor.getText();
                }
            } else if (name.endsWith(".xls")) {
                try (POIFSFileSystem fileSystem = new POIFSFileSystem(file)) {
                    ExcelExtractor extractor = new EventBasedExcelExtractor(fileSystem);
                    return extractor.getText();
                }
            } else if (name.endsWith(".docx")) {
                try (InputStream inputStream = new FileInputStream(file);
                     XWPFDocument document = new XWPFDocument(inputStream)) {
                    XWPFWordExtractor extractor = new XWPFWordExtractor(document);
                    return extractor.getText();
                }
            } else if (name.endsWith(".doc")) {
                try (InputStream inputStream = new FileInputStream(file)) {
                    WordExtractor extractor = new WordExtractor(inputStream);
                    return extractor.getText();
                }
            } else if (name.endsWith(".txt")) {
                // Text files are read as GBK, as in the original implementation.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(new FileInputStream(file), "GBK"))) {
                    StringBuilder sb = new StringBuilder(16);
                    String line;
                    while ((line = reader.readLine()) != null) {
                        // readLine() strips the terminator; restore it so lines stay separate.
                        sb.append(line).append('\n');
                    }
                    return sb.toString();
                }
            } else if (name.endsWith(".pptx")) {
                try (InputStream inputStream = new FileInputStream(file);
                     XMLSlideShow slideShow = new XMLSlideShow(inputStream)) {
                    SlideShowExtractor<XSLFShape, XSLFTextParagraph> extractor =
                            new SlideShowExtractor<>(slideShow);
                    return extractor.getText();
                }
            }
        } catch (IOException | OpenXML4JException e) {
            e.printStackTrace();
        }
        return "";
    }

    /** Demo entry point: converts a fixed sample directory into a fixed output directory. */
    public static void main(String[] args) {
        ReadFiles readFiles = new ReadFiles("D:\\KGMultiMedia\\FilesToPdf", "D:\\KGMultiMedia\\pdf");
        readFiles.getAllFilePath();
        readFiles.convert2Pdf();
    }
}