接上一篇:将文档保存为pdf格式

401 阅读1分钟

以下代码需要用到poi和itext的jar包,实现读取各种文档(.txt,.docx,.doc,.xls,.xlsx,.pptx)的内容,并转化为pdf格式文档。

/**
 * Collects documents under a source path and writes the extracted plain text of
 * each one into a PDF file inside a destination directory.
 *
 * <p>Supported inputs are selected by file-name suffix: {@code .xlsx}, {@code .xls},
 * {@code .docx}, {@code .doc}, {@code .txt} and {@code .pptx}. Only text is carried
 * over; every output PDF consists of a single paragraph on landscape A4 pages.
 *
 * <p>Typical use: call {@link #getAllFilePath()} to collect the inputs, then run the
 * conversion (see {@link #main(String[])}).
 */
public class ReadFiles {

    // Source location: may be a single file or a directory (directories are walked recursively).
    private final String sourcePath;
    // Destination directory: every PDF is written to destPath/<base name>.pdf.
    private final String destPath;
    // Files found by the most recent getAllFilePath() call.
    private final List<File> files;

    /**
     * @param sourcePath file or directory to read documents from
     * @param destPath   directory the generated PDF files are written into
     */
    public ReadFiles(String sourcePath,String destPath){
        this.sourcePath = sourcePath;
        this.destPath = destPath;
        this.files = new ArrayList<>(12);
    }

    /**
     * Collects every regular file found at or below the source path and prints the
     * resulting list to standard output.
     *
     * @return the internal list of collected files (empty when the source path is
     *         {@code null}), or {@code null} when the source path does not exist
     */
    public List<File> getAllFilePath(){
        // Start from a clean list so repeated calls do not accumulate duplicates.
        files.clear();
        if(sourcePath != null){
            File file = new File(sourcePath);
            if(!file.exists()){
                System.out.println("原路径不存在,请重新输入");
                return null;
            }
            getFile(file);
        }
        System.out.println(files);
        return files;
    }

    /**
     * Recursively adds {@code file}, or every regular file below it when it is a
     * directory, to the collected list.
     */
    private void getFile(File file){
        if(file.isFile()){
            files.add(file);
        }else if(file.isDirectory()){
            // listFiles() returns null when the directory cannot be read.
            File[] children = file.listFiles();
            if(children != null){
                for(File child : children){
                    getFile(child);
                }
            }
        }
    }

    /**
     * Converts every collected file into a PDF inside the destination directory,
     * creating that directory (and missing parents) when necessary.
     *
     * <p>Prints the failure message and converts nothing when the destination is
     * missing, is an existing regular file, or cannot be created. Only files
     * gathered by a previous {@link #getAllFilePath()} call are processed.
     */
    private void convert2Pdf(){
        if(destPath == null){
            System.out.println("转换失败");
            return;
        }
        File dest = new File(destPath);
        // Output is always written as destPath/<name>.pdf, so the destination has to be a directory.
        if(dest.isFile() || (!dest.exists() && !dest.mkdirs())){
            System.out.println("转换失败");
            return;
        }
        for(File file : files){
            convert2PdfOneByOne(file);
        }
    }

    /**
     * Extracts the text of one file and writes it to {@code destPath/<base name>.pdf}.
     * Files that yield no text (unsupported suffix, empty document, or a read error
     * that was already reported) are skipped.
     *
     * <p>Inputs that share a base name (for example {@code a.doc} and {@code a.docx})
     * map to the same output file, so the later one overwrites the earlier one.
     */
    private void convert2PdfOneByOne(File file){
        String name = file.getName();
        // Strip only the last extension; names without one (or dot-files) are used unchanged.
        int dot = name.lastIndexOf('.');
        String prefix = dot > 0 ? name.substring(0, dot) : name;
        String content = getText(file);
        System.out.println(content);
        if(content == null || content.isEmpty()){
            // NOTE(review): the PDF library may refuse to close a document without
            // content, and an empty output file is of no use either way.
            System.out.println("skipped (no extractable text): " + file);
            return;
        }
        try {
            // NOTE(review): this CJK font presumably requires the library's Asian font
            // resources on the classpath; confirm for the deployed version.
            BaseFont bfChinese = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
            Font fontChinese = new Font(bfChinese, 12, Font.NORMAL);
            // try-with-resources releases the file handle even when writing fails midway.
            try (FileOutputStream outputStream = new FileOutputStream(new File(destPath, prefix + ".pdf"))) {
                Rectangle rect = new Rectangle(PageSize.A4.rotate());
                Document doc = new Document(rect);
                PdfWriter.getInstance(doc,outputStream);
                doc.open();
                doc.add(new Paragraph(content,fontChinese));
                doc.close();
            }
        } catch (DocumentException | IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the plain text of {@code file}, choosing the extractor by file-name
     * suffix (case-sensitive).
     *
     * @return the extracted text; an empty string for unsupported suffixes or when
     *         reading fails (the exception is printed, not rethrown)
     */
    private String getText(File file){
        String name = file.getName();
        try {
            if(name.endsWith(".xlsx")){
                try (XSSFWorkbook workbook = new XSSFWorkbook(file)) {
                    ExcelExtractor extractor = new XSSFExcelExtractor(workbook);
                    return extractor.getText();
                }
            }else if(name.endsWith(".xls")){
                try (POIFSFileSystem fileSystem = new POIFSFileSystem(file)) {
                    ExcelExtractor extractor = new EventBasedExcelExtractor(fileSystem);
                    return extractor.getText();
                }
            }else if(name.endsWith(".docx")){
                try (InputStream inputStream = new FileInputStream(file);
                     XWPFDocument document = new XWPFDocument(inputStream)) {
                    XWPFWordExtractor extractor = new XWPFWordExtractor(document);
                    return extractor.getText();
                }
            }else if(name.endsWith(".doc")){
                try (InputStream inputStream = new FileInputStream(file)) {
                    WordExtractor extractor = new WordExtractor(inputStream);
                    return extractor.getText();
                }
            }else if(name.endsWith(".txt")){
                // Text files are decoded as GBK; files in another encoding will come out garbled.
                try (InputStream inputStream = new FileInputStream(file);
                     BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream,"GBK"))) {
                    StringBuilder sb = new StringBuilder(16);
                    String line;
                    while((line = reader.readLine()) != null){
                        // readLine() strips the terminator; put one back so lines stay separate.
                        sb.append(line).append('\n');
                    }
                    return sb.toString();
                }
            }else if(name.endsWith(".pptx")){
                try (InputStream inputStream = new FileInputStream(file);
                     XMLSlideShow slideShow = new XMLSlideShow(inputStream)) {
                    SlideShowExtractor<XSLFShape, XSLFTextParagraph> extractor = new SlideShowExtractor<>(slideShow);
                    return extractor.getText();
                }
            }
        } catch (IOException | OpenXML4JException e) {
            e.printStackTrace();
        }
        return "";
    }

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the source path and {@code args[1]} the
     *             destination directory; the built-in sample paths are used for any
     *             argument that is missing
     */
    public static void main(String[] args) {
        String source = args.length > 0 ? args[0] : "D:\\KGMultiMedia\\FilesToPdf";
        String dest = args.length > 1 ? args[1] : "D:\\KGMultiMedia\\pdf";
        ReadFiles readFiles = new ReadFiles(source, dest);
        // A null result means the source path does not exist; there is nothing to convert then.
        if(readFiles.getAllFilePath() != null){
            readFiles.convert2Pdf();
        }
    }
}