Java通过Apache POI(PDF使用iText)获取文档页数(Word、PPT、PDF)

1,729 阅读2分钟

用户上传文件后,后台需要获取文件的页数。这里使用Apache POI来获取Word和PPT文档的页数,PDF文档的页数则通过iText获取。

Maven项目使用依赖

    <dependencies>
        <!-- https://mvnrepository.com/artifact/com.itextpdf/itextpdf -->
        <!-- iText: presumably supplies the PdfReader used for PDF page counting; confirm against the utility class imports -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itextpdf</artifactId>
            <version>5.0.6</version>
        </dependency>
        <!-- Apache POI core module -->
        <!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>4.1.2</version>
        </dependency>
        <!-- Apache POI OOXML module; presumably needed for the .docx/.pptx readers; keep on the same version as poi -->
        <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>4.1.2</version>
        </dependency>
        <!-- Apache POI scratchpad module; presumably needed for the legacy .doc/.ppt readers; keep on the same version as poi -->
        <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>4.1.2</version>
        </dependency>
    </dependencies>

目前市场上大部分项目都基于Spring Boot架构,一般使用MultipartFile对象来接收上传的文件。Spring Boot默认限制单个文件大小为1MB(整个请求为10MB),比较鸡肋,所以我们可以在yml文件中通过spring.servlet.multipart下的属性来调整:

# Lifts the upload size limits (-1 means unlimited).
# Spring Boot only reads these keys under the top-level "spring" key.
# NOTE: unlimited uploads let a single request exhaust memory or disk;
# prefer an explicit limit (for example 50MB) in production.
spring:
  servlet:
    multipart:
      max-file-size: -1
      max-request-size: -1

各个文件的获取页数的方法不一样,所以要通过后缀获取文件类型

    String fileName = file.getOriginalFilename();
    // Extension including the leading dot (e.g. ".pdf").
    // Falls back to "" when the name is missing or contains no dot, instead of throwing.
    int dotIndex = (fileName == null) ? -1 : fileName.lastIndexOf('.');
    String type = (dotIndex < 0) ? "" : fileName.substring(dotIndex);

接下来就是需要将MultipartFile转成InputStream流的形式进行解析

MultipartFile转成InputStream的方法:
MultipartFile file; // the received upload
byte[] bytes = file.getBytes(); // read the received file into a byte array
InputStream byteArrayInputStream = new ByteArrayInputStream(bytes); // wrap the bytes in a ByteArrayInputStream to obtain an InputStream
Integer page = FilePagesUtils.filesPage(byteArrayInputStream, type); // call the utility class with the stream and the file type

工具类

/**
 * Utility for reading the page (or slide) count of a document supplied as a stream.
 *
 * <p>The reader is chosen by file extension: {@code .doc}, {@code .docx}, {@code .pdf},
 * {@code .ppt} and {@code .pptx}. For Word documents the count is the value stored in the
 * document's own properties; it is not computed by laying out the document.
 *
 * <p>NOTE(security): the POI-based readers call {@code ZipSecureFile.setMinInflateRatio(-1.0d)},
 * which is a static (JVM-wide) setting. A negative ratio switches off POI's zip-bomb check,
 * which is risky when the input is an untrusted upload. The calls are kept so existing
 * behavior does not change; consider removing them or using a real threshold.
 */
public class FilePagesUtils {
    /**
     * Returns the page count of the document, picking the reader from the file extension.
     *
     * @param fileInputStream stream positioned at the start of the document; it is consumed
     * @param fileType file extension including the leading dot (e.g. {@code ".pdf"});
     *                 matched case-insensitively
     * @return the page or slide count, or 0 when the extension is unsupported or {@code null},
     *         or when the document could not be read
     * @throws IOException if the selected reader propagates one
     */
    public static int filesPage(InputStream fileInputStream, String fileType) throws IOException {
        // equalsIgnoreCase so that ".PDF" / ".Docx" are handled like their lower-case forms;
        // it also returns false for a null fileType.
        if (".doc".equalsIgnoreCase(fileType)) {
            return countWord2003Page(fileInputStream);
        } else if (".docx".equalsIgnoreCase(fileType)) {
            return countWord2007Page(fileInputStream);
        } else if (".pdf".equalsIgnoreCase(fileType)) {
            return countPdfPage(fileInputStream);
        } else if (".pptx".equalsIgnoreCase(fileType)) {
            return countPPTXPage(fileInputStream);
        } else if (".ppt".equalsIgnoreCase(fileType)) {
            return countPPTPage(fileInputStream);
        }
        return 0;
    }

    /**
     * Counts the pages of a PDF document.
     *
     * @param fileInputStream stream containing the PDF
     * @return the number of pages, or 0 if the PDF could not be read
     */
    public static int countPdfPage(InputStream fileInputStream) {
        int pageCount = 0;
        PdfReader reader = null;
        try {
            reader = new PdfReader(fileInputStream);
            pageCount = reader.getNumberOfPages();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // reader is still null when the constructor threw; closing it unguarded
            // would raise a NullPointerException and hide the real failure.
            if (reader != null) {
                reader.close();
            }
        }
        return pageCount;
    }

    /**
     * Counts the slides of a PPT (PowerPoint 97-2003) presentation.
     *
     * @param fileInputStream stream containing the presentation; closed before returning
     * @return the number of slides, or 0 if counting failed
     * @throws IOException if the presentation cannot be opened or closed
     */
    public static int countPPTPage(InputStream fileInputStream) throws IOException {
        int pageCount = 0;
        ZipSecureFile.setMinInflateRatio(-1.0d); // see the security note on the class

        // Failures while opening still propagate to the caller; try-with-resources
        // additionally guarantees that the stream and the slide show are closed.
        try (InputStream in = fileInputStream;
             HSLFSlideShow slideShow = new HSLFSlideShow(in)) {
            try {
                pageCount = slideShow.getSlides().size();
            } catch (Exception e) {
                // Best effort: a failure while counting yields 0.
                e.printStackTrace();
            }
        }
        return pageCount;
    }

    /**
     * Counts the slides of a PPTX presentation.
     *
     * @param fileInputStream stream containing the presentation; closed before returning
     * @return the number of slides, or 0 if an I/O error occurred
     * @throws IOException declared for compatibility; I/O errors are reported and yield 0
     */
    public static int countPPTXPage(InputStream fileInputStream) throws IOException {
        int pageCount = 0;
        ZipSecureFile.setMinInflateRatio(-1.0d); // see the security note on the class
        // Close the slide show as well as the stream so the opened package is released.
        try (InputStream in = fileInputStream;
             XMLSlideShow slideShow = new XMLSlideShow(in)) {
            pageCount = slideShow.getSlides().size();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return pageCount;
    }

    /**
     * Reads the page count of a Word 2007+ ({@code *.docx}) document.
     *
     * @param fileInputStream stream containing the document
     * @return the page count stored in the document's extended properties,
     *         or 0 if an I/O error occurred
     * @throws IOException declared for compatibility; I/O errors are reported and yield 0
     */
    public static int countWord2007Page(InputStream fileInputStream) throws IOException {
        int pageCount = 0;
        ZipSecureFile.setMinInflateRatio(-1.0d); // see the security note on the class
        // try-with-resources closes the document only if it was actually opened,
        // so a failed constructor no longer ends in a NullPointerException.
        try (XWPFDocument docx = new XWPFDocument(fileInputStream)) {
            pageCount = docx.getProperties().getExtendedProperties().getUnderlyingProperties().getPages(); // total pages
        } catch (IOException e) {
            e.printStackTrace();
        }
        return pageCount;
    }

    /**
     * Reads the page count of a Word 97-2003 ({@code *.doc}) document.
     *
     * @param fileInputStream stream containing the document
     * @return the page count stored in the document's summary information, or 0 if the
     *         document has no summary information or an I/O error occurred
     * @throws IOException declared for compatibility; I/O errors are reported and yield 0
     */
    public static int countWord2003Page(InputStream fileInputStream) throws IOException {
        int pageCount = 0;
        ZipSecureFile.setMinInflateRatio(-1.0d); // see the security note on the class
        // try-with-resources closes the extractor only if it was actually opened,
        // so a failed constructor no longer ends in a NullPointerException.
        try (WordExtractor doc = new WordExtractor(fileInputStream)) { // text extractor for .doc files
            // The summary information can be absent; treat that as "page count unknown" (0).
            if (doc.getSummaryInformation() != null) {
                pageCount = doc.getSummaryInformation().getPageCount(); // total pages
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return pageCount;
    }
}