Base64 PDF to image

1,454 阅读4分钟

前言

之前有个需求是前端需要展示从服务端获取的 PDF 文件,项目是嵌入在 webview 中的单页面应用。由于使用的是 React,于是很自然地找到了 react-pdf 这个库,使用下来体验还不错。不过,随着需求的推进,发现 svg 模式在预览包含多选框的文件时会有样式问题,于是被迫改用 canvas 模式。本以为问题已经解决,其实不然。

Canvas maximum limit 和 Canvas memory maximum limit

需求希望预览的时候不分页,且需要支持可缩放。在 iOS 上容易出现 Canvas area exceeds the maximum limit (width * height > 16777216) 或 Total canvas memory use exceeds the maximum limit 的问题造成显示空白。总而言之就是画布太大了。react-pdf 的 canvas 模式是将每一页都渲染生成一个单独的 canvas 标签,这正是问题的症结所在。

解决方案

如果一次只渲染 PDF 的一页,是在可行范围之内的:mobile 中单页画布宽高的乘积自然不会超过限制大小(除非放大),也就不会有超出内存的问题。于是乎就有了一个思路:

  1. 按页加载
  2. 将当前页通过 canvas 生成,再转换为 png,记录下 png 的 base64
  3. 销毁 canvas 再重新生成
  4. 重复上述步骤直到最后一页转换完成
  5. 将得到的图片赋予 img 标签的 src

从获取文件开始

前端无法直接预览 PDF 文件,除非获取到了原生的支持,所以比较方便的是直接使用链接或者获取 base64 数据配合例如 react-pdf 之类的库使用。我这里使用了 base64。

将文件解析为 blob

/**
 * Issues a GET to the server download endpoint, asking for the response as a blob.
 *
 * The caller-supplied `data` fields are sent as query parameters together with
 * the current access token (which overrides any `accessToken` key in `data`).
 *
 * @param params - Wrapper object whose `data` holds the query parameters.
 * @returns Whatever the project's `request` helper resolves with; it is invoked
 *          as `request<Blob>`, so presumably a Blob — verify against the helper.
 */
export async function downloadFile(params: { data: Record<string, any> }) {
  const requestOptions = {
    method: 'get',
    headers: {
      Charset: 'UTF-8',
      'content-Type': 'application/x-www-form-urlencoded',
    },
    responseType: 'blob',
    // Token goes last so it always wins over a same-named key in `data`.
    params: { ...params.data, accessToken: getToken() },
  };

  const endpoint = `${Config.BASEURL}${RequestUrls.serverDownload}`;
  return request<Blob>(endpoint, requestOptions as any);
}

得到 PDF 的 base64 编码

// Abridged for the article.
/**
 * Downloads an attachment, validates that it looks like a PDF, stores its
 * base64 payload via `setPdfBase64`, and opens the preview via `toggleShow`.
 *
 * The blob is first read as text so that a JSON error body returned by the
 * server (blob type `application/json`) can be surfaced to the user, and so
 * that non-PDF content can be rejected before the preview opens.
 *
 * @param attachId - Attachment id passed to `downloadFile`; a missing id shows a failure toast.
 */
const previewPDF = async (attachId: string | undefined) => {
  if (!attachId) {
    Toast.fail("PDF 不存在!");
    return;
  }

  const blob = (await downloadFile({
    data: {
      id: attachId,
    },
  })) as unknown as Blob;

  const blobType = blob.type;
  const blobReader = new FileReader();

  blobReader.onload = () => {
    // readAsText yields a string; guard anyway so `.includes` cannot crash.
    const blobText = typeof blobReader.result === "string" ? blobReader.result : "";

    if (blobType === "application/json") {
      // Example body: {"resultCode":"500","resultMsg":"请稍后重试","params":[]}
      let resultMsg: unknown;
      try {
        resultMsg = JSON.parse(blobText)?.resultMsg;
      } catch {
        // Malformed error body: fall through to the default message below
        // instead of throwing an uncaught error inside the reader callback.
      }
      showAlert({
        title: "PDF 获取失败!",
        message: `${resultMsg || "请稍后重试!"}`,
        onOk: () => {},
      });
      return;
    }

    if (!blobText.includes("PDF")) {
      showAlert({
        title: "PDF 查看失败!",
        message: "文件类型错误!",
        onOk: () => {},
      });
      return;
    }

    const reader = new FileReader();
    reader.addEventListener("load", () => {
      if (typeof reader.result === "string") {
        // Data URL looks like "data:<mime>;base64,<payload>"; keep only the payload.
        // Fall back to "" so the setter never receives undefined.
        setPdfBase64(reader.result.split(",")[1] ?? "");
      }
    });
    reader.readAsDataURL(blob);
    toggleShow();
  };

  // Previously a failed read was dropped silently and nothing happened.
  blobReader.onerror = () => {
    showAlert({
      title: "PDF 查看失败!",
      message: "请稍后重试!",
      onOk: () => {},
    });
  };

  blobReader.readAsText(blob);
};

将 PDF 的 base64 编码转换为 png

import { Toast } from 'antd-mobile';

const PDFJS = require('pdfjs-dist/legacy/build/pdf.js'); // import 会报错
const workerSrc = require('pdfjs-dist/legacy/build/pdf.worker.entry');

/** Options accepted by `cover`. */
interface Base64ToImageOptions {
  /** The PDF source: a bare base64 payload when `type` is 'base64', otherwise used directly as the document URL. */
  pdf: string;
  /** Forwarded as the second argument of `canvas.toDataURL`; `cover` defaults it to 1.0. */
  quality?: number;
  /** How `pdf` is interpreted; `cover` defaults it to 'base64'. */
  type?: 'base64' | 'url';
}

// Recommended: wrap calls in try/catch; when pdf.js is loaded from a CDN, pair this with an ErrorBoundary.
// When loading pdf.js from a CDN, consider packaging this logic as a hook instead.
/**
 * Renders every page of a PDF to a PNG data URL, one page at a time.
 *
 * Pages are processed sequentially with a fresh canvas per page, so only one
 * canvas is in use at any moment. A loading toast is shown for the duration
 * and hidden on both success and failure.
 *
 * @param options.pdf - Base64 payload (no data-URL prefix) or a URL, depending on `type`.
 * @param options.quality - Passed to `canvas.toDataURL`. Per the canvas spec this
 *        only affects lossy formats, so it has no effect on the PNG output.
 * @param options.type - 'base64' (default) wraps `pdf` in a data URL; 'url' uses it as-is.
 * @returns Promise resolving to the page images in page order.
 * @throws Rejects with the underlying pdf.js error if loading or rendering fails.
 */
const cover = async ({
  pdf,
  quality = 1.0,
  type = 'base64',
}: Base64ToImageOptions): Promise<Array<string>> => {
  Toast.loading('PDF 加载中...', 0);

  try {
    const loadingTask = PDFJS.getDocument({
      url: type === 'base64' ? `data:application/pdf;base64,${pdf}` : pdf,
    });
    const doc = await loadingTask.promise;
    const images: Array<string> = [];

    // Load page by page; `numPages` is the public counterpart of `_pdfInfo.numPages`.
    for (let pageNumber = 1; pageNumber <= doc.numPages; pageNumber += 1) {
      const page = await doc.getPage(pageNumber);

      // Render scale. If the PDF is too wide to display, or layouts vary between
      // files, derive an adaptive scale from the page width instead.
      const scale = 1.5;
      const viewport = page.getViewport({ scale });

      const canvas = document.createElement('canvas');
      canvas.height = viewport.height;
      canvas.width = viewport.width;

      await page.render({
        canvasContext: canvas.getContext('2d'),
        viewport,
      }).promise;

      // Capture the rendered page as a PNG data URL.
      images.push(canvas.toDataURL('image/png', quality));

      // The canvas is never attached to the document, so there is nothing to
      // remove from the DOM. Shrink its backing store instead.
      // NOTE(review): zeroing the size is the usual way to get WebKit to release
      // canvas memory promptly — confirm on the target iOS versions.
      canvas.width = 0;
      canvas.height = 0;
    }

    return images;
  } catch (e) {
    console.log(e);
    // Re-throw the caught value; the original rejected with an undeclared `error`.
    throw e;
  } finally {
    Toast.hide();
  }
};

// Default export: an object exposing the `cover` converter as its only member.
export default {
  cover,
};

预览

import React, { FC, useCallback, useEffect, useState } from "react";
import { Modal, Stepper, Button, Toast } from "antd-mobile";

import { Empty } from "@alitajs/antd-mobile-plus";
import { useThrottleFn, usePdfToImage } from "@/hooks";
import { base64PdfToImage } from "@/utils";

import IconClose from "@/assets/close.png";
import styles from "./index.less";

/** Props of `PreviewModal`. */
interface PreviewModalType {
  /** PDF content handed to the converter as its `pdf` option; conversion is skipped while this is empty. */
  pdfBase64: string;
  /** Initial value of the image scale state; the component defaults it to 1. */
  initScale?: number;
  /** Controls the modal's visibility; conversion only runs while this is true. */
  visible: boolean;
  /** Invoked by the modal's `onClose` and by a click on the close icon. */
  toggle: () => void;
}

/**
 * Bottom-sheet modal that previews a PDF as a vertical list of page images.
 *
 * While `visible` is true and `pdfBase64` is non-empty, the PDF is converted to
 * images via `base64PdfToImage.cover`. A range slider scales the image width
 * (value × 100%), with updates throttled to one per 500 ms.
 */
const PreviewModal: FC<PreviewModalType> = ({
  pdfBase64,
  initScale = 1,
  visible = false,
  toggle,
}) => {
  // Hook-based alternative (CDN-loaded pdf.js):
  // const [images, err] = usePdfToImage({ quality: 1.0, pdf: pdfBase64 }, visible);
  const [images, setImages] = useState<Array<string>>([]);
  const [scale, setScale] = useState<number>(initScale);

  const { run: throttleSetScale } = useThrottleFn(
    (val: number) => {
      setScale(val);
    },
    { wait: 500 }
  );

  useEffect(() => {
    if (!visible || !pdfBase64) return undefined;

    // Guards against a slow conversion finishing after the inputs changed or the
    // component unmounted, which would otherwise overwrite newer state.
    let cancelled = false;

    try {
      base64PdfToImage
        // `pdf` may also be a URL when `type: 'url'` is passed, e.g.
        // 'http://localhost:8000/download.do?id=60032335&accessToken=xxx'.
        .cover({ quality: 1.0, pdf: pdfBase64 })
        .then((imgArr: Array<string>) => {
          if (!cancelled) setImages(imgArr);
        })
        .catch((err: unknown) => {
          if (cancelled) return;
          console.log(err);
          Toast.fail((err as Error)?.message || 'PDF 预览失败,请稍后再试!');
        });
    } catch (error) {
      // Covers a synchronous throw from the converter itself.
      Toast.fail('PDF 预览失败,请稍后再试!');
    }

    return () => {
      cancelled = true;
    };
  }, [visible, pdfBase64]);

  return (
    <Modal
      visible={visible}
      onClose={toggle}
      popup
      animationType="slide-up"
      className={styles.pdfPreviewModalStyle}
    >
      <div className={styles.pdfPreviewContainer}>
        <div className={styles.titleTop}>
          <div className={styles.title}>PDF 预览</div>
          <img
            src={IconClose}
            alt=""
            className={styles.closeImg}
            onClick={toggle}
          />
        </div>
        <div className={styles.pdfContent}>
          {images.map((img, index) => {
            return (
              <img
                key={index}
                style={{ width: `${100 * scale}%` }} // TODO: switch to pinch-to-zoom later
                src={img}
                alt={`pdf_${index}`}
              />
            );
          })}
          {/* Hook variant: {err || !images.length ? <Empty /> : null} */}
          {!images.length ? <Empty /> : null}
        </div>
        <div className={styles.bottom}>
          <div className={styles.slider}>
            <input
              type="range"
              id="volume"
              min="1"
              max="3"
              defaultValue="1"
              // target.value is a string; convert so the numeric scale state stays numeric.
              onChange={({ target }) => throttleSetScale(Number(target.value))}
            />
          </div>
        </div>
      </div>
    </Modal>
  );
};

export default PreviewModal;

若使用 cdn

import { useEffect, useState, useCallback } from 'react';
import { useExternal } from 'ahooks';
import { Toast } from 'antd-mobile';

/** Options accepted by the `usePdfToImage` hook. */
export interface UsePdfToImageOptions {
  /** The PDF source: a bare base64 payload when `type` is 'base64', otherwise used directly as the document URL. */
  pdf: string;
  /** Forwarded as the second argument of `canvas.toDataURL`; the hook defaults it to 1.0. */
  quality?: number;
  /** How `pdf` is interpreted; the hook defaults it to 'base64'. */
  type?: 'base64' | 'url';
}

/**
 * Hook that loads pdf.js from a CDN on demand and converts a PDF into PNG data
 * URLs, one per page.
 *
 * The script is requested the first time `visible` is true and
 * `window.pdfjsLib` is absent. Conversion runs whenever the hook is visible,
 * the library is available and `pdf` is non-empty, and re-runs when `pdf`,
 * `type` or `quality` change.
 *
 * @param options - PDF source and rendering options (see `UsePdfToImageOptions`).
 * @param visible - Gate for loading and conversion; defaults to false.
 * @returns `[images, error]`. `error` is undefined until a dependency-load or
 *          conversion failure occurs (the tuple type is kept as declared for
 *          existing callers).
 */
export default function usePdfToImage(
  { pdf, quality = 1.0, type = 'base64' }: UsePdfToImageOptions,
  visible: boolean = false,
) {
  const [images, setImages] = useState<Array<string>>([]);
  const [error, setError] = useState<Error>();
  const [path, setPath] = useState<string>('');

  const [status, { load }] = useExternal(path, {
    type: 'js',
    async: true,
  }); // ahooks 2.x usage

  // Request the pdf.js script lazily, only once it is actually needed.
  useEffect(() => {
    if (!visible || status === 'ready') return;
    // With a matching `declare global`, window.pdfjsLib needs no `any` cast.
    if (window.pdfjsLib) return;
    setPath('https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.12.313/pdf.min.js');
    load();
  }, [visible, status]);

  // Surface script-loading progress and failures to the user.
  useEffect(() => {
    const reportDependencyFailure = () => {
      setError(new Error('PDF 依赖加载失败,请稍后再试!'));
      Toast.fail('PDF 依赖加载失败,请稍后再试!');
    };

    switch (status) {
      case 'ready':
        // The script finished loading but did not expose the expected global.
        if (!window.pdfjsLib) reportDependencyFailure();
        break;

      case 'loading':
        Toast.loading('PDF 加载中...', 0);
        break;

      case 'error':
        reportDependencyFailure();
        break;

      default:
        break;
    }
  }, [status]);

  // Convert the PDF. Keyed on library availability rather than on
  // `status === 'ready'`, so it also runs when pdf.js was already loaded by an
  // earlier mount (in which case no load is requested here).
  useEffect(() => {
    const pdfjsLib = window.pdfjsLib;
    if (!visible || !pdf || !pdfjsLib) return undefined;

    pdfjsLib.GlobalWorkerOptions.workerSrc =
      'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.12.313/pdf.worker.min.js';

    let cancelled = false; // set when inputs change or the component unmounts
    let running = true;

    const convert = async () => {
      Toast.loading('PDF 加载中...', 0);
      try {
        const doc = await pdfjsLib.getDocument({
          url: type === 'base64' ? `data:application/pdf;base64,${pdf}` : pdf,
        }).promise;

        const pages: Array<string> = [];
        // `numPages` is the public counterpart of `_pdfInfo.numPages`.
        for (let pageNumber = 1; pageNumber <= doc.numPages; pageNumber += 1) {
          if (cancelled) return;
          const page = await doc.getPage(pageNumber);

          const scale = 1.5;
          const viewport = page.getViewport({ scale });
          const canvas = document.createElement('canvas');
          canvas.height = viewport.height;
          canvas.width = viewport.width;

          await page.render({
            canvasContext: canvas.getContext('2d'),
            viewport,
          }).promise;

          pages.push(canvas.toDataURL('image/png', quality));

          // The canvas is never attached to the document, so there is nothing to
          // remove from the DOM. Shrink its backing store instead.
          // NOTE(review): zeroing the size is the usual way to get WebKit to
          // release canvas memory promptly — confirm on target iOS versions.
          canvas.width = 0;
          canvas.height = 0;
        }

        if (cancelled) return;
        setError(undefined);
        setImages(pages);
        Toast.hide();
      } catch (e) {
        console.log(e);
        if (cancelled) return;
        setError(e instanceof Error ? e : new Error(String(e)));
        Toast.fail('PDF 加载失败,请稍后再试!');
      } finally {
        running = false;
      }
    };

    void convert();

    return () => {
      cancelled = true;
      // Do not leave a never-ending loading toast behind an abandoned run.
      if (running) Toast.hide();
    };
    // `status` is listed so the effect re-evaluates once the script has loaded.
  }, [visible, status, pdf, type, quality]);

  return [images, error] as [Array<string>, Error];
}

模拟器实测

PDF 文件取自 mozilla/pdf.js

image.png

image.png

大力支持

pdf.js

react-pdf

note-on-iOS-work-with-JavaScript

SwiftUI-webview