环境
| 软件 | 版本 |
|---|---|
| python | 3.11.2 |
| pymupdf | 1.22.5 |
代码
import fitz
import re
import os
# Input PDF, given relative to the current working directory (Windows-style path).
file_path = r".\test.pdf"
# Output directory, also relative to the current working directory.
dir_path = r".\output"
def pdf2image1(path, pic_path, min_size=3 * 1024 * 1024):
    """Extract large embedded images from a PDF and save them as PNG files.

    Walks every entry of the PDF's cross-reference table, selects the
    objects whose definition contains ``/Subtype /Image``, and writes each
    one whose pixmap size is at least ``min_size`` to
    ``<pic_path>/img_<n>.png``, where ``n`` counts the saved images
    starting at 1. One summary line is printed per saved image.

    Args:
        path: Path of the PDF file to read.
        pic_path: Directory that receives the PNG files; it is not created
            here, so it must already exist.
        min_size: Smallest ``Pixmap.size`` value for which an image is
            saved. Defaults to 3 MiB, the threshold that used to be
            hard-coded.
    """
    # Compiled once: the same pattern is applied to every xref entry.
    image_marker = re.compile(r"/Subtype(?= */Image)")
    count = 1
    # The context manager closes the document even if extraction fails.
    with fitz.open(path) as pdf:
        # xref 0 is skipped, as in the original loop.
        for xref in range(1, pdf.xref_length()):
            if not image_marker.search(pdf.xref_object(xref)):
                continue
            pix = fitz.Pixmap(pdf, xref)
            size = pix.size
            if size < min_size:
                continue
            # PNG output supports only grayscale and RGB; pixmaps with more
            # colour components (e.g. CMYK) must be converted before saving.
            if pix.n - pix.alpha > 3:
                pix = fitz.Pixmap(fitz.csRGB, pix)
            new_name = f"img_{count}.png"
            output_filename = os.path.join(pic_path, new_name)
            pix.save(output_filename)
            print("save to [", output_filename, "]", pix.irect.width, "x", pix.irect.height, ", ", size)
            count += 1
            # Drop the reference so the pixel buffer can be released promptly.
            pix = None
# Show the working directory, since file_path and dir_path are relative to it.
curPath = os.getcwd()
print("当前目录", curPath)
# exist_ok=True creates the output directory when missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(dir_path, exist_ok=True)
pdf2image1(file_path, dir_path)