在python中处理pdf和html的路上时常非常痛苦,这里作为日常工作的小记,来一次暴力的输出
import fitz
from tqdm import tqdm
def pdf2html(input_path, html_path):
    """Convert every page of a PDF into HTML and save it as a single file.

    Each page is rendered with PyMuPDF's HTML text extraction and the
    per-page fragments are concatenated in page order, so the output
    contains the whole document rather than only its last page.

    Args:
        input_path: Path of the PDF file to read.
        html_path: Path of the HTML file to create (overwritten if present).
    """
    page_fragments = []
    doc = fitz.open(input_path)
    try:
        for page in tqdm(doc):
            # PyMuPDF renamed ``getText`` to ``get_text``; prefer the new
            # name and fall back so older installations keep working.
            extract = getattr(page, "get_text", None) or page.getText
            page_fragments.append(extract('html'))
    finally:
        # Release the PDF handle even if extraction fails part-way through.
        doc.close()

    print("开始输出html文件")
    # newline="" writes the extracted markup without newline translation.
    with open(html_path, 'w', encoding='utf8', newline="") as fp:
        fp.write("\n".join(page_fragments))
# Source PDF handed to pdf2html.
input_path = r'G:\back\pyfile\翻译\pdf_translate-master\3.pdf'  # if this raises an error, use an absolute path
# Destination file that pdf2html writes the HTML into.
html_path = r'G:\back\pyfile\翻译\pdf_translate-master\input.html'
# Run the conversion with the two paths above.
pdf2html(input_path,html_path)
爽到你了吗? 感谢关注!