import mammoth from "mammoth";
import * as pdfjsLib from "pdfjs-dist/build/pdf";//"pdfjs-dist": "2.0.943"版本
import pdfjsWorker from "pdfjs-dist/build/pdf.worker.entry";
安装用于处理 pdf 解析中类私有方法的 Babel 插件：
npm install --save-dev @babel/plugin-proposal-class-properties @babel/plugin-proposal-nullish-coalescing-operator @babel/plugin-proposal-private-methods
babel.config.js中设置
plugins: [
"@babel/plugin-proposal-optional-chaining",
"@babel/plugin-proposal-nullish-coalescing-operator",
"@babel/plugin-proposal-private-methods",
],
// Configure pdf.js to use the worker entry imported from "pdfjs-dist/build/pdf.worker.entry".
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsWorker;
changeFile(file, fileList) {
const type = file.name.split(".").pop().toLowerCase();
if (file.status !== "ready") return;
const reader = new FileReader();
const handleTextFile = () => {
reader.readAsText(file.raw);
reader.onload = () => {
if (reader.result) {
this.textarea = reader.result;
}
};
};
const handleDocxFile = () => {
reader.readAsArrayBuffer(file.raw);
reader.onload = (e) => {
const arrayBuffer = e.target.result;
mammoth.convertToHtml({ arrayBuffer }).then((result) => {
this.textarea = result.value;
}).catch(console.error);
};
};
const handlePdfFile = async () => {
reader.readAsArrayBuffer(file.raw);
reader.onload = async (e) => {
const typedarray = new Uint8Array(e.target.result);
const pdf = await pdfjsLib.getDocument(typedarray).promise;
let text = "";
for (let i = 1; i <= pdf.numPages; i++) {
const page = await pdf.getPage(i);
const content = await page.getTextContent();
const pageText = content.items.map(item => item.str).join("");
text += `${pageText}<br/>`; // 每页文本用换行分隔
}
this.textarea = text;
};
};
switch (type) {
case "txt":
handleTextFile();
break;
case "docx":
handleDocxFile();
break;
case "pdf":
handlePdfFile();
break;
default:
return; // 不支持的文件类型
}
}