"pdfjs-dist": "^2.10.377"
安装
npm install pdfjs-dist --save
在文件头部引入,解决报错:No "GlobalWorkerOptions.workerSrc" specified
import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.entry'
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist'
// Tell pdf.js which worker script to use by pointing it at the worker entry
// imported from 'pdfjs-dist/build/pdf.worker.entry'. Per the note in this file,
// this is what resolves the 'No "GlobalWorkerOptions.workerSrc" specified' error.
GlobalWorkerOptions.workerSrc = pdfjsWorker
读取文件
/**
 * Extracts the text of a single page of a loaded PDF document.
 *
 * The page's text items are concatenated with no separator, and then every
 * run of whitespace in the combined string is replaced by one newline.
 *
 * @param {object} doc - Loaded document exposing `getPage(pageNo)`
 *   (presumably a pdf.js PDFDocumentProxy — confirm against the caller).
 * @param {number} pageNo - Page number forwarded unchanged to `doc.getPage`.
 * @returns {Promise<string>} The page text with whitespace runs collapsed to '\n'.
 */
const readPDFPage = async(doc, pageNo) => {
  const pdfPage = await doc.getPage(pageNo)
  const { items } = await pdfPage.getTextContent()
  // Keep map + join: join('') renders a missing `str` as an empty string.
  const fragments = items.map(({ str }) => str)
  const rawText = fragments.join('')
  return rawText.replaceAll(/\s+/g, '\n')
}
/**
 * Reads a PDF file in the browser and reports the text of all of its pages.
 *
 * The file is loaded into an ArrayBuffer with FileReader, opened with pdf.js
 * `getDocument`, and every page from 1 to `numPages` is read concurrently via
 * `readPDFPage`. The per-page strings are concatenated in page order with no
 * separator.
 *
 * Callback-style API, typically wrapped as
 * `new Promise((resolve, reject) => readPDFDoc(file, resolve, reject))`.
 *
 * @param {Blob} file - File/Blob to hand to `FileReader.readAsArrayBuffer`.
 * @param {(text: string) => void} resolve - Called once with the full text.
 * @param {(reason: unknown) => void} reject - Called with the error thrown while
 *   starting the read, loading the document or reading a page, or with the
 *   FileReader `error` event if the read itself fails.
 * @returns {void}
 */
const readPDFDoc = (file, resolve, reject) => {
  const reader = new FileReader()

  // Handlers are attached before the read is started, as is conventional.
  reader.onload = async(event) => {
    let loadingTask
    try {
      loadingTask = getDocument(event.target.result)
      const doc = await loadingTask.promise
      // pdf.js page numbers start at 1.
      const pageNumbers = Array.from({ length: doc.numPages }, (_, index) => index + 1)
      const pageTexts = await Promise.all(
        pageNumbers.map(pageNo => readPDFPage(doc, pageNo))
      )
      resolve(pageTexts.join(''))
    } catch (err) {
      reject(err)
    } finally {
      // Release the document and its worker-side resources whether or not
      // extraction succeeded; the document is never exposed to the caller.
      if (loadingTask) {
        try {
          await loadingTask.destroy()
        } catch (cleanupErr) {
          // Best-effort cleanup: the outcome has already been delivered through
          // resolve/reject, so a cleanup failure must not surface as an
          // unhandled rejection.
        }
      }
    }
  }
  reader.onerror = reject

  try {
    reader.readAsArrayBuffer(file)
  } catch (err) {
    // e.g. `file` is not a Blob: report it through the same error channel
    // instead of throwing past the callbacks.
    reject(err)
  }
}
参考
github.com/mozilla/pdf…