【vue3进阶】OCR识别图片文字

5,767 阅读1分钟

Tesseract.js 是一个 JavaScript OCR 库,可以识别图像中几乎任何语言的文字,并支持将识别结果导出为 PDF,精准度很高。

1. 安装

npm install tesseract.js

2. 示例代码(vue3版)

<template>
  <div class="container">
    <div class="l_box">
      <!-- Preview of the image that will be recognized -->
      <el-image class="c_img" :src="url" fit="contain" />
      <div class="btn_box">
        <el-button type="primary" @click="getImgText" style="margin-right: 10px;" :disabled="loading">解 析</el-button>
        <!--
          No :limit on purpose: with limit=1 and a hidden file list, every pick after
          the first exceeds the limit and never reaches on-change, so only one image
          could ever be uploaded.
        -->
        <el-upload class="upload-demo" :on-change="handleChange" accept=".jpg, .jpeg, .png, .bmp"
          :show-file-list="false" :auto-upload="false">
          <el-button type="primary" :disabled="loading">上 传</el-button>
        </el-upload>
        <!-- Disabled until a recognition has produced PDF data -->
        <el-button type="primary" @click="download" style="margin-left: 10px;" :disabled="loading || !pdf">下载PDF</el-button>
      </div>
    </div>
    <!-- Recognized text; <pre> keeps the line breaks returned by the OCR engine -->
    <pre class="c_value" v-loading="loading">{{ word }}</pre>
  </div>
</template>

<script setup>
import { onMounted, onUnmounted, ref, shallowRef } from 'vue'
import { ElMessage } from 'element-plus'
import { createWorker } from 'tesseract.js';

// Reactive state shared by the template and the handlers below.
// The bindings are never reassigned (only their `.value` changes), hence `const`.
const url = ref('https://tesseract.projectnaptha.com/img/eng_bw.png') // image shown in the preview and passed to recognize()
const word = ref('') // recognized text rendered in the <pre>
const loading = ref(false) // true while a recognition is in progress

// The worker handle and the PDF data are only ever replaced wholesale, never
// mutated in place, so shallow refs avoid wrapping them in deep reactive proxies.
const worker = shallowRef(null) // set by init() once createWorker() resolves
const pdf = shallowRef(null) // PDF data from the last successful recognition

// Start creating the OCR worker once the component has mounted.
// The arrow wrapper is required: `init` is a `const` declared further down,
// so it must not be read until the hook actually runs.
onMounted(() => void init())

onUnmounted(() => {
  // Shut the OCR worker down with the component.
  // The worker is still null if the component is torn down before init()
  // has resolved (or if worker creation failed), so guard the call.
  if (worker.value) {
    worker.value.terminate()
  }
})

/**
 * Creates the Tesseract worker and stores it in `worker`.
 *
 * The worker is requested with the `eng` and `chi_sim` languages; progress
 * messages from the library are written to the console by the logger.
 * NOTE(review): the second argument (1) is presumably the OCR engine mode of
 * the tesseract.js API — confirm against the installed version.
 *
 * If creation fails, the error is logged, a warning toast is shown and
 * `worker` stays null instead of leaving an unhandled promise rejection.
 *
 * @returns {Promise<void>}
 */
const init = async () => {
  try {
    worker.value = await createWorker(['eng', 'chi_sim'], 1, {
      logger: m => console.log(m),
    });
  } catch (error) {
    console.error('Failed to create the Tesseract worker:', error)
    ElMessage({
      message: 'OCR 初始化失败',
      type: 'warning',
    })
  }
}

// 获取图片链接文本
/**
 * Runs OCR on the image referenced by `url` and stores the results.
 *
 * On success `word` receives the recognized text and `pdf` the PDF output
 * requested through `{ pdf: true }`. On failure the error is logged, a warning
 * toast is shown and the previous results are left untouched.
 * `loading` is true for the duration of the recognition.
 *
 * @returns {Promise<void>} Resolves when the attempt has finished; recognition
 *   errors are caught here rather than propagated.
 */
const getImgText = async () => {
  // The worker is created asynchronously in init(), so it may not exist yet.
  if (!worker.value) {
    ElMessage({
      message: '识别引擎尚未就绪,请稍后重试',
      type: 'warning',
    })
    return
  }

  loading.value = true
  try {
    const { data } = await worker.value.recognize(url.value, { pdfTitle: 'Example PDF' }, { pdf: true });
    pdf.value = data.pdf
    word.value = data.text
  } catch (error) {
    // Keep the cause visible for debugging; the toast alone hides it.
    console.error('OCR recognition failed:', error)
    ElMessage({
      message: '解析失败',
      type: 'warning',
    })
  } finally {
    // Single exit point for the loading flag, whatever the outcome.
    loading.value = false
  }
}

// 上传附件解析
/**
 * Upload `on-change` handler: previews the picked file and runs OCR on it.
 *
 * @param {object} file - Upload entry; `file.raw` is handed to
 *   `URL.createObjectURL`, so it is expected to be the underlying File/Blob.
 * @returns {Promise<void>} Resolves once the recognition attempt has finished.
 */
const handleChange = async (file) => {
  const previousUrl = url.value
  url.value = URL.createObjectURL(file.raw)

  // Object URLs stay alive until revoked. Release the one created for the
  // previous upload; the initial remote https URL is not a blob URL.
  if (previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl)
  }

  await getImgText()
}


// 下载PDF
/**
 * Saves the PDF produced by the last successful recognition as `example.pdf`.
 *
 * Shows a warning and does nothing when no PDF data is available yet, instead
 * of downloading an empty (invalid) file.
 */
const download = () => {
  if (!pdf.value) {
    ElMessage({
      message: '请先解析图片',
      type: 'warning',
    })
    return
  }

  const blob = new Blob([new Uint8Array(pdf.value)], { type: 'application/pdf' });
  // Named `blobUrl` so it does not shadow the module-level `url` ref.
  const blobUrl = URL.createObjectURL(blob);
  const link = document.createElement('a');
  link.href = blobUrl;
  link.download = 'example.pdf';
  link.click();
  // The temporary object URL is no longer needed once the click has been dispatched.
  URL.revokeObjectURL(blobUrl);
}

</script>
<style scoped lang="scss">
// Two-column layout: image preview and buttons on the left, recognized text on the right.
.container {
  width: 100%;
  height: 100%;
  padding: 20px;
  display: flex;
  justify-content: space-between;
  box-sizing: border-box;

  .l_box {
    width: 48%;
    height: 100%;
    margin-right: 2%;

    .c_img {
      width: 100%;
      // Leave exactly the height of .btn_box (50px) so the column does not overflow.
      height: calc(100% - 50px);
    }

    .btn_box {
      height: 50px;
      display: flex;
      align-items: center;
    }
  }

  .c_value {
    width: 50%;
    min-height: 500px;
    border: 1px solid #999;
    line-height: 30px;
    padding: 20px;
    // Keep padding and border inside the 50% width.
    box-sizing: border-box;
    // <pre> does not wrap; scroll long lines instead of stretching the layout.
    overflow: auto;
  }
}
</style>

效果图

image.png

更多 API 可以到官网查看和尝试:tesseract.projectnaptha.com