什么都不必说--简单图片金额识别OCR

416 阅读1分钟
需求 获取图片中金额

1.添加maven

  <!--图片识别-->
        <dependency>
            <groupId>net.sourceforge.tess4j</groupId>
            <artifactId>tess4j</artifactId>
            <version>2.0.1</version>
            <exclusions>
                <exclusion>
                    <groupId>com.sun.jna</groupId>
                    <artifactId>jna</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

2.安装tessdata

brew install tesseract 

3.工具类

public class OCRUtil {
    private static String  tessdataPath;
    private static String STRING_TESS_VARIABLE_KEY = "tessedit_char_whitelist";
    private static String STRING_TESS_VARIABLE_VALUE = "0123456789.¥";
    private static String STRING_TESS_NAME = "tessdata";

    private ITesseract instance;

    private static OCRUtil ocrUtil;

    private OCRUtil(ITesseract instance){
        this.instance = instance;
    }

    private static final LogUtil logUtil  = LogUtil.init(OCRUtil.class);

    //必须加锁,在高并发情况下会出现jvm崩坏情况
    public  synchronized String getAmount(BufferedImage bi) throws IOException {
        long start = System.currentTimeMillis();
        try {

            String ocrResult = instance.doOCR(bi);
            if(StringUtil.isEmpty(ocrResult)){
                return null;
            }
            if(ocrResult.indexOf("¥") == -1){
                return null;
            }
            logUtil.i("金额识别 为:%s\n金额识别耗时:%s毫秒",ocrResult,System.currentTimeMillis()-start);
            return ocrResult.replace("\n","").substring(ocrResult.indexOf("¥") + 1).trim();
        } catch (TesseractException e) {
            System.err.println(e.getMessage());
        }
        return null;
    }

    public static synchronized OCRUtil init(){

        if(null == ocrUtil){
            ITesseract instance = new Tesseract(); // JNA Direct Mapping
            instance.setTessVariable(STRING_TESS_VARIABLE_KEY, STRING_TESS_VARIABLE_VALUE);
//            File tessDataFolder = LoadLibs.extractTessResources(STRING_TESS_NAME);
//            logUtil.d(tessDataFolder.getAbsolutePath());
            //从外部获取tessdataPath或者查询tessdata所在位置
            instance.setDatapath(tessdataPath);
            ocrUtil = new OCRUtil(instance);
        }

        return ocrUtil;

    }

}