code图片识别

125 阅读2分钟
在common模块导入tess4j对应的依赖
<!-- Tess4J: supplies the net.sourceforge.tess4j classes (ITesseract, Tesseract) used for OCR -->
<dependency>
    <groupId>net.sourceforge.tess4j</groupId>
    <artifactId>tess4j</artifactId>
    <version>5.2.0</version>
</dependency>
导入中文字体库:把资料中的tessdata文件夹(其中包含 chi_sim.traineddata)拷贝到自己的工作空间下
package com.heima.common.tess4j;

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;

import java.io.File;

/**
 * Stand-alone demo: runs Tesseract OCR on one local image file and prints the
 * recognised text as a single line.
 */
public class Application {

    public static void main(String[] args) {
        try {
            // Image on the local disk that will be recognised.
            File imageFile = new File("D:\\26.png");

            // OCR engine pointed at the tessdata directory, using the chi_sim language data.
            ITesseract ocrEngine = new Tesseract();
            ocrEngine.setDatapath("D:\\workspace\\tessdata");
            ocrEngine.setLanguage("chi_sim");

            String rawText = ocrEngine.doOCR(imageFile);

            // Flatten to one line: each CR/LF becomes "-", then every space is removed.
            String singleLine = rawText.replaceAll("\\r|\\n", "-");
            singleLine = singleLine.replaceAll(" ", "");

            System.out.println("识别的结果为:" + singleLine);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
在heima-leadnews-common中创建工具类,简单封装一下tess4j(先引入依赖)
<!-- Tess4J: supplies the net.sourceforge.tess4j classes (ITesseract, Tesseract, TesseractException) -->
<dependency>
    <groupId>net.sourceforge.tess4j</groupId>
    <artifactId>tess4j</artifactId>
    <version>5.2.0</version>
</dependency>
wemedia中的配置中添加两个属性
# Tess4J settings, bound to Tess4jClient through @ConfigurationProperties(prefix = "tess4j")
tess4j:
  # directory with the Tesseract trained-data files; handed to Tesseract.setDatapath
  data-path: D:\workspace\tessdata
  # Tesseract language code; handed to Tesseract.setLanguage
  language: chi_sim
  package com.heima.common.tess4j;


import com.heima.common.exception.LeadException;
import lombok.Data;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.apache.commons.lang3.StringUtils;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;

import java.awt.image.BufferedImage;

/**
 * Spring bean that wraps Tess4J OCR. Its two fields are populated from the
 * {@code tess4j.data-path} and {@code tess4j.language} configuration properties.
 */
@Data
@Component
@ConfigurationProperties(prefix = "tess4j")
public class Tess4jClient {
    /** Directory containing the Tesseract trained-data files ({@code tess4j.data-path}). */
    private String dataPath;
    /** Tesseract language code, e.g. {@code chi_sim} ({@code tess4j.language}). */
    private String language;

    /**
     * Recognises the text in an image and returns it as a single line.
     *
     * @param image the image to run OCR on
     * @return the recognised text with every CR/LF replaced by {@code "-"} and all spaces removed
     * @throws TesseractException if the OCR engine fails
     * @throws LeadException      (code 500) if {@code dataPath} is not configured
     */
    public String doOCR(BufferedImage image) throws TesseractException {
        if (StringUtils.isBlank(dataPath)) {
            // The missing value is the trained-data directory, not an image path,
            // so the message names the property that actually has to be set.
            throw new LeadException(500, "Tess4j,字体库路径(data-path)为空!");
        }
        // A fresh engine is created and configured on every call.
        ITesseract tesseract = new Tesseract();
        tesseract.setDatapath(dataPath);
        tesseract.setLanguage(language);

        String result = tesseract.doOCR(image);
        // Flatten to one line: each CR/LF becomes "-", then every space is removed.
        result = result.replaceAll("\\r|\\n", "-").replaceAll(" ", "");
        return result;
    }
}
在WmNewsAuditServiceImpl中添加方法
// Injected OCR helper; handleImageSensitiveScan calls its doOCR to read text out of an image.
@Autowired
private Tess4jClient tess4jClient;

/**
 * Runs OCR on raw image bytes and checks the recognised text with
 * {@code handleSensitiveScan}.
 *
 * @param bytes    raw bytes of the image
 * @param wmNewsId id of the article, forwarded to {@code handleSensitiveScan}
 * @return the result of {@code handleSensitiveScan} for the recognised text;
 *         {@code false} when the bytes are missing, cannot be decoded as an image,
 *         or OCR fails (callers treat {@code false} as "check not passed")
 */
private boolean handleImageSensitiveScan(byte[] bytes,Integer wmNewsId){
    if (bytes == null || bytes.length == 0) {
        log.error("图片内容为空,无法进行文字识别");
        return false;
    }
    try {
        // Decode the bytes into a BufferedImage for the OCR client.
        BufferedImage image = ImageIO.read(new ByteArrayInputStream(bytes));
        if (image == null) {
            // ImageIO.read returns null when no registered reader can decode the data.
            log.error("图片无法解码,无法进行文字识别");
            return false;
        }
        // Recognise the text and run it through the self-managed sensitive-word check.
        String recognizedText = tess4jClient.doOCR(image);
        return handleSensitiveScan(recognizedText, wmNewsId);
    } catch (Exception e) {
        // Any decoding or OCR failure counts as "not passed", as before, but is now
        // reported through the class logger together with the stack trace.
        log.error("图片文字识别审核异常", e);
        return false;
    }
}
在WmNewsAuditServiceImpl中的handleImageScan方法上添加如下代码
try {
    for (String image : images) {
        byte[] bytes = minioService.downLoadFile(image);

        // ---- OCR text audit of the image: begin ----
        // Stop at the first image whose recognised text fails the sensitive-word check.
        boolean passed = handleImageSensitiveScan(bytes, wmNewsId);
        if (!passed) {
            log.error("图片敏感词检测不通过");
            return false;
        }
        // ---- OCR text audit of the image: end ----

        imageList.add(bytes);
    }
} catch (Exception e) {
    // Report through the logger (with stack trace) instead of printStackTrace.
    log.error("图片下载或敏感词检测异常", e);
    // NOTE(review): execution continues past this catch with whatever was collected in
    // imageList so far; confirm whether a failure here should instead reject the article.
}
/**
 * Configuration class whose only job is to component-scan the listed
 * {@code com.heima.common.*} packages, including the tess4j package.
 */
@Configuration
@ComponentScan(basePackages = {
        "com.heima.common.aliyun",
        "com.heima.common.knife4j",
        "com.heima.common.exception",
        "com.heima.common.bcrypt",
        "com.heima.common.tess4j"
})
public class InitConfig {
}
  /**
   * Audits an article and, when every check passes, publishes it.
   *
   * <p>Checks run in this order, and the method returns at the first failure:
   * Aliyun text check, Aliyun image check, sensitive-word check on the text,
   * OCR-based sensitive-word check on every image. On success the status is set
   * to 8; if no future publish time is set, the article is then saved through the
   * article service and the status is set to 9 together with the returned article id.
   *
   * @param wmNews the article to audit; ignored unless its status is 1
   * @throws LeadException with {@code AppHttpCodeEnum.SERVER_ERROR} when saving the
   *         article or the final status update fails
   */
  public void auditWmNews(WmNews wmNews) {
        log.info("开始审核文章");
        // Only articles whose status is 1 (pending audit) are processed.
        if(wmNews.getStatus()!=1){
            log.info("不是待审核状态,结束");
            return;
        }
        Integer wmNewsId = wmNews.getId();

        // Extract the article content: key "text" -> text, key "image" -> image bytes.
        Map<String,Object> content = getTextAndImage(wmNews);
        String text = content.get("text").toString();
        List<byte[]> imageList = (List<byte[]>) content.get("image");

        // Aliyun text check.
        if (!checkText(text, wmNewsId)) {
            log.error("阿里云审核文本不通过");
            return;
        }
        // Aliyun image check.
        if (!checkImage(imageList, wmNewsId)) {
            log.error("阿里云审核图片不通过");
            return;
        }
        // Sensitive-word check on the article text.
        if (!checkSensitive(text, wmNewsId)) {
            log.error("敏感词检测不通过");
            return;
        }
        // Sensitive-word check on text recognised inside each image; the first failing
        // image ends the audit, and the failure is logged exactly once.
        if (!CollectionUtils.isEmpty(imageList)) {
            for (byte[] bytes : imageList) {
                if (!handleImageSensitiveScan(bytes, wmNewsId)) {
                    log.error("图片敏感词检测不通过");
                    return;
                }
            }
        }

        // All checks passed: move the status from 1 to 8.
        updateWmNewsStaus(wmNewsId,8,null,null);

        // A publish time that is still in the future means the article is not published now.
        if(wmNews.getPublishTime() != null &&
                System.currentTimeMillis() < wmNews.getPublishTime().getTime()){
            log.info("没有到发布时间,结束");
            return;
        }

        // Convert to the DTO expected by the article service.
        WmNewsResultDTO newsResultDTO = BeanHelper.copyProperties(wmNews, WmNewsResultDTO.class);
        try {
            // Publish through the remote article service, then set the status to 9
            // and record the returned article id.
            Long articleId = articleFeign.saveArticle(newsResultDTO);
            updateWmNewsStaus(wmNewsId,9,null,articleId);
        } catch (Exception e) {
            // Keep the stack trace in the log; the caller still gets the same LeadException.
            log.error("远程调用article保存文章,失败", e);
            throw new LeadException(AppHttpCodeEnum.SERVER_ERROR);
        }
  }

image.png

image.png