华为云内容审核 - 图片内容审核和文本内容审核

357 阅读4分钟

参考文档:

https://support.huaweicloud.com/api-moderation/moderation_03_0018.html

华为云内容审核 - 图片内容审核和文本内容审核,代码示例:

import base64
import json
import requests
from visionline.utils.logger import logger


def encode_to_base64(filename):
    """
    Read a file in binary mode and return its contents Base64-encoded.

    :param filename: path of the file to read
    :return: the Base64-encoded file contents as ``bytes``
    """
    with open(filename, 'rb') as source:
        raw_bytes = source.read()
    return base64.b64encode(raw_bytes)


def get_token():
    """
    Request an IAM token from Huawei Cloud using password authentication.

    The credentials below are posted to the IAM ``/v3/auth/tokens`` endpoint
    and the token is read from the ``X-Subject-Token`` response header.

    :return: ``(status_code, token)``; ``token`` is ``None`` when the response
        carries no ``X-Subject-Token`` header
    :raises requests.exceptions.RequestException: network errors and timeouts
        raised by ``requests.post`` are not caught here
    """
    username = 'username'  # IAM user name
    password = 'password'  # IAM user password
    domain = 'huaweiyun_account_name'  # Huawei Cloud account name
    region_name = 'cn-north-4'  # project (region) the token is scoped to, e.g. cn-north-4

    auth_data = {
        "auth": {
            "identity": {
                "password": {
                    "user": {
                        "name": username,
                        "password": password,
                        "domain": {
                            "name": domain
                        }
                    }
                },
                "methods": [
                    "password"
                ]
            },
            "scope": {
                "project": {
                    "name": region_name
                }
            }
        }
    }

    _url = 'https://iam.myhuaweicloud.com/v3/auth/tokens'
    headers = {'Content-Type': 'application/json'}
    # Bound the wait so a stalled connection cannot hang the caller forever.
    resp = requests.post(url=_url, headers=headers, data=json.dumps(auth_data), timeout=30)
    logger.info(f"resp = {resp}")
    status_code = resp.status_code
    logger.info(f"-- get_token - resp.status_code = {status_code}")
    # The body is only logged; a non-JSON body (e.g. a gateway error page)
    # must not prevent the token header from being read.
    try:
        body = resp.json()
    except ValueError:
        body = resp.text
    logger.info(f"-- get_token - resp.json() = {body}")
    # A missing header yields None instead of relying on a bare except.
    return status_code, resp.headers.get('X-Subject-Token')


def moderation_image(token, image, image_url):
    """
    Submit an image to the Huawei Cloud image moderation endpoint.

    :param token: IAM token, sent in the ``X-Auth-Token`` header
    :param image: Base64-encoded image as ``bytes`` or ``str``; may be
        empty/None when ``image_url`` is used instead
    :param image_url: URL of the image to check
    :return: ``(status_code, contents)`` where ``contents`` is the parsed JSON
        body, or ``None`` when the body is not valid JSON
    :raises requests.exceptions.RequestException: network errors and timeouts
        raised by ``requests.post`` are not caught here

    Detection scenes requested:
        politics: political figures
        terrorism: terrorism / violence related elements
        porn: pornographic content
        ad: advertising text in images (currently disabled)
    """
    _url = 'https://moderation.cn-north-4.myhuaweicloud.com/v1.0/moderation/image'
    categories = ['porn', 'politics', 'terrorism',
                  # 'ad'
                  ]
    threshold = None
    moderation_rule = "default"

    # base64.b64encode returns bytes, which json.dumps cannot serialise;
    # a str passed by the caller is used as-is.
    if isinstance(image, (bytes, bytearray)):
        image = image.decode("utf-8")

    _data = {
        "image": image,
        "url": image_url,
        "categories": categories,
        "threshold": threshold,
        "moderation_rule": moderation_rule
    }

    _headers = {
        "Content-Type": "application/json",
        "X-Auth-Token": token
    }

    data = bytes(json.dumps(_data), 'utf8')
    # Bound the wait so a stalled connection cannot hang the caller forever.
    resp = requests.post(url=_url, headers=_headers, data=data, timeout=30)
    logger.info(f"resp = {resp}")
    status_code = resp.status_code
    logger.info(f"-- moderation_image - resp.status_code = {status_code}")
    # Parse the body once; a non-JSON body is reported as None so the caller
    # can treat it as a failed response instead of crashing here.
    try:
        contents = resp.json()
    except ValueError:
        logger.info(f"-- moderation_image - non-JSON response body = {resp.text}")
        contents = None
    else:
        logger.info(f"-- moderation_image - resp.json() = {contents}")
    return status_code, contents


def deal_moderation_image_resp(contents):
    """
    Turn an image moderation response into a ``(passed, message)`` pair.

    :param contents: parsed response body of the image moderation call
    :return: ``(True, '审核通过')`` when the overall suggestion is ``pass``;
        otherwise ``(False, message)`` where ``message`` lists the politics,
        terrorism and porn findings joined by ``;`` (advertising is not
        inspected), or a fixed failure message when ``result`` is absent
    """
    # The response has no 'result' field when the call failed.
    if 'result' not in contents:
        return False, "图片审核接口调用失败"

    verdict = contents['result']
    # Overall suggestion: block = rejected, pass = accepted, review = needs manual review.
    if verdict['suggestion'] == 'pass':
        return True, '审核通过'

    # Per-scene suggestions use the same block / pass / review values.
    per_category = verdict['category_suggestions']
    wording = (
        ('politics', {'block': '涉政', 'review': '疑似涉政'}),
        ('terrorism', {'block': '涉暴', 'review': '疑似涉暴'}),
        ('porn', {'block': '涉黄', 'review': '疑似涉黄'}),
    )
    findings = []
    for category, labels in wording:
        outcome = per_category.get(category, None)
        if outcome == 'block':
            findings.append(labels['block'])
        elif outcome == 'review':
            findings.append(labels['review'])
    return False, ';'.join(findings)


def public_moderation_image(image_base64='', image_url=None):
    """
    Public entry point: moderate one image and return the verdict.

    Obtains a token, calls the image moderation endpoint and interprets the
    response.

    :param image_base64: Base64-encoded image, passed on to ``moderation_image``
    :param image_url: URL of the image, passed on to ``moderation_image``
    :return: always a 3-tuple ``(passed, info, contents)``: ``passed`` is a
        bool, ``info`` is a dict with ``code`` and ``msg`` (plus ``content``
        on success), and ``contents`` is the raw moderation response, or
        ``''`` when no response is available
    """
    # TODO: tokens are valid for 24 hours; cache the token (or persist it,
    # e.g. in MongoDB) instead of requesting a new one on every call.
    status_code, token = get_token()
    # Log only whether a token was obtained; the token itself is a credential.
    logger.info(f"图片审核获取token | status_code = {status_code} | token_obtained = {bool(token)}")
    if not token:
        return False, {"code": 400, 'msg': '获取图片审核token失败'}, ''
    status_code, contents = moderation_image(token, image_base64, image_url)
    logger.info(f"图片审核接口 | status_code = {status_code} | contents = {type(contents)} | {contents}")
    if not contents:
        # Same 3-tuple shape as every other return path, so callers can
        # always unpack three values.
        return False, {"code": 400, 'msg': '图片审核接口响应失败'}, ''
    # Interpret the moderation response.
    flag, result_info = deal_moderation_image_resp(contents)
    logger.info(f"审核结果: {flag} | {result_info}")
    if not flag:
        return False, {'code': 400, 'msg': result_info}, contents
    return True, {'code': 200, 'msg': 'success', 'content': result_info}, contents


def moderation_text(token, text, type='content'):
    """
    Submit one text item to the Huawei Cloud text moderation endpoint.

    :param token: IAM token, sent in the ``X-Auth-Token`` header
    :param text: text to check
    :param type: text type sent with the item (defaults to ``'content'``);
        the name shadows the builtin but is kept for caller compatibility
    :return: ``(status_code, contents)`` where ``contents`` is the parsed JSON
        body, or ``None`` when the body is not valid JSON
    :raises requests.exceptions.RequestException: network errors and timeouts
        raised by ``requests.post`` are not caught here
    """
    # Detection scenes requested from the service.
    categories = [
        'ad',  # advertising
        'politics',  # political content
        'porn',  # pornographic content
        'abuse',  # abusive language
        'contraband',  # contraband
        'flood'  # flooding / spam
    ]
    _url = 'https://moderation.cn-north-4.myhuaweicloud.com/v1.0/moderation/text'
    _data = {
        "categories": categories,
        # items: list of texts to check; each entry carries the text and its type
        "items": [
            {"text": text, "type": type}
        ]
    }
    _headers = {
        "Content-Type": "application/json",
        "X-Auth-Token": token
    }
    data = bytes(json.dumps(_data), 'utf8')
    # Bound the wait so a stalled connection cannot hang the caller forever.
    resp = requests.post(url=_url, data=data, headers=_headers, timeout=30)
    logger.info(f"resp = {resp}")
    status_code = resp.status_code
    logger.info(f"-- moderation_text - resp.status_code = {status_code}")
    # Parse the body once; a non-JSON body is reported as None so the caller
    # can treat it as a failed response instead of crashing here.
    try:
        contents = resp.json()
    except ValueError:
        logger.info(f"-- moderation_text - non-JSON response body = {resp.text}")
        contents = None
    else:
        logger.info(f"-- moderation_text - resp.json() = {contents}")
    return status_code, contents


def deal_moderation_text_resp(contents):
    """
    Turn a text moderation response into a ``(passed, message)`` pair.

    :param contents: parsed response body of the text moderation call
    :return: ``(True, '审核通过')`` when the overall suggestion is ``pass``,
        ``(False, '审核不通过')`` for any other suggestion, and
        ``(False, '文本审核接口调用失败')`` when the response is empty or has
        no ``result`` field
    """
    # The response has no 'result' field when the call failed.
    if not contents or 'result' not in contents:
        return False, "文本审核接口调用失败"
    # Overall suggestion: block = rejected, pass = accepted, review = needs manual review.
    # A missing suggestion is treated as "not passed" rather than raising.
    suggestion_result = contents['result'].get('suggestion')
    if suggestion_result == 'pass':
        return True, '审核通过'
    return False, '审核不通过'


def public_moderation_text(text):
    """
    Public entry point: moderate one text and return the verdict.

    Obtains a token, calls the text moderation endpoint and interprets the
    response.

    :param text: text to check
    :return: a 3-tuple ``(passed, info, contents)``: ``passed`` is a bool,
        ``info`` is a dict with ``code`` and ``msg`` (plus ``content`` on
        success), and ``contents`` is the raw moderation response, or ``''``
        when no response is available
    """
    # TODO: tokens are valid for 24 hours; cache the token (or persist it,
    # e.g. in MongoDB) instead of requesting a new one on every call.
    status_code, token = get_token()
    # Log only whether a token was obtained; the token itself is a credential.
    logger.info(f"文本审核获取token | status_code = {status_code} | token_obtained = {bool(token)}")
    if not token:
        return False, {"code": 400, 'msg': '获取文本审核token失败'}, ''

    status_code, contents = moderation_text(token, text)
    logger.info(f"文本审核接口 | status_code = {status_code} | contents = {contents}")
    if not contents:
        return False, {"code": 400, 'msg': '文本审核接口响应失败'}, ''
    # Interpret the moderation response.
    flag, result_info = deal_moderation_text_resp(contents)
    logger.info(f"审核结果: {flag} | {result_info}")
    if not flag:
        return False, {'code': 400, 'msg': result_info}, contents
    return True, {'code': 200, 'msg': 'success', 'content': result_info}, contents


if __name__ == '__main__':
    # TODO: tokens are valid for 24 hours; a token cache (or storing the
    # token, e.g. in MongoDB) would avoid re-authenticating on every call.
    # Text moderation demo; the sample text below is an empty string.
    sample_text = ''
    passed, summary, raw_response = public_moderation_text(sample_text)
    logger.info(f"flag = {passed}, resp_info = {summary}, detail = {raw_response}")

    # The image moderation demo below is disabled by keeping it inside a bare
    # string literal.
    """ 图片内容审核
    # 测试图片url
    # demo_data_url = 'https://sdk-obs-source-save.obs.cn-north-4.myhuaweicloud.com/terrorism.jpg'  # 正常
    demo_data_url = 'https://img0.baidu.com/it/u=2147892510,3124659829&fm=26&fmt=auto&gp=0.jpg'  # 毛主席
    # 真人
    # demo_data_url = 'https://img1.baidu.com/it/u=4210503503,1536203928&fm=26&fmt=auto&gp=0.jpg'
    # 
    # demo_data_url = 'https://img1.baidu.com/it/u=671407126,81369699&fm=26&fmt=auto&gp=0.jpg'
    # 
    # demo_data_url = 'https://gimg2.baidu.com/image_search/src=http%3A%2F%2Fimages.china.cn%2Fattachement%2Fjpg%2Fsite1000%2F20150818%2Fd02788e9b72d173d38e730.jpg&refer=http%3A%2F%2Fimages.china.cn&app=2002&size=f9999,10000&q=a80&n=0&g=0n&fmt=jpeg?sec=1629366083&t=0c2b935f9987f8700d782f695dcd0ada'

    image_a = './temp.png'
    image_b = './.jpeg'
    image_c = '希.png'
    image_d = '江.jpg'
    with open(image_d, 'rb') as f:
        image_bytes = f.read()
    # 读取图片文件base64
    image_base64 = base64.b64encode(image_bytes)

    flag, resp_info, detail = public_moderation_image(image_base64=image_base64)
    # flag, resp_info = public_moderation_image(image_url=demo_data_url)
    logger.info(f"--- flag = {flag}, resp_info = {resp_info}, detail = {detail}")
    """