参考文档:
https://support.huaweicloud.com/api-moderation/moderation_03_0018.html
华为云内容审核 - 图片内容审核和文本内容审核,代码示例:
import base64
import json
import requests
from visionline.utils.logger import logger
def encode_to_base64(filename):
    """
    Read a file and return its contents encoded as base64.

    :param filename: path of the file to read; it is opened in binary mode.
    :return: the base64-encoded file contents as ``bytes``.
    """
    with open(filename, 'rb') as file:
        return base64.b64encode(file.read())
def get_token():
    """
    Request an IAM token from Huawei Cloud using password authentication.

    The user name, password, account name and region are the placeholder
    values hard-coded below; replace them with real values before use.

    :return: tuple ``(status_code, token)``. ``status_code`` is the HTTP status
        code of the IAM response. ``token`` is the value of the
        ``X-Subject-Token`` response header, or ``None`` when the header is
        absent.
    """
    username = 'username'  # user name
    password = 'password'  # password
    domain = 'huaweiyun_account_name'  # Huawei Cloud account name
    region_name = 'cn-north-4'  # region, e.g. cn-north-4

    auth_data = {
        "auth": {
            "identity": {
                "password": {
                    "user": {
                        "name": username,
                        "password": password,
                        "domain": {
                            "name": domain
                        }
                    }
                },
                "methods": [
                    "password"
                ]
            },
            "scope": {
                "project": {
                    "name": region_name
                }
            }
        }
    }
    _url = 'https://iam.myhuaweicloud.com/v3/auth/tokens'
    headers = {'Content-Type': 'application/json'}
    # A timeout keeps the call from blocking forever when the endpoint does not answer.
    resp = requests.post(url=_url, headers=headers, data=json.dumps(auth_data), timeout=10)
    logger.info(f"resp = {resp}")
    status_code = resp.status_code
    logger.info(f"-- get_token - resp.status_code = {status_code}")
    try:
        logger.info(f"-- get_token - resp.json() = {resp.json()}")
    except ValueError:
        # The body is not valid JSON; log the raw text instead of failing on a log line.
        logger.info(f"-- get_token - resp.text = {resp.text}")
    # The token is delivered in a response header, not in the body.
    return status_code, resp.headers.get('X-Subject-Token')
def moderation_image(token, image, image_url):
    """
    Submit an image to the Huawei Cloud image moderation endpoint.

    :param token: IAM token, sent in the ``X-Auth-Token`` header.
    :param image: base64-encoded image as ``bytes`` or ``str``; may be empty
        or ``None`` when ``image_url`` is supplied instead.
    :param image_url: URL of the image; may be ``None`` when ``image`` is supplied.
    :return: tuple ``(status_code, contents)``. ``contents`` is the parsed JSON
        response body, or ``None`` when the body is not valid JSON.

    Detection scenes requested:
        politics: whether political figures are involved.
        terrorism: whether politically sensitive or violent/terror elements are present.
        porn: whether pornographic elements are present.
    The ``ad`` scene (advertising text/images) is not requested.
    """
    _url = 'https://moderation.cn-north-4.myhuaweicloud.com/v1.0/moderation/image'
    categories = ['porn', 'politics', 'terrorism']  # 'ad' is deliberately left out
    threshold = None
    moderation_rule = "default"

    # json.dumps cannot serialise bytes, so any bytes payload (even an empty one)
    # is decoded; a str payload is passed through unchanged.
    if isinstance(image, (bytes, bytearray)):
        image = image.decode("utf-8")

    _data = {
        "image": image,
        "url": image_url,
        "categories": categories,
        "threshold": threshold,
        "moderation_rule": moderation_rule
    }
    _headers = {
        "Content-Type": "application/json",
        "X-Auth-Token": token
    }
    data = bytes(json.dumps(_data), 'utf8')
    # A timeout keeps the call from blocking forever when the endpoint does not answer.
    resp = requests.post(url=_url, headers=_headers, data=data, timeout=30)
    logger.info(f"resp = {resp}")
    status_code = resp.status_code
    logger.info(f"-- moderation_image - resp.status_code = {status_code}")
    try:
        contents = resp.json()
    except ValueError:
        # Non-JSON body: report "no contents" so the caller's emptiness check handles it.
        logger.info(f"-- moderation_image - resp.text = {resp.text}")
        contents = None
    else:
        logger.info(f"-- moderation_image - resp.json() = {contents}")
    return status_code, contents
def deal_moderation_image_resp(contents):
    """
    Interpret the response body of the image moderation endpoint.

    :param contents: parsed JSON response body. The ``result`` key is absent
        when the API call failed.
    :return: tuple ``(passed, message)``. ``passed`` is ``True`` only when the
        overall suggestion is ``pass``. Otherwise ``message`` lists the flagged
        scenes joined by ``;`` in the order politics, terrorism, porn, or is a
        generic failure text when no scene is flagged.
    """
    result_info = contents.get('result') if isinstance(contents, dict) else None
    if not isinstance(result_info, dict):
        return False, "图片审核接口调用失败"

    # Overall suggestion: block = sensitive content, rejected; pass = clean;
    # review = needs a manual check.
    if result_info.get('suggestion') == 'pass':
        return True, '审核通过'

    # Per-scene suggestions use the same block / pass / review values.
    category_suggestions_info = result_info.get('category_suggestions') or {}
    # The 'ad' scene is intentionally not reported.
    scene_labels = (
        ('politics', '涉政'),
        ('terrorism', '涉暴'),
        ('porn', '涉黄'),
    )
    # 'review' results are reported as "suspected" (疑似); anything else is ignored.
    verdict_prefixes = {'block': '', 'review': '疑似'}

    findings = []
    for scene, label in scene_labels:
        verdict = category_suggestions_info.get(scene)
        if verdict in verdict_prefixes:
            findings.append(verdict_prefixes[verdict] + label)

    # Never hand back an empty failure message: fall back to a generic one.
    return False, ';'.join(findings) or '审核不通过'
def public_moderation_image(image_base64='', image_url=None):
    """
    Public entry point: moderate an image and return the verdict.

    :param image_base64: base64-encoded image; leave empty when ``image_url`` is used.
    :param image_url: URL of the image; leave ``None`` when ``image_base64`` is used.
    :return: tuple ``(flag, resp_info, detail)``. ``flag`` is ``True`` when the
        image passed. ``resp_info`` is a dict with ``code`` and ``msg`` (plus
        ``content`` on success). ``detail`` is the raw moderation response, or
        ``''`` when no moderation response was obtained.
    """
    # TODO: the token is valid for 24 hours; add a token cache, or store it in mongo.
    status_code, token = get_token()
    # The token is a credential, so only whether one was obtained is logged.
    logger.info(f"图片审核获取token | status_code = {status_code} | token_obtained = {bool(token)}")
    if not token:
        return False, {"code": 400, 'msg': '获取图片审核token失败'}, ''

    status_code, contents = moderation_image(token, image_base64, image_url)
    logger.info(f"图片审核接口 | status_code = {status_code} | contents = {type(contents)} | {contents}")
    if not contents:
        # Every path returns a 3-tuple so callers can always unpack three values.
        return False, {"code": 400, 'msg': '图片审核接口响应失败'}, ''

    # Turn the raw moderation response into a pass/fail verdict.
    flag, result_info = deal_moderation_image_resp(contents)
    logger.info(f"审核结果: {flag} | {result_info}")
    if not flag:
        return False, {'code': 400, 'msg': result_info}, contents
    return True, {'code': 200, 'msg': 'success', 'content': result_info}, contents
def moderation_text(token, text, type='content'):
    """
    Submit a text to the Huawei Cloud text moderation endpoint.

    :param token: IAM token, sent in the ``X-Auth-Token`` header.
    :param text: the text to check.
    :param type: value sent as the ``type`` field of the submitted item.
    :return: tuple ``(status_code, contents)``. ``contents`` is the parsed JSON
        response body, or ``None`` when the body is not valid JSON.
    """
    # Detection scenes requested from the API.
    categories = [
        'ad',  # advertising
        'politics',  # political content
        'porn',  # pornographic content
        'abuse',  # insults
        'contraband',  # prohibited goods
        'flood'  # spam / flooding
    ]
    _url = 'https://moderation.cn-north-4.myhuaweicloud.com/v1.0/moderation/text'
    _data = {
        "categories": categories,
        # items: list of texts to check; each entry carries the text and its type.
        "items": [
            {"text": text, "type": type}
        ]
    }
    _headers = {
        "Content-Type": "application/json",
        "X-Auth-Token": token
    }
    data = bytes(json.dumps(_data), 'utf8')
    # A timeout keeps the call from blocking forever when the endpoint does not answer.
    resp = requests.post(url=_url, data=data, headers=_headers, timeout=30)
    logger.info(f"resp = {resp}")
    status_code = resp.status_code
    logger.info(f"-- moderation_text - resp.status_code = {status_code}")
    try:
        contents = resp.json()
    except ValueError:
        # Non-JSON body: report "no contents" so the caller's emptiness check handles it.
        logger.info(f"-- moderation_text - resp.text = {resp.text}")
        contents = None
    else:
        logger.info(f"-- moderation_text - resp.json() = {contents}")
    return status_code, contents
def deal_moderation_text_resp(contents):
    """
    Interpret the response body of the text moderation endpoint.

    :param contents: parsed JSON response body. The ``result`` key is absent
        when the API call failed.
    :return: tuple ``(passed, message)``. ``passed`` is ``True`` only when the
        overall suggestion is ``pass``.
    """
    result_info = contents.get('result') if isinstance(contents, dict) else None
    if not isinstance(result_info, dict):
        return False, "文本审核接口调用失败"

    # Overall suggestion: block = sensitive content, rejected; pass = clean;
    # review = needs a manual check.
    if result_info.get('suggestion') == 'pass':
        return True, '审核通过'
    return False, '审核不通过'
def public_moderation_text(text):
    """
    Public entry point: moderate a text and return the verdict.

    :param text: the text to check.
    :return: tuple ``(flag, resp_info, detail)``. ``flag`` is ``True`` when the
        text passed. ``resp_info`` is a dict with ``code`` and ``msg`` (plus
        ``content`` on success). ``detail`` is the raw moderation response, or
        ``''`` when no moderation response was obtained.
    """
    # TODO: the token is valid for 24 hours; add a token cache, or store it in mongo.
    status_code, token = get_token()
    # The token is a credential, so only whether one was obtained is logged.
    logger.info(f"文本审核获取token | status_code = {status_code} | token_obtained = {bool(token)}")
    if not token:
        return False, {"code": 400, 'msg': '获取文本审核token失败'}, ''

    status_code, contents = moderation_text(token, text)
    logger.info(f"文本审核接口 | status_code = {status_code} | contents = {contents}")
    if not contents:
        return False, {"code": 400, 'msg': '文本审核接口响应失败'}, ''

    # Turn the raw moderation response into a pass/fail verdict.
    flag, result_info = deal_moderation_text_resp(contents)
    logger.info(f"审核结果: {flag} | {result_info}")
    if not flag:
        return False, {'code': 400, 'msg': result_info}, contents
    return True, {'code': 200, 'msg': 'success', 'content': result_info}, contents
if __name__ == '__main__':
    # Manual smoke test: runs text moderation on the sample text below and logs the outcome.
    # TODO: the token is valid for 24 hours; add a token cache, or store it in mongo.
    # Text content moderation
    # text = ''
    # text = ''
    text = ''
    flag, resp_info, detail = public_moderation_text(text)
    logger.info(f"flag = {flag}, resp_info = {resp_info}, detail = {detail}")
    # NOTE: the string literal below is never used; it holds the image moderation
    # example in disabled form.
    """ 图片内容审核
# 测试图片url
# demo_data_url = 'https://sdk-obs-source-save.obs.cn-north-4.myhuaweicloud.com/terrorism.jpg' # 正常
demo_data_url = 'https://img0.baidu.com/it/u=2147892510,3124659829&fm=26&fmt=auto&gp=0.jpg' # 毛主席
# 真人
# demo_data_url = 'https://img1.baidu.com/it/u=4210503503,1536203928&fm=26&fmt=auto&gp=0.jpg'
#
# demo_data_url = 'https://img1.baidu.com/it/u=671407126,81369699&fm=26&fmt=auto&gp=0.jpg'
#
# demo_data_url = 'https://gimg2.baidu.com/image_search/src=http%3A%2F%2Fimages.china.cn%2Fattachement%2Fjpg%2Fsite1000%2F20150818%2Fd02788e9b72d173d38e730.jpg&refer=http%3A%2F%2Fimages.china.cn&app=2002&size=f9999,10000&q=a80&n=0&g=0n&fmt=jpeg?sec=1629366083&t=0c2b935f9987f8700d782f695dcd0ada'
image_a = './temp.png'
image_b = './.jpeg'
image_c = '希.png'
image_d = '江.jpg'
with open(image_d, 'rb') as f:
image_bytes = f.read()
# 读取图片文件base64
image_base64 = base64.b64encode(image_bytes)
flag, resp_info, detail = public_moderation_image(image_base64=image_base64)
# flag, resp_info = public_moderation_image(image_url=demo_data_url)
logger.info(f"--- flag = {flag}, resp_info = {resp_info}, detail = {detail}")
"""