Background
I recently came across an anime-style conversion model on Hugging Face. The results are seriously impressive, so I pulled it down and ran it locally. Nice~~
The app in the screenshot is a little toy I wrote in Qt for trying out the various models I've collected in one place.
About the Model
Here is the model's repository: GitHub - bryandlee/animegan2-pytorch: PyTorch implementation of AnimeGANv2. It is a PyTorch port of GitHub - TachibanaYoshino/AnimeGANv2: [Open Source]. The improved version of AnimeGAN. Landscape photos/videos to anime. The original AnimeGANv2 was not trained on faces, though, so it does not perform particularly well on them. The training data for this model has not been released; judging from the output, the style leans toward Korean webtoons (I don't read many comics, so I'm not sure, haha).
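For a quick local test before any conversion, the weights can also be pulled straight from torch.hub. This is only a sketch based on my memory of that repository's README; the entry-point names and the "face_paint_512_v2" weight tag are assumptions, not code from this post:
import torch
from PIL import Image

model = torch.hub.load("bryandlee/animegan2-pytorch:main", "generator",
                       pretrained="face_paint_512_v2").eval()    # names assumed from the repo README
face2paint = torch.hub.load("bryandlee/animegan2-pytorch:main", "face2paint", size=512)

img = Image.open("portrait.jpg").convert("RGB")                  # illustrative file name
face2paint(model, img).save("portrait_anime.png")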
Using the Model
I converted the model to ONNX and load it with ONNX Runtime. The model's input size is fixed at 512x512, and my attempt to make it dynamic failed, so I took a roundabout route: first draw the image onto a 512x512 blank canvas in a Fit (letterbox) manner.
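For reference, a plain fixed-size export from the PyTorch weights can look roughly like the sketch below; the torch.hub entry point, the weight tag, the file name, and the opset version are assumptions of mine, not the author's actual export code:
import torch

model = torch.hub.load("bryandlee/animegan2-pytorch:main", "generator",
                       pretrained="face_paint_512_v2").eval()    # weight tag assumed
dummy = torch.randn(1, 3, 512, 512)    # fixed NCHW input, matching the 512x512 constraint
torch.onnx.export(model, dummy, "animegan2.onnx",
                  input_names=["input"], output_names=["output"],
                  opset_version=11)
With the .onnx file on disk, the letterbox preprocessing looks like this: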
import cv2
import numpy as np

# `source` is the original BGR photo (numpy array), e.g. from cv2.imread
# Scale it so that its longer side becomes 512 pixels
max_size = max(source.shape[0], source.shape[1])
scale_up = 512 / max_size
source = cv2.resize(source, dsize=(0, 0), fx=scale_up, fy=scale_up)
# Build a square canvas (side = multiple of 512) large enough to hold the image
canvas_h = np.ceil(source.shape[0] / 512) * 512
canvas_w = np.ceil(source.shape[1] / 512) * 512
canvas_size = int(np.max([canvas_h, canvas_w]))
canvas = np.zeros([canvas_size, canvas_size, 3], dtype=np.uint8)
scale_factor = int(canvas_size / 512)
# Center the image on the canvas, then shrink the canvas to the fixed 512x512 input
x_offset = int(np.floor((canvas_size - source.shape[1]) * 0.5))
y_offset = int(np.floor((canvas_size - source.shape[0]) * 0.5))
canvas[y_offset:(y_offset + source.shape[0]), x_offset:(x_offset + source.shape[1])] = source
input_source = cv2.resize(canvas, dsize=(512, 512))
Then run the model on the 512x512 image:
import os
import onnx
import onnxruntime as ort

# Convert BGR -> RGB and normalize to [-1, 1] in NCHW layout
input_source = cv2.cvtColor(input_source, cv2.COLOR_BGR2RGB)
img = np.expand_dims((input_source / 255.0 * 2.0 - 1.0).astype(np.float32).transpose(2, 0, 1), axis=0)
# Lazily create the ONNX Runtime session on first use
# (`sess` starts out as None at module level; `model` holds the AnimeGAN .onnx file name)
global sess
if sess is None:
    onnx_model = onnx.load_model(os.path.join(os.path.dirname(__file__), model))
    sess = ort.InferenceSession(onnx_model.SerializeToString(), providers=['CUDAExecutionProvider'])
input_name = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name
output = sess.run([output_name], {input_name: img})
# The output is NCHW in [-1, 1]; take the first sample and go back to HWC
res = output[0][0].transpose(1, 2, 0)
Finally, scale the image back up and crop away the black letterbox borders:
# Map the model output from [-1, 1] back to 0..255
final_img = ((res.clip(-1, 1) * 0.5 + 0.5) * 255).astype(np.uint8)
# Upscale from 512x512 back to the canvas size, cut out the image area, and restore BGR
final_img = cv2.resize(final_img, dsize=(0, 0), fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_CUBIC)
clip_img = final_img[y_offset:(y_offset + source.shape[0]), x_offset:(x_offset + source.shape[1])]
clip_img = cv2.cvtColor(clip_img, cv2.COLOR_RGB2BGR)
Combining with Face Detection
The model gives its best results when the face takes up a large part of the frame, so the ideal approach is to locate the face region first and run the model only on that region. I use YuNet for face detection (the blue box in the screenshot above is the detection result); the detected region is then cropped out and handed to the model for the anime conversion.
import os
import numpy as np
import cv2 as cv
from yunet import YuNet   # assuming the YuNet helper class from OpenCV Zoo's face_detection_yunet sample

model = YuNet(modelPath=os.path.join(os.path.dirname(__file__), "./yunet.onnx"),
              inputSize=[512, 512],
              confThreshold=0.92,
              nmsThreshold=0.3,
              topK=5000,
              backendId=cv.dnn.DNN_BACKEND_OPENCV,
              targetId=cv.dnn.DNN_TARGET_CPU)
expand_size_percent = 0.4

def set_expand_size_percent(percent):
    global expand_size_percent
    expand_size_percent = percent
def detect_face_rect(orign_img):
    global expand_size_percent
    if orign_img is None:
        return None, None
    # Scale the image so its height is 512 before running the detector
    h = orign_img.shape[0]
    scale = 512 / h
    image = cv.resize(orign_img, dsize=(0, 0), fx=scale, fy=scale)
    h, w, _ = image.shape
    if w < 50:
        return None, orign_img
    # Inference
    model.setInputSize([w, h])
    results = model.infer(image)
    if results is None or results.shape[0] == 0:
        return None, None
    # Map the first detection back to the original image coordinates
    bbox = results[0][0:4].astype(np.int32)
    x_begin = int(1.0 / scale * bbox[0])
    x_end = int(1.0 / scale * (bbox[0] + bbox[2]))
    y_begin = int(1.0 / scale * bbox[1])
    y_end = int(1.0 / scale * (bbox[1] + bbox[3]))
    # Expand the box by expand_size_percent so it covers more than the bare face
    w = x_end - x_begin
    w_expand = int(w * expand_size_percent)
    x_begin = int(max(0, x_begin - w_expand * 0.5))
    x_end = int(min(orign_img.shape[1], x_end + w_expand * 0.5))
    h = y_end - y_begin
    h_expand = int(h * expand_size_percent)
    y_begin = int(max(0, y_begin - h_expand * 0.5))
    y_end = int(min(orign_img.shape[0], y_end + h_expand * 0.5))
    # Return the (x, y, w, h) rectangle and the cropped face region
    clip_face = orign_img[y_begin: y_end, x_begin: x_end]
    return (x_begin, y_begin, x_end - x_begin, y_end - y_begin), clip_face
An expand_size_percent parameter is added here; tuning it lets the box cover a larger area than the bare face.
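Putting the two halves together, the overall flow could look roughly like the sketch below. photo_to_anime simply repackages the letterbox / ONNX Runtime / crop snippets from the previous section as one function, "animegan2.onnx" and "portrait.jpg" are illustrative names, and pasting the stylized crop back into the photo is my own addition for illustration; the post itself only describes cropping the face and running the model on it.
import cv2
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("animegan2.onnx",                       # assumed file name
                            providers=["CUDAExecutionProvider", "CPUExecutionProvider"])

def photo_to_anime(bgr_img):
    # Letterbox the crop onto a 512x512 canvas (same idea as the snippets above)
    scale_up = 512 / max(bgr_img.shape[0], bgr_img.shape[1])
    small = cv2.resize(bgr_img, dsize=(0, 0), fx=scale_up, fy=scale_up)
    canvas = np.zeros([512, 512, 3], dtype=np.uint8)
    x_off = (512 - small.shape[1]) // 2
    y_off = (512 - small.shape[0]) // 2
    canvas[y_off:y_off + small.shape[0], x_off:x_off + small.shape[1]] = small
    # Normalize to [-1, 1] NCHW and run the AnimeGAN ONNX model
    rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
    x = np.expand_dims((rgb / 255.0 * 2.0 - 1.0).astype(np.float32).transpose(2, 0, 1), axis=0)
    y = sess.run(None, {sess.get_inputs()[0].name: x})[0][0].transpose(1, 2, 0)
    # Back to uint8 BGR, drop the black borders, restore the crop's original size
    out = cv2.cvtColor(((y.clip(-1, 1) * 0.5 + 0.5) * 255).astype(np.uint8), cv2.COLOR_RGB2BGR)
    out = out[y_off:y_off + small.shape[0], x_off:x_off + small.shape[1]]
    return cv2.resize(out, (bgr_img.shape[1], bgr_img.shape[0]), interpolation=cv2.INTER_CUBIC)

set_expand_size_percent(0.6)                       # widen the box to include hair and some background
img = cv2.imread("portrait.jpg")                   # illustrative input
rect, face = detect_face_rect(img)
if rect is not None:
    x, y, w, h = rect
    img[y:y + h, x:x + w] = photo_to_anime(face)   # paste the stylized face back into the photo
cv2.imwrite("portrait_anime.jpg", img)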