detectron2 pth to onnx
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import argparse
import os
import onnx
import torch
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import build_detection_test_loader, MetadataCatalog, DatasetCatalog
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format
from detectron2.export import Caffe2Tracer, add_export_config
from detectron2.modeling import build_model
from detectron2.utils.logger import setup_logger
from detectron2.data.datasets import register_coco_instances
import random
from detectron2.utils.visualizer import Visualizer
from detectron2.engine import DefaultPredictor
def setup_cfg(args):
    """Build the frozen detectron2 config used to export the fine-tuned model.

    Registers the custom COCO-format dataset, loads the Faster R-CNN
    X-101-FPN base config, applies the fine-tuning overrides (13 classes,
    weights from ``OUTPUT_DIR/model_final.pth``), adds the export-specific
    config keys and freezes the result.

    Args:
        args: Parsed command-line namespace. It is accepted so the signature
            matches the caller, but it is not read here (see the NOTE below).

    Returns:
        The frozen config node.

    Raises:
        AssertionError: If ``cfg.MODEL.DEVICE`` is not ``"cpu"`` and the
            installed PyTorch is older than 1.5.
    """
    # One name for both the registration and cfg.DATASETS.TEST, so the data
    # loader built from the config looks up a dataset that actually exists.
    dataset_name = "page"

    cfg = get_cfg()

    # 46837-20.5.13-v12.0-2-coco-clean.json
    # Guarded so that calling setup_cfg() more than once in a process does not
    # try to register the same dataset name twice.
    if dataset_name not in DatasetCatalog.list():
        register_coco_instances(
            dataset_name,
            {},
            "../../pagexxxx.json",
            "/xxximages",
        )
    metadata = MetadataCatalog.get(dataset_name)
    print(metadata)

    cfg.merge_from_file(
        "../../configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
    )

    # cuda context is initialized before creating dataloader, so we don't fork anymore
    cfg.DATALOADER.NUM_WORKERS = 0

    # Solver / sampling values kept from the original listing
    # (presumably carried over from the training script -- TODO confirm they
    # are still needed for export).
    cfg.SOLVER.IMS_PER_BATCH = 1
    cfg.SOLVER.BASE_LR = 0.02
    cfg.SOLVER.MAX_ITER = 300  # 300 iterations seems good enough, but you can certainly train longer
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset

    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 13
    cfg.OUTPUT_DIR = '../../page/output_13/'
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set the testing threshold for this model

    cfg = add_export_config(cfg)

    # NOTE: the merges of args.config_file / args.opts are intentionally left
    # disabled, exactly as in the original listing. As a consequence, trailing
    # command-line options such as ``MODEL.DEVICE cpu`` do not change the
    # config returned from this function.
    # cfg.merge_from_file(args.config_file)
    # cfg.merge_from_list(args.opts)

    # Must be a tuple of names; ("page") without the comma is just a string.
    cfg.DATASETS.TEST = (dataset_name,)
    cfg.freeze()

    if cfg.MODEL.DEVICE != "cpu":
        TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2])
        assert TORCH_VERSION >= (1, 5), "PyTorch>=1.5 required for GPU conversion!"
    return cfg
if __name__ == "__main__":
    # Command-line entry point: build the model from the config, trace it with
    # one sample batch and write it out in the requested format.
    parser = argparse.ArgumentParser(description="Convert a model using caffe2 tracing.")
    parser.add_argument(
        "--format",
        choices=["caffe2", "onnx", "torchscript"],
        help="output format",
        default="caffe2",
    )
    parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
    parser.add_argument("--run-eval", action="store_true")
    # Required: the directory is created unconditionally below, and
    # os.makedirs(None) would otherwise fail with an opaque TypeError.
    parser.add_argument(
        "--output",
        required=True,
        help="output directory for the converted model",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    logger = setup_logger()
    logger.info("Command line arguments: " + str(args))
    os.makedirs(args.output, exist_ok=True)

    cfg = setup_cfg(args)

    # create a torch model
    torch_model = build_model(cfg)
    DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS)

    # get a sample data
    data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
    first_batch = next(iter(data_loader))

    # convert and save caffe2 model
    tracer = Caffe2Tracer(cfg, torch_model, first_batch)
    if args.format == "caffe2":
        caffe2_model = tracer.export_caffe2()
        caffe2_model.save_protobuf(args.output)
        # draw the caffe2 graph
        caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=first_batch)
    elif args.format == "onnx":
        onnx_model = tracer.export_onnx()
        onnx.save(onnx_model, os.path.join(args.output, "model.onnx"))
    elif args.format == "torchscript":
        script_model = tracer.export_torchscript()
        script_model.save(os.path.join(args.output, "model.ts"))

        # Recursively print IR of all modules
        with open(os.path.join(args.output, "model_ts_IR.txt"), "w") as f:
            try:
                f.write(script_model._actual_script_module._c.dump_to_str(True, False, False))
            except AttributeError:
                # Best effort: the private attributes used above are not
                # available on every script module, so report and carry on.
                logger.warning("Could not dump the per-module TorchScript IR; skipping it.")
        # Print IR of the entire graph (all submodules inlined)
        with open(os.path.join(args.output, "model_ts_IR_inlined.txt"), "w") as f:
            f.write(str(script_model.inlined_graph))
        # Print the model structure in pytorch style
        with open(os.path.join(args.output, "model.txt"), "w") as f:
            f.write(str(script_model))

    # run evaluation with the converted model
    if args.run_eval:
        assert args.format == "caffe2", "Python inference in other format is not yet supported."
        dataset = cfg.DATASETS.TEST[0]
        data_loader = build_detection_test_loader(cfg, dataset)
        # NOTE: hard-coded evaluator. change to the evaluator for your dataset
        evaluator = COCOEvaluator(dataset, cfg, True, args.output)
        metrics = inference_on_dataset(caffe2_model, data_loader, evaluator)
        print_csv_format(metrics)
在官方caffe2_converter.py做了一些改动,因为是自己的数据集,主要改动为:
def setup_cfg(args):
    """Build the frozen detectron2 config used to export the "page" model.

    Registers the custom COCO-format validation set, loads the Faster R-CNN
    X-101-FPN base config, applies the fine-tuning overrides (13 classes,
    weights from ``OUTPUT_DIR/model_final.pth``), adds the export-specific
    config keys and freezes the result.

    Args:
        args: Parsed command-line namespace. It is accepted so the signature
            matches the caller, but it is not read here (see the NOTE below).

    Returns:
        The frozen config node.

    Raises:
        AssertionError: If ``cfg.MODEL.DEVICE`` is not ``"cpu"`` and the
            installed PyTorch is older than 1.5.
    """
    # One name for both the registration and cfg.DATASETS.TEST.
    dataset_name = "page"

    cfg = get_cfg()

    # 46837-20.5.13-v12.0-2-coco-clean.json
    # Guarded so that calling setup_cfg() more than once in a process does not
    # try to register the same dataset name twice.
    if dataset_name not in DatasetCatalog.list():
        register_coco_instances(
            dataset_name,
            {},
            "../../page/xxxx-val.json",
            "/mobileHDD/data/all_images/",
        )
    metadata = MetadataCatalog.get(dataset_name)
    print(metadata)

    cfg.merge_from_file(
        "../../configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
    )

    # cuda context is initialized before creating dataloader, so we don't fork anymore
    cfg.DATALOADER.NUM_WORKERS = 0

    # Solver / sampling values kept from the original listing
    # (presumably carried over from the training script -- TODO confirm they
    # are still needed for export).
    cfg.SOLVER.IMS_PER_BATCH = 1
    cfg.SOLVER.BASE_LR = 0.02
    cfg.SOLVER.MAX_ITER = 300  # 300 iterations seems good enough, but you can certainly train longer
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset

    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 13
    cfg.OUTPUT_DIR = '../../page/output_13/'
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set the testing threshold for this model

    cfg = add_export_config(cfg)

    # NOTE: the merges of args.config_file / args.opts are intentionally left
    # disabled, exactly as in the original listing. As a consequence, trailing
    # command-line options such as ``MODEL.DEVICE cpu`` do not change the
    # config returned from this function.
    # cfg.merge_from_file(args.config_file)
    # cfg.merge_from_list(args.opts)

    # Must be a tuple of names; ("page") without the comma is just a string.
    cfg.DATASETS.TEST = (dataset_name,)
    cfg.freeze()

    if cfg.MODEL.DEVICE != "cpu":
        TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2])
        assert TORCH_VERSION >= (1, 5), "PyTorch>=1.5 required for GPU conversion!"
    return cfg
运行参数:
python caffe2_converter.py --format onnx --output ./caffe2_model/ MODEL.DEVICE cpu
转换成功:
到这里都没什么坑.
to caffe2 pb
python caffe2_converter.py --format caffe2 --output ./caffe2_model/ MODEL.DEVICE cpu
onnx to tensorflow's pb
配置环境:
第一种:
pip install onnx
pip install onnx-tf
第二种:
# installation:
git clone https://github.com/onnx/onnx.git && cd onnx
git submodule update --init --recursive
pip install -e .
# Install Tensorflow >= 2.0 and tensorflow-addons. (Note: for Tensorflow 1.x please refer to the tf-1.x branch)
git clone https://github.com/onnx/onnx-tensorflow.git && cd onnx-tensorflow
pip install -e .
尝试转换:
代码:
# Convert the exported ONNX model into a TensorFlow graph (.pb) with onnx-tf.
import onnx
from onnx_tf.backend import prepare
# Load the ONNX file
model = onnx.load('./output_13/model.onnx')
# Import the ONNX model to Tensorflow
# (this is the call quoted in the tracebacks discussed in the article)
tf_rep = prepare(model)
# Input nodes to the model
print('inputs:', tf_rep.inputs)
# Output nodes from the model
print('outputs:', tf_rep.outputs)
# All nodes in the model
print('tensor_dict:')
print(tf_rep.tensor_dict)
# Write the converted TensorFlow graph to disk
tf_rep.export_graph("./output_13/model.pb")
问题1:
ModuleNotFoundError: No module named 'tensorflow_addons'
解决:
- pip install tensorflow-addons 找不到该包
- pip install --upgrade pip
- 升级 pip 之后再 pip install tensorflow-addons
问题2:
因为需要 tensorflow1.xx版本的模型,所以需要重新配置 onnx-tf版本
git clone https://github.com/onnx/onnx-tensorflow.git && cd onnx-tensorflow
git checkout -b tf-1.x remotes/origin/tf-1.x
pip install -e .
conda uninstall tensorflow
conda install tensorflow=1.15.0
问题3:
配置好后,仍然是转换报错.
ValueError: '_wrapped_model.backbone.fpn_lateral2.bias' is not a valid node name
上 onnx-tf上查是否有该issue,果然有,还是open状态的:issues/609
好在还有个 pr,因为还没发版合并过去,只能自己根据这个 pr改 源码,重新 pip install -e .
搞定之后,尝试转换,坚持了一下,没报原来的错了,估计是解决了,但报新的错...
问题4:
Traceback (most recent call last):
File "/root/dxq/detectron2/page/pth_to_pb.py", line 8, in <module>
tf_rep = prepare(model)
File "/mnt/new-1t-ssd/dxq/onnx-tensorflow/onnx_tf/backend.py", line 66, in prepare
return cls.onnx_model_to_tensorflow_rep(model, strict)
File "/mnt/new-1t-ssd/dxq/onnx-tensorflow/onnx_tf/backend.py", line 86, in onnx_model_to_tensorflow_rep
return cls._onnx_graph_to_tensorflow_rep(model.graph, opset_import, strict)
File "/mnt/new-1t-ssd/dxq/onnx-tensorflow/onnx_tf/backend.py", line 147, in _onnx_graph_to_tensorflow_rep
strict=strict)
File "/mnt/new-1t-ssd/dxq/onnx-tensorflow/onnx_tf/backend.py", line 249, in _onnx_node_to_tensorflow_op
handler = handlers[node.domain].get(node.op_type, None)
KeyError: 'org.pytorch._caffe2'
这次官网也救不了我了.
只是看到有一个类似的issue : issues/639
看到有人吐槽 onnx-tf:
这也是我的心声...
停下来思考一下
怎么搞都不行,感觉困难重重.得停下来理一下思路,重新思考.
持续更新中...
没发现关于detectron2 模型转换,部署 特别好的文章,如果你有这方面模型部署上的经验,很希望有幸能与你交流请教.
不转成tensorflow 支持的model serving 的pb模型格式了
两个平台的东西,简单的模型可能还支持,要是涉及复杂一点的模型架构,肯定会有兼容问题。google和facebook应该不会这么好心的,自家的护城河要好好保护好,既然入坑了,就让开发者尽量在坑里待着。
想通了这一点之后,就看看如何原生部署detectron2的模型了。
尝试1: pth --> pt
pytorch的script model模型格式转换:
# Attempt to trace the saved .pth checkpoint directly into a TorchScript file.
# NOTE: this snippet is kept as written; it fails at the `checkpoint(...)`
# call below with "TypeError: 'dict' object is not callable".
import torch
from torch.autograd import Variable
# Dummy all-ones tensor of shape (3, 800, 800), moved to the GPU
image = Variable(torch.ones(3, 800, 800)).cuda()
# One input record: the image tensor plus its height and width
inputs = {"image": image, "height": 800, "width": 800}
# The example input is a list containing that single record
input_example = [inputs]
# Load whatever object was saved in the .pth file
checkpoint = torch.load('./output_13/model_final.pth')
# Fails here: the loaded object is a dict, so it cannot be called like a model
y=checkpoint(input_example)
# Not reached because of the error above
traced_script_module = torch.jit.trace(checkpoint.model, input_example)
traced_script_module.save("./output_13/output.pt")
坑爹,还是报错:
y=checkpoint(input_example)
TypeError: 'dict' object is not callable
强扭的瓜不甜
既然detectron2已经有写好现成的模型导出格式了,直接转换,直接部署。
支持的格式:"caffe2", "onnx", "torchscript"
因为是官网支持的,定义好参数,直接输出即可,没有遇到问题:
python caffe2_converter.py --format torchscript --output ./caffe2_model/ts-gpu/ MODEL.DEVICE cuda