detectron2 Model Deployment Battle (Part 1)


detectron2 pth to onnx

#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import argparse
import os
import onnx
import torch

from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import build_detection_test_loader, MetadataCatalog, DatasetCatalog
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format
from detectron2.export import Caffe2Tracer, add_export_config
from detectron2.modeling import build_model
from detectron2.utils.logger import setup_logger

from detectron2.data.datasets import register_coco_instances
import random
from detectron2.utils.visualizer import Visualizer
from detectron2.engine import DefaultPredictor

def setup_cfg(args):
    cfg = get_cfg()
    # cuda context is initialized before creating dataloader, so we don't fork anymore
    cfg.DATALOADER.NUM_WORKERS = 0

    # 46837-20.5.13-v12.0-2-coco-clean.json
    register_coco_instances("xxx", {},
                            "../../pagexxxx.json",
                            "/xxximages")
    metadata = MetadataCatalog.get("xxx")
    print(metadata)

    cfg.merge_from_file(
        "../../configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
    )
    # cfg.DATASETS.TRAIN = ("xxx",)
    cfg.DATASETS.TEST = ("xxx")  # no metrics implemented for this dataset
    # cfg.DATALOADER.NUM_WORKERS = 4  # keep NUM_WORKERS=0 (see above): cuda context is already initialized
    cfg.SOLVER.IMS_PER_BATCH = 1
    cfg.SOLVER.BASE_LR = 0.02
    cfg.SOLVER.MAX_ITER = (
        300
    )  # 300 iterations seems good enough, but you can certainly train longer
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = (
        128
    )  # faster, and good enough for this toy dataset
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 13


    cfg.OUTPUT_DIR = '../../page/output_13/'
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set the testing threshold for this model


    cfg = add_export_config(cfg)
    # cfg.merge_from_file(args.config_file)
    # cfg.merge_from_list(args.opts)
    cfg.DATASETS.TEST = ("page",)
    cfg.freeze()
    if cfg.MODEL.DEVICE != "cpu":
        TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2])
        assert TORCH_VERSION >= (1, 5), "PyTorch>=1.5 required for GPU conversion!"
    return cfg


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convert a model using caffe2 tracing.")
    parser.add_argument(
        "--format",
        choices=["caffe2", "onnx", "torchscript"],
        help="output format",
        default="caffe2",
    )
    parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
    parser.add_argument("--run-eval", action="store_true")
    parser.add_argument("--output", help="output directory for the converted model")
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    logger = setup_logger()
    logger.info("Command line arguments: " + str(args))
    os.makedirs(args.output, exist_ok=True)

    cfg = setup_cfg(args)

    # create a torch model
    torch_model = build_model(cfg)
    DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS)

    # get a sample data
    data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
    first_batch = next(iter(data_loader))

    # convert and save caffe2 model
    tracer = Caffe2Tracer(cfg, torch_model, first_batch)
    if args.format == "caffe2":
        caffe2_model = tracer.export_caffe2()
        caffe2_model.save_protobuf(args.output)
        # draw the caffe2 graph
        caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=first_batch)
    elif args.format == "onnx":
        onnx_model = tracer.export_onnx()
        onnx.save(onnx_model, os.path.join(args.output, "model.onnx"))
    elif args.format == "torchscript":
        script_model = tracer.export_torchscript()
        script_model.save(os.path.join(args.output, "model.ts"))

        # Recursively print IR of all modules
        with open(os.path.join(args.output, "model_ts_IR.txt"), "w") as f:
            try:
                f.write(script_model._actual_script_module._c.dump_to_str(True, False, False))
            except AttributeError:
                pass
        # Print IR of the entire graph (all submodules inlined)
        with open(os.path.join(args.output, "model_ts_IR_inlined.txt"), "w") as f:
            f.write(str(script_model.inlined_graph))
        # Print the model structure in pytorch style
        with open(os.path.join(args.output, "model.txt"), "w") as f:
            f.write(str(script_model))

    # run evaluation with the converted model
    if args.run_eval:
        assert args.format == "caffe2", "Python inference in other format is not yet supported."
        dataset = cfg.DATASETS.TEST[0]
        data_loader = build_detection_test_loader(cfg, dataset)
        # NOTE: hard-coded evaluator. change to the evaluator for your dataset
        evaluator = COCOEvaluator(dataset, cfg, True, args.output)
        metrics = inference_on_dataset(caffe2_model, data_loader, evaluator)
        print_csv_format(metrics)

I made a few changes to the official caffe2_converter.py since this is my own dataset; the main changes are in setup_cfg:

def setup_cfg(args):
    cfg = get_cfg()
    # cuda context is initialized before creating dataloader, so we don't fork anymore
    cfg.DATALOADER.NUM_WORKERS = 0

    # 46837-20.5.13-v12.0-2-coco-clean.json
    register_coco_instances("page", {},
                            "../../page/xxxx-val.json",
                            "/mobileHDD/data/all_images/")
    metadata = MetadataCatalog.get("page")
    print(metadata)

    cfg.merge_from_file(
        "../../configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
    )
    # cfg.DATASETS.TRAIN = ("page",)
    cfg.DATASETS.TEST = ("page")  # no metrics implemented for this dataset
    # cfg.DATALOADER.NUM_WORKERS = 4  # keep NUM_WORKERS=0 (see above): cuda context is already initialized
    cfg.SOLVER.IMS_PER_BATCH = 1
    cfg.SOLVER.BASE_LR = 0.02
    cfg.SOLVER.MAX_ITER = (
        300
    )  # 300 iterations seems good enough, but you can certainly train longer
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = (
        128
    )  # faster, and good enough for this toy dataset
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 13


    cfg.OUTPUT_DIR = '../../page/output_13/'
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set the testing threshold for this model


    cfg = add_export_config(cfg)
    # cfg.merge_from_file(args.config_file)
    # cfg.merge_from_list(args.opts)
    cfg.DATASETS.TEST = ("page",)
    cfg.freeze()
    if cfg.MODEL.DEVICE != "cpu":
        TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2])
        assert TORCH_VERSION >= (1, 5), "PyTorch>=1.5 required for GPU conversion!"
    return cfg

Run it with:

python caffe2_converter.py --format onnx --output ./caffe2_model/  MODEL.DEVICE cpu

The conversion succeeds.

No real pitfalls up to this point.
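To double-check the exported file, it's easy to load it with onnx and eyeball the graph metadata. A quick sketch; the path follows from the --output directory used above:

import onnx

# Load the exported model and print some basic graph info
model = onnx.load("./caffe2_model/model.onnx")
print("opset imports:", model.opset_import)
print("inputs:", [i.name for i in model.graph.input])
print("outputs:", [o.name for o in model.graph.output])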

to caffe2 pb

python caffe2_converter.py --format caffe2 --output ./caffe2_model/  MODEL.DEVICE cpu
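The caffe2 export writes the predict/init nets as protobufs into the output directory, and detectron2 can load them back for Python inference, which is also what --run-eval relies on. A minimal sketch:

from detectron2.export import Caffe2Model

# Load the exported protobufs back; the resulting object accepts the same
# list-of-dict inputs as the original torch model
caffe2_model = Caffe2Model.load_protobuf("./caffe2_model/")
# outputs = caffe2_model(first_batch)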

onnx to tensorflow's pb

Environment setup:

Option 1:

pip install onnx
pip install onnx-tf

Option 2:

# installation:
git clone https://github.com/onnx/onnx.git && cd onnx
git submodule update --init --recursive
pip install -e .
# Install Tensorflow >= 2.0 and tensorflow-addons. (Note: for Tensorflow 1.x please refer to the tf-1.x branch)
git clone https://github.com/onnx/onnx-tensorflow.git && cd onnx-tensorflow
pip install -e .

Attempting the conversion:

The code:

import onnx
from onnx_tf.backend import prepare

# Load the ONNX file
model = onnx.load('./output_13/model.onnx')

# Import the ONNX model to Tensorflow
tf_rep = prepare(model)
# Input nodes to the model
print('inputs:', tf_rep.inputs)

# Output nodes from the model
print('outputs:', tf_rep.outputs)

# All nodes in the model
print('tensor_dict:')
print(tf_rep.tensor_dict)
tf_rep.export_graph("./output_13/model.pb")
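For reference, if export_graph succeeds, the frozen GraphDef can be loaded back with the TF 1.x API roughly like this (a sketch only; in my case the export kept failing, see the problems below):

import tensorflow as tf

# Load the frozen graph written by onnx-tf and import it into a new graph
with tf.gfile.GFile("./output_13/model.pb", "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name="")
print([op.name for op in graph.get_operations()][:10])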

Problem 1:

ModuleNotFoundError: No module named 'tensorflow_addons'

Solution:

  1. pip install tensorflow-addons failed: the package could not be found

  2. pip install --upgrade pip

  3. After upgrading pip, run pip install tensorflow-addons again

Problem 2:

Since I need a TensorFlow 1.x model, onnx-tf has to be reinstalled from its tf-1.x branch:

git clone https://github.com/onnx/onnx-tensorflow.git && cd onnx-tensorflow
git checkout -b tf-1.x remotes/origin/tf-1.x
pip install -e .

conda uninstall tensorflow
conda install tensorflow=1.15.0

Problem 3:

Even with all that set up, the conversion still errors out:

ValueError: '_wrapped_model.backbone.fpn_lateral2.bias' is not a valid node name

Checked onnx-tf for an existing issue, and sure enough there is one, still open: issues/609. Fortunately there's a PR for it; since no release containing it had been cut yet, I had to patch the source myself following the PR and run pip install -e . again.

That done, I tried the conversion again. It ran a bit further and the old error was gone, so that one is presumably fixed, but a new error showed up...

Problem 4:

Traceback (most recent call last):
  File "/root/dxq/detectron2/page/pth_to_pb.py", line 8, in <module>
    tf_rep = prepare(model)
  File "/mnt/new-1t-ssd/dxq/onnx-tensorflow/onnx_tf/backend.py", line 66, in prepare
    return cls.onnx_model_to_tensorflow_rep(model, strict)
  File "/mnt/new-1t-ssd/dxq/onnx-tensorflow/onnx_tf/backend.py", line 86, in onnx_model_to_tensorflow_rep
    return cls._onnx_graph_to_tensorflow_rep(model.graph, opset_import, strict)
  File "/mnt/new-1t-ssd/dxq/onnx-tensorflow/onnx_tf/backend.py", line 147, in _onnx_graph_to_tensorflow_rep
    strict=strict)
  File "/mnt/new-1t-ssd/dxq/onnx-tensorflow/onnx_tf/backend.py", line 249, in _onnx_node_to_tensorflow_op
    handler = handlers[node.domain].get(node.op_type, None)
KeyError: 'org.pytorch._caffe2'
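The KeyError comes from nodes whose domain is the custom org.pytorch._caffe2 namespace: Caffe2Tracer exports detectron2's detection-specific ops (RoIAlign variants, proposal generation, and so on) as caffe2 custom ops, and onnx-tf simply has no handler table for that domain. A quick diagnostic sketch to list them (path assumed):

import onnx
from collections import Counter

model = onnx.load("./output_13/model.onnx")
# Count op types outside the default ONNX domain; these are the nodes
# onnx-tf has no handlers for
custom_ops = Counter(
    (node.domain, node.op_type) for node in model.graph.node if node.domain
)
for (domain, op_type), count in custom_ops.items():
    print(domain, op_type, count)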

This time even the official repo couldn't save me. The closest thing I found is a similar issue, issues/639, where someone vents about onnx-tf:

Which is exactly how I feel...

Stopping to think it over

Nothing I try works; the obstacles keep piling up. Time to stop, sort out the situation, and rethink the approach.

To be continued...

I haven't found any particularly good articles on converting and deploying detectron2 models. If you have hands-on experience with this kind of deployment, I'd be glad to hear from you and compare notes.

Giving up on converting to the pb format that TensorFlow model serving supports

These are two competing platforms' ecosystems. Simple models might survive the round trip, but anything with a slightly more complex architecture is bound to hit compatibility problems. Google and Facebook were never going to be that generous: each guards its own moat carefully, and once developers are in the pit, they'd rather keep them there. Having made peace with that, I turned to deploying the detectron2 model natively instead.

Attempt 1: pth --> pt

Converting to the TorchScript model format:


import torch
from torch.autograd import Variable

image = Variable(torch.ones(3, 800, 800)).cuda()

inputs = {"image": image, "height": 800, "width": 800}
input_example = [inputs]
checkpoint = torch.load('./output_13/model_final.pth')
y=checkpoint(input_example)
traced_script_module = torch.jit.trace(checkpoint.model, input_example)
traced_script_module.save("./output_13/output.pt")

Damn, still an error:

y=checkpoint(input_example)
TypeError: 'dict' object is not callable
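In hindsight the error is expected: torch.load() on a checkpoint file returns a plain dict of weights, not a callable module. To get a model object, the architecture has to be built first and the weights loaded into it, something like this (a sketch, reusing the cfg from setup_cfg above):

from detectron2.checkpoint import DetectionCheckpointer
from detectron2.modeling import build_model

# torch.load() only yields a state dict; build the model, then load weights
model = build_model(cfg)
DetectionCheckpointer(model).load("./output_13/model_final.pth")
model.eval()
# Even then, plain torch.jit.trace struggles with detectron2's list-of-dict
# inputs and dynamic control flow, which is exactly what the official
# exporter (Caffe2Tracer) works around.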

You can't force it

Since detectron2 already ships with ready-made model export support, just convert with it and deploy directly. Supported formats: "caffe2", "onnx", "torchscript".

Because this path is officially supported, it's just a matter of setting the arguments and exporting; no problems this time:

python caffe2_converter.py --format torchscript --output ./caffe2_model/ts-gpu/  MODEL.DEVICE cuda
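The exported model.ts can then be loaded straight back with torch.jit.load (a minimal sketch; note the traced model expects the same preprocessed tensor inputs it was traced with, not the original dict format):

import torch

# Load the TorchScript model exported above
ts_model = torch.jit.load("./caffe2_model/ts-gpu/model.ts")
ts_model.eval()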
