# Copyright (c) OpenMMLab. All rights reserved.
|
|
import os.path as osp
|
|
from functools import partial
|
|
|
|
import mmcv
|
|
import numpy as np
|
|
import pytest
|
|
import torch
|
|
from mmcv.cnn import Scale
|
|
|
|
from mmdet import digit_version
|
|
from mmdet.models import build_detector
|
|
from mmdet.models.dense_heads import (FCOSHead, FSAFHead, RetinaHead, SSDHead,
|
|
YOLOV3Head)
|
|
from .utils import ort_validate
|
|
|
|
data_path = osp.join(osp.dirname(__file__), 'data')
|
|
|
|
# The ONNX Runtime export path requires torch newer than 1.5.0; skip the
# whole module on older installations.
_MIN_TORCH = digit_version('1.5.0')
if digit_version(torch.__version__) <= _MIN_TORCH:
    pytest.skip(
        'ort backend does not support version below 1.5.0',
        allow_module_level=True)
|
|
|
|
|
|
def test_cascade_onnx_export():
    """Export Cascade R-CNN to ONNX with dynamic batch/spatial axes.

    Builds the detector from its config, binds empty ``img_metas`` so the
    model can be traced with a single tensor input, and exports it with
    opset 11.
    """
    config_path = './configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py'
    cfg = mmcv.Config.fromfile(config_path)
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    with torch.no_grad():
        # Freeze img_metas so forward() only needs the image tensor.
        model.forward = partial(model.forward, img_metas=[[dict()]])

        # The input is NCHW, so dim 2 is the height and dim 3 the width.
        # (The previous labels had them swapped; the names are cosmetic
        # but misleading when inspecting the exported graph.)
        dynamic_axes = {
            'input_img': {
                0: 'batch',
                2: 'height',
                3: 'width'
            },
            'dets': {
                0: 'batch',
                1: 'num_dets',
            },
            'labels': {
                0: 'batch',
                1: 'num_dets',
            },
        }
        torch.onnx.export(
            model, [torch.rand(1, 3, 400, 500)],
            'tmp.onnx',
            output_names=['dets', 'labels'],
            input_names=['input_img'],
            keep_initializers_as_inputs=True,
            do_constant_folding=True,
            verbose=False,
            opset_version=11,
            dynamic_axes=dynamic_axes)
    # Clean up the exported artifact so repeated runs leave no residue.
    import os
    if osp.exists('tmp.onnx'):
        os.remove('tmp.onnx')
|
|
|
|
|
|
def test_faster_onnx_export():
    """Export Faster R-CNN to ONNX with dynamic batch/spatial axes.

    Builds the detector from its config, binds empty ``img_metas`` so the
    model can be traced with a single tensor input, and exports it with
    opset 11.
    """
    config_path = './configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
    cfg = mmcv.Config.fromfile(config_path)
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    with torch.no_grad():
        # Freeze img_metas so forward() only needs the image tensor.
        model.forward = partial(model.forward, img_metas=[[dict()]])

        # The input is NCHW, so dim 2 is the height and dim 3 the width.
        # (The previous labels had them swapped; the names are cosmetic
        # but misleading when inspecting the exported graph.)
        dynamic_axes = {
            'input_img': {
                0: 'batch',
                2: 'height',
                3: 'width'
            },
            'dets': {
                0: 'batch',
                1: 'num_dets',
            },
            'labels': {
                0: 'batch',
                1: 'num_dets',
            },
        }
        torch.onnx.export(
            model, [torch.rand(1, 3, 400, 500)],
            'tmp.onnx',
            output_names=['dets', 'labels'],
            input_names=['input_img'],
            keep_initializers_as_inputs=True,
            do_constant_folding=True,
            verbose=False,
            opset_version=11,
            dynamic_axes=dynamic_axes)
    # Clean up the exported artifact so repeated runs leave no residue.
    import os
    if osp.exists('tmp.onnx'):
        os.remove('tmp.onnx')
|
|
|
|
|
|
def retinanet_config():
    """Build a lightweight RetinaNet head for the ONNX export tests."""
    anchor_cfg = dict(
        type='AnchorGenerator',
        octave_base_scale=4,
        scales_per_octave=3,
        ratios=[0.5, 1.0, 2.0],
        strides=[8, 16, 32, 64, 128])
    coder_cfg = dict(
        type='DeltaXYWHBBoxCoder',
        target_means=[.0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0])

    test_cfg = mmcv.Config(
        dict(
            deploy_nms_pre=0,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100))

    model = RetinaHead(
        num_classes=4,
        in_channels=1,
        stacked_convs=6,
        feat_channels=2,
        anchor_generator=anchor_cfg,
        bbox_coder=coder_cfg,
        test_cfg=test_cfg)
    # Gradients are irrelevant for export/validation.
    model.requires_grad_(False)
    return model
|
|
|
|
|
|
def test_retina_head_forward_single():
    """Test RetinaNet Head single forward in torch and onnxruntime env."""
    model = retinanet_config()

    # A single 32x32 feature map with the head's expected channel count.
    single_feat = torch.rand(1, model.in_channels, 32, 32)
    # Check that the torch result matches the onnxruntime result.
    ort_validate(model.forward_single, single_feat)
|
|
|
|
|
|
def test_retina_head_forward():
    """Test RetinaNet Head forward in torch and onnxruntime env."""
    model = retinanet_config()
    s = 128
    # The head takes one feature map per pyramid level; the spatial sizes
    # here are 32, 16, 8, 4 and 2.
    feats = []
    for lvl in range(len(model.prior_generator.strides)):
        size = s // (2**(lvl + 2))
        feats.append(torch.rand(1, model.in_channels, size, size))
    ort_validate(model.forward, feats)
|
|
|
|
|
|
def test_retinanet_head_onnx_export():
    """Test RetinaNet Head _get_bboxes() in torch and onnxruntime env."""
    model = retinanet_config()
    s = 128
    img_metas = [
        dict(
            img_shape_for_onnx=torch.Tensor([s, s]),
            scale_factor=np.ones(4),
            pad_shape=(s, s, 3),
            img_shape=(s, s, 2))
    ]

    # retina_head_get_bboxes.pkl stores ten torch.rand() tensors: the
    # first five are cls_score maps and the last five are bbox maps, with
    # sizes (1, 36, 32, 32), (1, 36, 16, 16), (1, 36, 8, 8),
    # (1, 36, 4, 4) and (1, 36, 2, 2).
    feats = mmcv.load(osp.join(data_path, 'retina_head_get_bboxes.pkl'))
    cls_score, bboxes = feats[:5], feats[5:]

    # Bind metas and disable NMS so only the raw decode path is compared.
    model.onnx_export = partial(
        model.onnx_export, img_metas=img_metas, with_nms=False)
    ort_validate(model.onnx_export, (cls_score, bboxes))
|
|
|
|
|
|
def yolo_config():
    """Build a small YOLOV3 head for the ONNX export tests."""
    anchor_cfg = dict(
        type='YOLOAnchorGenerator',
        base_sizes=[[(116, 90), (156, 198), (373, 326)],
                    [(30, 61), (62, 45), (59, 119)],
                    [(10, 13), (16, 30), (33, 23)]],
        strides=[32, 16, 8])

    test_cfg = mmcv.Config(
        dict(
            deploy_nms_pre=0,
            min_bbox_size=0,
            score_thr=0.05,
            conf_thr=0.005,
            nms=dict(type='nms', iou_threshold=0.45),
            max_per_img=100))

    model = YOLOV3Head(
        num_classes=4,
        in_channels=[1, 1, 1],
        out_channels=[16, 8, 4],
        anchor_generator=anchor_cfg,
        bbox_coder=dict(type='YOLOBBoxCoder'),
        test_cfg=test_cfg)
    model.requires_grad_(False)
    # yolov3 need eval()
    model.cpu().eval()
    return model
|
|
|
|
|
|
def test_yolov3_head_forward():
    """Test Yolov3 head forward() in torch and ort env."""
    model = yolo_config()

    # One single-channel feature map per scale: 16x16, 8x8 and 4x4.
    feats = []
    for lvl in range(len(model.in_channels)):
        size = 64 // (2**(lvl + 2))
        feats.append(torch.rand(1, 1, size, size))
    ort_validate(model.forward, feats)
|
|
|
|
|
|
def test_yolov3_head_onnx_export():
    """Test yolov3 head get_bboxes() in torch and ort env."""
    model = yolo_config()
    s = 128
    img_metas = [
        dict(
            img_shape_for_onnx=torch.Tensor([s, s]),
            img_shape=(s, s, 3),
            scale_factor=np.ones(4),
            pad_shape=(s, s, 3))
    ]

    # yolov3_head_get_bboxes.pkl stores a list of torch.rand() tensors of
    # sizes (1, 27, 32, 32), (1, 27, 16, 16) and (1, 27, 8, 8).
    pred_maps = mmcv.load(osp.join(data_path, 'yolov3_head_get_bboxes.pkl'))

    # Bind metas and disable NMS so only the raw decode path is compared.
    model.onnx_export = partial(
        model.onnx_export, img_metas=img_metas, with_nms=False)
    ort_validate(model.onnx_export, pred_maps)
|
|
|
|
|
|
def fcos_config():
    """Build a minimal FCOS head for the ONNX export tests."""
    test_cfg = mmcv.Config(
        dict(
            deploy_nms_pre=0,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100))

    model = FCOSHead(num_classes=4, in_channels=1, test_cfg=test_cfg)
    # Gradients are irrelevant for export/validation.
    model.requires_grad_(False)
    return model
|
|
|
|
|
|
def test_fcos_head_forward_single():
    """Test fcos forward single in torch and ort env."""
    model = fcos_config()

    feat = torch.rand(1, model.in_channels, 32, 32)
    # Pre-bind the per-level scale and stride so the bound callable only
    # takes the feature map.
    model.forward_single = partial(
        model.forward_single,
        scale=Scale(1.0).requires_grad_(False),
        stride=(4, ))
    ort_validate(model.forward_single, feat)
|
|
|
|
|
|
def test_fcos_head_forward():
    """Test fcos forward on multi-level feature maps."""
    model = fcos_config()
    s = 128
    # Five pyramid levels with downsampling factors 4..64.
    feats = [
        torch.rand(1, 1, s // down, s // down)
        for down in (4, 8, 16, 32, 64)
    ]
    ort_validate(model.forward, feats)
|
|
|
|
|
|
def test_fcos_head_onnx_export():
    """Test fcos head get_bboxes() in ort."""
    model = fcos_config()
    s = 128
    img_metas = [
        dict(
            img_shape_for_onnx=torch.Tensor([s, s]),
            img_shape=(s, s, 3),
            scale_factor=np.ones(4),
            pad_shape=(s, s, 3))
    ]

    downsamples = (4, 8, 16, 32, 64)

    def _rand_maps(channels):
        # One random map per pyramid level with the given channel count.
        return [
            torch.rand(1, channels, s // down, s // down)
            for down in downsamples
        ]

    cls_scores = _rand_maps(model.num_classes)
    bboxes = _rand_maps(4)
    centerness = _rand_maps(1)

    # Bind metas and disable NMS so only the raw decode path is compared.
    model.onnx_export = partial(
        model.onnx_export, img_metas=img_metas, with_nms=False)
    ort_validate(model.onnx_export, (cls_scores, bboxes, centerness))
|
|
|
|
|
|
def fsaf_config():
    """Build a minimal FSAF head for the ONNX export tests."""
    anchor_cfg = dict(
        type='AnchorGenerator',
        octave_base_scale=1,
        scales_per_octave=1,
        ratios=[1.0],
        strides=[8, 16, 32, 64, 128])

    test_cfg = mmcv.Config(
        dict(
            deploy_nms_pre=0,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100))

    model = FSAFHead(
        num_classes=4,
        in_channels=1,
        anchor_generator=anchor_cfg,
        test_cfg=test_cfg)
    # Gradients are irrelevant for export/validation.
    model.requires_grad_(False)
    return model
|
|
|
|
|
|
def test_fsaf_head_forward_single():
    """Test FSAF Head forward_single() in torch and onnxruntime env."""
    # NOTE: the docstring previously said "RetinaNet Head" — a copy-paste
    # leftover; this test exercises the FSAF head.
    fsaf_model = fsaf_config()

    feat = torch.rand(1, fsaf_model.in_channels, 32, 32)
    ort_validate(fsaf_model.forward_single, feat)
|
|
|
|
|
|
def test_fsaf_head_forward():
    """Test FSAF Head forward in torch and onnxruntime env."""
    fsaf_model = fsaf_config()
    s = 128
    # Use prior_generator for consistency with test_retina_head_forward,
    # which reads the same attribute on its head.
    feats = [
        torch.rand(1, fsaf_model.in_channels, s // (2**(i + 2)),
                   s // (2**(i + 2)))
        for i in range(len(fsaf_model.prior_generator.strides))
    ]
    ort_validate(fsaf_model.forward, feats)
|
|
|
|
|
|
def test_fsaf_head_onnx_export():
    """Test FSAF Head get_bboxes in torch and onnxruntime env."""
    # NOTE: the docstring previously said "RetinaNet Head" — a copy-paste
    # leftover; this test exercises the FSAF head.
    fsaf_model = fsaf_config()
    s = 256
    img_metas = [{
        'img_shape_for_onnx': torch.Tensor([s, s]),
        'scale_factor': np.ones(4),
        'pad_shape': (s, s, 3),
        'img_shape': (s, s, 2)
    }]

    # The data of fsaf_head_get_bboxes.pkl contains two parts:
    # cls_score(list(Tensor)) and bboxes(list(Tensor)),
    # where each torch.Tensor is generated by torch.rand().
    # the cls_score's size: (1, 4, 64, 64), (1, 4, 32, 32),
    # (1, 4, 16, 16), (1, 4, 8, 8), (1, 4, 4, 4).
    # the bboxes's size: (1, 4, 64, 64), (1, 4, 32, 32),
    # (1, 4, 16, 16), (1, 4, 8, 8), (1, 4, 4, 4).
    fsaf_head_data = 'fsaf_head_get_bboxes.pkl'
    feats = mmcv.load(osp.join(data_path, fsaf_head_data))
    cls_score = feats[:5]
    bboxes = feats[5:]

    # Bind metas and disable NMS so only the raw decode path is compared.
    fsaf_model.onnx_export = partial(
        fsaf_model.onnx_export, img_metas=img_metas, with_nms=False)
    ort_validate(fsaf_model.onnx_export, (cls_score, bboxes))
|
|
|
|
|
|
def ssd_config():
    """Build a small SSD head for the ONNX export tests."""
    anchor_cfg = dict(
        type='SSDAnchorGenerator',
        scale_major=False,
        input_size=300,
        basesize_ratio_range=(0.15, 0.9),
        strides=[8, 16, 32, 64, 100, 300],
        ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]])
    coder_cfg = dict(
        type='DeltaXYWHBBoxCoder',
        target_means=[.0, .0, .0, .0],
        target_stds=[0.1, 0.1, 0.2, 0.2])

    test_cfg = mmcv.Config(
        dict(
            deploy_nms_pre=0,
            nms=dict(type='nms', iou_threshold=0.45),
            min_bbox_size=0,
            score_thr=0.02,
            max_per_img=200))

    model = SSDHead(
        num_classes=4,
        in_channels=(4, 8, 4, 2, 2, 2),
        anchor_generator=anchor_cfg,
        bbox_coder=coder_cfg,
        test_cfg=test_cfg)
    # Gradients are irrelevant for export/validation.
    model.requires_grad_(False)
    return model
|
|
|
|
|
|
def test_ssd_head_forward():
    """Test SSD Head forward in torch and onnxruntime env."""
    ssd_model = ssd_config()

    # One spatial size per feature level. The original list carried an
    # unused trailing entry (7 values for 6 levels); zip keeps the pairing
    # explicit and cannot run past the shorter sequence.
    featmap_size = [38, 19, 10, 6, 5, 3]

    feats = [
        torch.rand(1, channels, size, size)
        for channels, size in zip(ssd_model.in_channels, featmap_size)
    ]
    ort_validate(ssd_model.forward, feats)
|
|
|
|
|
|
def test_ssd_head_onnx_export():
    """Test SSD Head get_bboxes in torch and onnxruntime env."""
    model = ssd_config()
    s = 300
    img_metas = [
        dict(
            img_shape_for_onnx=torch.Tensor([s, s]),
            scale_factor=np.ones(4),
            pad_shape=(s, s, 3),
            img_shape=(s, s, 2))
    ]

    # ssd_head_get_bboxes.pkl stores twelve torch.rand() tensors: six
    # cls_score maps of sizes (1, 20, 38, 38), (1, 30, 19, 19),
    # (1, 30, 10, 10), (1, 30, 5, 5), (1, 20, 3, 3), (1, 20, 1, 1),
    # followed by six bbox maps of sizes (1, 16, 38, 38), (1, 24, 19, 19),
    # (1, 24, 10, 10), (1, 24, 5, 5), (1, 16, 3, 3), (1, 16, 1, 1).
    feats = mmcv.load(osp.join(data_path, 'ssd_head_get_bboxes.pkl'))
    cls_score, bboxes = feats[:6], feats[6:]

    # Bind metas and disable NMS so only the raw decode path is compared.
    model.onnx_export = partial(
        model.onnx_export, img_metas=img_metas, with_nms=False)
    ort_validate(model.onnx_export, (cls_score, bboxes))
|