zhiqwang · zhiqwang · Nov 24, 2021 · Nov 24, 2021 · Nov 24, 2021 · Nov 24, 2021
diff --git a/notebooks/how-to-align-with-ultralytics-yolov5.ipynb b/notebooks/how-to-align-with-ultralytics-yolov5.ipynb
diff --git a/notebooks/inference-pytorch-export-libtorch.ipynb b/notebooks/inference-pytorch-export-libtorch.ipynb
diff --git a/test/test_utils.py b/test/test_utils.py
@@ -53,6 +53,7 @@ def test_load_from_ultralytics(
     assert len(model_info["strides"]) == 4 if use_p6 else 3
 
 
+@pytest.mark.skip(reason="Due to #235")
 @pytest.mark.parametrize(
     "arch, version, upstream_version, hash_prefix",
     [("yolov5s-VOC", "r4.0", "v5.0", "23818cff")],

diff --git a/yolort/v5/helper.py b/yolort/v5/helper.py
@@ -4,8 +4,9 @@
 
 import torch
 
+from .models import AutoShape
 from .models.yolo import Model
-from .utils import attempt_download, set_logging
+from .utils import attempt_download, intersect_dicts, set_logging
 
 __all__ = ["add_yolov5_context", "load_yolov5_model", "get_yolov5_size"]
 
@@ -63,9 +64,11 @@ def load_yolov5_model(checkpoint_path: str, autoshape: bool = False, verbose: bo
         model_ckpt = ckpt["model"]  # load model
 
     model = Model(model_ckpt.yaml)  # create model
-    model.load_state_dict(model_ckpt.float().state_dict())  # load state_dict
+    ckpt_state_dict = model_ckpt.float().state_dict()  # checkpoint state_dict as FP32
+    ckpt_state_dict = intersect_dicts(ckpt_state_dict, model.state_dict(), exclude=["anchors"])
+    model.load_state_dict(ckpt_state_dict, strict=False)
 
     if autoshape:
-        model = model.autoshape()
+        model = AutoShape(model)
 
     return model
diff --git a/yolort/v5/models/common.py b/yolort/v5/models/common.py
@@ -1,4 +1,4 @@
-# YOLOv5 by Ultralytics, GPL-3.0 license
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
 """
 Common modules
 """
@@ -16,19 +16,17 @@
 from PIL import Image
 from torch import nn, Tensor
 from torch.cuda import amp
-from yolort.v5.utils.datasets import exif_transpose, letterbox
 from yolort.v5.utils.general import (
     colorstr,
     increment_path,
     is_ascii,
     make_divisible,
     non_max_suppression,
-    save_one_box,
     scale_coords,
     xyxy2xywh,
 )
-from yolort.v5.utils.plots import Annotator, colors
-from yolort.v5.utils.torch_utils import time_sync
+from yolort.v5.utils.plots import Annotator, colors, save_one_box
+from yolort.v5.utils.torch_utils import copy_attr, time_sync
 
 LOGGER = logging.getLogger(__name__)
 
@@ -414,32 +412,52 @@ def forward(self, x):
 
 
 class AutoShape(nn.Module):
-    # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs.
-    # Includes preprocessing, inference and NMS
+    """
+    YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs.
+    Includes preprocessing, inference and NMS
+    """
+
     conf = 0.25  # NMS confidence threshold
     iou = 0.45  # NMS IoU threshold
-    classes = None  # (optional list) filter by class
+    # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
+    classes = None
     multi_label = False  # NMS multiple labels per box
     max_det = 1000  # maximum number of detections per image
 
     def __init__(self, model):
         super().__init__()
+        LOGGER.info("Adding AutoShape... ")
+        # copy attributes
+        copy_attr(self, model, include=("yaml", "nc", "hyp", "names", "stride", "abc"), exclude=())
         self.model = model.eval()
 
-    def autoshape(self):
-        LOGGER.info("AutoShape already enabled, skipping... ")  # model already converted to model.autoshape()
+    def _apply(self, fn):
+        """
+        Apply to(), cpu(), cuda(), half() to model tensors that
+        are not parameters or registered buffers
+        """
+        self = super()._apply(fn)
+        m = self.model.model[-1]  # Detect()
+        m.stride = fn(m.stride)
+        m.grid = list(map(fn, m.grid))
+        if isinstance(m.anchor_grid, list):
+            m.anchor_grid = list(map(fn, m.anchor_grid))
         return self
 
     @torch.no_grad()
     def forward(self, imgs, size=640, augment=False, profile=False):
-        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
-        #   file:       imgs = 'data/images/zidane.jpg'  # str or PosixPath
-        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
-        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
-        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
-        #   numpy:           = np.zeros((640,1280,3))  # HWC
-        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
-        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
+        """
+        Inference from various sources. For height=640, width=1280, RGB images example inputs are:
+            - file: imgs = 'data/images/zidane.jpg'  # str or PosixPath
+            - URI: = 'https://ultralytics.com/images/zidane.jpg'
+            - OpenCV: = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
+            - PIL: = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
+            - numpy: = np.zeros((640,1280,3))  # HWC
+            - torch: = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
+            - multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
+        """
+        from yolort.v5.utils.augmentations import letterbox
+        from yolort.v5.utils.datasets import exif_transpose
 
         t = [time_sync()]
         p = next(self.model.parameters())  # for device and type
@@ -448,10 +466,10 @@ def forward(self, imgs, size=640, augment=False, profile=False):
                 return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference
 
         # Pre-process
-        n, imgs = (
-            (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])
-        )  # number of images, list of images
-        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
+        # number of images, list of images
+        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])
+        # image and inference shapes, filenames
+        shape0, shape1, files = [], [], []
         for i, im in enumerate(imgs):
             f = f"image{i}"  # filename
             if isinstance(im, (str, Path)):  # filename or uri
@@ -476,7 +494,7 @@ def forward(self, imgs, size=640, augment=False, profile=False):
         x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
         x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
         x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
-        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.0  # uint8 to fp16/32
+        x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32
         t.append(time_sync())
 
         with amp.autocast(enabled=p.device.type != "cpu"):
@@ -492,7 +510,7 @@ def forward(self, imgs, size=640, augment=False, profile=False):
                 classes=self.classes,
                 multi_label=self.multi_label,
                 max_det=self.max_det,
-            )  # NMS
+            )
             for i in range(n):
                 scale_coords(shape1, y[i][:, :4], shape0[i])
 

diff --git a/yolort/v5/models/experimental.py b/yolort/v5/models/experimental.py
@@ -1,4 +1,4 @@
-# YOLOv5 by Ultralytics, GPL-3.0 license
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
 """
 Experimental modules
 """
@@ -7,7 +7,7 @@
 import torch
 from torch import nn
 
-from . import Conv
+from .common import Conv
 
 
 class CrossConv(nn.Module):