์ฝ˜ํ…์ธ ๋กœ ๊ฑด๋„ˆ๋›ฐ๊ธฐ

참조 ultralytics/nn/modules/conv.py

참고

์ด ํŒŒ์ผ์€ https://github.com/ultralytics/ ultralytics/blob/main/ ultralytics/nn/modules/conv .py์—์„œ ํ™•์ธํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ๋ฌธ์ œ๋ฅผ ๋ฐœ๊ฒฌํ•˜๋ฉด ํ’€ ๋ฆฌํ€˜์ŠคํŠธ (๐Ÿ› ๏ธ) ๋ฅผ ํ†ตํ•ด ๋ฌธ์ œ๋ฅผ ํ•ด๊ฒฐํ•˜๋„๋ก ๋„์™€์ฃผ์„ธ์š”. ๊ฐ์‚ฌํ•ฉ๋‹ˆ๋‹ค ๐Ÿ™!



ultralytics.nn.modules.conv.Conv

๋ฒ ์ด์Šค: Module

args(ch_in, ch_out, 커널, 보폭, 패딩, 그룹, 확대, 활성화)를 사용한 표준 컨볼루션.

의 소스 코드 ultralytics/nn/modules/conv.py
class Conv(nn.Module):
    """Standard convolution block with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """
        Build the bias-free convolution, its batch norm, and select the activation.

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Kernel size.
            s: Stride.
            p: Padding; passed to ``autopad`` together with ``k`` and ``d``.
            g: Convolution groups.
            d: Dilation.
            act: ``True`` selects ``default_act``, an ``nn.Module`` is used as given, anything else
                results in ``nn.Identity``.
        """
        super().__init__()
        padding = autopad(k, p, d)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Run the convolution, then batch normalization, then the activation."""
        y = self.conv(x)
        y = self.bn(y)
        return self.act(y)

    def forward_fuse(self, x):
        """Run the convolution followed directly by the activation, skipping ``self.bn``."""
        conv_out = self.conv(x)
        return self.act(conv_out)

__init__(c1, c2, k=1, s=1, p=None, g=1, d=1, act=True)

ํ™œ์„ฑํ™”๋ฅผ ํฌํ•จํ•œ ์ฃผ์–ด์ง„ ์ธ์ˆ˜๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ Conv ๋ ˆ์ด์–ด๋ฅผ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
    """Build the bias-free convolution, its batch norm, and pick the activation from ``act``."""
    super().__init__()
    padding = autopad(k, p, d)
    self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, dilation=d, bias=False)
    self.bn = nn.BatchNorm2d(c2)
    # True -> class default, nn.Module -> used as given, anything else -> no activation.
    if act is True:
        self.act = self.default_act
    elif isinstance(act, nn.Module):
        self.act = act
    else:
        self.act = nn.Identity()

forward(x)

์ž…๋ ฅ์— ์ปจ๋ณผ๋ฃจ์…˜, ์ผ๊ด„ ์ •๊ทœํ™” ๋ฐ ํ™œ์„ฑํ™”๋ฅผ ์ ์šฉ tensor.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward(self, x):
    """Run the convolution, then batch normalization, then the activation on the input tensor."""
    y = self.conv(x)
    y = self.bn(y)
    return self.act(y)

forward_fuse(x)

2D ๋ฐ์ดํ„ฐ์˜ ์ „์น˜ ์ปจ๋ณผ๋ฃจ์…˜์„ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward_fuse(self, x):
    """Run the convolution followed directly by the activation, skipping ``self.bn``."""
    conv_out = self.conv(x)
    return self.act(conv_out)



ultralytics.nn.modules.conv.Conv2

๋ฒ ์ด์Šค: Conv

Conv 융합으로 간소화된 RepConv 모듈.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
class Conv2(Conv):
    """Simplified RepConv module with Conv fusing: the main convolution plus a parallel 1x1 convolution."""

    def __init__(self, c1, c2, k=3, s=1, p=None, g=1, d=1, act=True):
        """Initialize the base Conv layer and add the parallel 1x1 convolution branch ``cv2``."""
        super().__init__(c1, c2, k, s, p, g=g, d=d, act=act)
        self.cv2 = nn.Conv2d(c1, c2, 1, s, autopad(1, p, d), groups=g, dilation=d, bias=False)  # add 1x1 conv

    def forward(self, x):
        """Sum the outputs of both convolution branches, then apply batch normalization and activation."""
        return self.act(self.bn(self.conv(x) + self.cv2(x)))

    def forward_fuse(self, x):
        """Apply the (fused) main convolution, batch normalization and activation to the input tensor."""
        return self.act(self.bn(self.conv(x)))

    def fuse_convs(self):
        """
        Fold the 1x1 branch into the main convolution and switch ``forward`` to ``forward_fuse``.

        The 1x1 weights are added at the spatial centre of the main kernel, after which ``cv2`` is deleted.
        Calling this method on an already fused module is a no-op.
        """
        if not hasattr(self, "cv2"):
            return  # already fused; the 1x1 branch is gone
        w = torch.zeros_like(self.conv.weight.data)
        i = [x // 2 for x in w.shape[2:]]  # centre index of each spatial kernel dimension
        w[:, :, i[0] : i[0] + 1, i[1] : i[1] + 1] = self.cv2.weight.data.clone()
        self.conv.weight.data += w
        self.__delattr__("cv2")
        self.forward = self.forward_fuse

__init__(c1, c2, k=3, s=1, p=None, g=1, d=1, act=True)

ํ™œ์„ฑํ™”๋ฅผ ํฌํ•จํ•œ ์ฃผ์–ด์ง„ ์ธ์ˆ˜๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ Conv ๋ ˆ์ด์–ด๋ฅผ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def __init__(self, c1, c2, k=3, s=1, p=None, g=1, d=1, act=True):
    """Initialize the base Conv layer and add the parallel 1x1 convolution branch ``cv2``."""
    super().__init__(c1, c2, k, s, p, g=g, d=d, act=act)
    pointwise_pad = autopad(1, p, d)
    self.cv2 = nn.Conv2d(c1, c2, 1, s, pointwise_pad, groups=g, dilation=d, bias=False)  # parallel 1x1 branch

forward(x)

์ž…๋ ฅ์— ์ปจ๋ณผ๋ฃจ์…˜, ์ผ๊ด„ ์ •๊ทœํ™” ๋ฐ ํ™œ์„ฑํ™”๋ฅผ ์ ์šฉ tensor.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward(self, x):
    """Sum the outputs of both convolution branches, then apply batch normalization and activation."""
    branch_sum = self.conv(x) + self.cv2(x)
    normalized = self.bn(branch_sum)
    return self.act(normalized)

forward_fuse(x)

์ž…๋ ฅ์— ์œตํ•ฉ ์ปจ๋ณผ๋ฃจ์…˜, ์ผ๊ด„ ์ •๊ทœํ™” ๋ฐ ํ™œ์„ฑํ™”๋ฅผ ์ ์šฉ tensor.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward_fuse(self, x):
    """Apply the (fused) main convolution, then batch normalization, then the activation."""
    y = self.conv(x)
    y = self.bn(y)
    return self.act(y)

fuse_convs()

๋ณ‘๋ ฌ ์ปจ๋ณผ๋ฃจ์…˜์„ ์œตํ•ฉํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def fuse_convs(self):
    """
    Fold the parallel 1x1 branch into the main convolution and switch ``forward`` to ``forward_fuse``.

    The 1x1 weights are added at the spatial centre of the main kernel, after which ``cv2`` is deleted.
    Calling this method on an already fused module is a no-op.
    """
    if not hasattr(self, "cv2"):
        return  # already fused; the 1x1 branch is gone
    w = torch.zeros_like(self.conv.weight.data)
    i = [x // 2 for x in w.shape[2:]]  # centre index of each spatial kernel dimension
    w[:, :, i[0] : i[0] + 1, i[1] : i[1] + 1] = self.cv2.weight.data.clone()
    self.conv.weight.data += w
    self.__delattr__("cv2")
    self.forward = self.forward_fuse



ultralytics.nn.modules.conv.LightConv

๋ฒ ์ด์Šค: Module

args(ch_in, ch_out, 커널)을 사용한 라이트 컨볼루션.

https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
class LightConv(nn.Module):
    """
    Light convolution with args(ch_in, ch_out, kernel).

    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
    """

    def __init__(self, c1, c2, k=1, act=nn.ReLU()):
        """Create a 1x1 Conv without activation followed by a DWConv with kernel size ``k`` and ``act``."""
        super().__init__()
        pointwise = Conv(c1, c2, 1, act=False)
        depthwise = DWConv(c2, c2, k, act=act)
        self.conv1 = pointwise
        self.conv2 = depthwise

    def forward(self, x):
        """Pass the input through ``conv1`` and then ``conv2``."""
        y = self.conv1(x)
        return self.conv2(y)

__init__(c1, c2, k=1, act=nn.ReLU())

ํ™œ์„ฑํ™”๋ฅผ ํฌํ•จํ•œ ์ฃผ์–ด์ง„ ์ธ์ˆ˜๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ Conv ๋ ˆ์ด์–ด๋ฅผ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def __init__(self, c1, c2, k=1, act=nn.ReLU()):
    """Create a 1x1 Conv without activation followed by a DWConv with kernel size ``k`` and ``act``."""
    super().__init__()
    pointwise = Conv(c1, c2, 1, act=False)
    depthwise = DWConv(c2, c2, k, act=act)
    self.conv1 = pointwise
    self.conv2 = depthwise

forward(x)

์ž…๋ ฅ์— 2๊ฐœ์˜ ์ปจ๋ณผ๋ฃจ์…˜์„ ์ ์šฉ tensor.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward(self, x):
    """Pass the input tensor through ``conv1`` and then ``conv2``."""
    y = self.conv1(x)
    return self.conv2(y)



ultralytics.nn.modules.conv.DWConv

๋ฒ ์ด์Šค: Conv

๊นŠ์ด๋ณ„ ์ปจ๋ณผ๋ฃจ์…˜.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
class DWConv(Conv):
    """Depth-wise convolution."""

    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
        """Initialize a Conv whose group count is the greatest common divisor of ``c1`` and ``c2``."""
        groups = math.gcd(c1, c2)
        super().__init__(c1, c2, k, s, g=groups, d=d, act=act)

__init__(c1, c2, k=1, s=1, d=1, act=True)

์ฃผ์–ด์ง„ ํŒŒ๋ผ๋ฏธํ„ฐ๋กœ ๊นŠ์ด๋ณ„ ์ปจ๋ณผ๋ฃจ์…˜์„ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
    """Initialize a Conv whose group count is the greatest common divisor of ``c1`` and ``c2``."""
    groups = math.gcd(c1, c2)
    super().__init__(c1, c2, k, s, g=groups, d=d, act=act)



ultralytics.nn.modules.conv.DWConvTranspose2d

๋ฒ ์ด์Šค: ConvTranspose2d

๊นŠ์ด ๋ฐฉํ–ฅ ํŠธ๋žœ์ŠคํŽ˜์ด์Šค ์ปจ๋ณผ๋ฃจ์…˜.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
class DWConvTranspose2d(nn.ConvTranspose2d):
    """Depth-wise transpose convolution."""

    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
        """Initialize a ConvTranspose2d whose group count is the greatest common divisor of ``c1`` and ``c2``."""
        super().__init__(
            in_channels=c1,
            out_channels=c2,
            kernel_size=k,
            stride=s,
            padding=p1,
            output_padding=p2,
            groups=math.gcd(c1, c2),
        )

__init__(c1, c2, k=1, s=1, p1=0, p2=0)

์ฃผ์–ด์ง„ ํŒŒ๋ผ๋ฏธํ„ฐ๋กœ DWConvTranspose2d ํด๋ž˜์Šค๋ฅผ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
    """Initialize the transposed convolution with a group count equal to gcd(``c1``, ``c2``)."""
    groups = math.gcd(c1, c2)
    super().__init__(c1, c2, k, s, p1, p2, groups=groups)



ultralytics.nn.modules.conv.ConvTranspose

๋ฒ ์ด์Šค: Module

컨볼루션 트랜스포즈 2D 레이어.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
class ConvTranspose(nn.Module):
    """Convolution transpose 2d layer."""

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
        """
        Set up the transposed convolution with optional batch normalization and activation.

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Kernel size.
            s: Stride.
            p: Padding.
            bn: When truthy, add ``BatchNorm2d`` and create the convolution without bias.
            act: ``True`` selects ``default_act``, an ``nn.Module`` is used as given, anything else
                results in ``nn.Identity``.
        """
        super().__init__()
        # The convolution only carries its own bias when no batch norm follows it.
        self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn)
        if bn:
            self.bn = nn.BatchNorm2d(c2)
        else:
            self.bn = nn.Identity()
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Apply the transposed convolution, then ``self.bn``, then the activation."""
        y = self.conv_transpose(x)
        y = self.bn(y)
        return self.act(y)

    def forward_fuse(self, x):
        """Apply the transposed convolution followed directly by the activation, skipping ``self.bn``."""
        upsampled = self.conv_transpose(x)
        return self.act(upsampled)

__init__(c1, c2, k=2, s=2, p=0, bn=True, act=True)

์ผ๊ด„ ์ •๊ทœํ™” ๋ฐ ํ™œ์„ฑํ™” ๊ธฐ๋Šฅ์œผ๋กœ ConvTranspose2d ๋ ˆ์ด์–ด๋ฅผ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
    """Set up the transposed convolution with optional batch normalization and activation."""
    super().__init__()
    # The convolution only carries its own bias when no batch norm follows it.
    self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn)
    if bn:
        self.bn = nn.BatchNorm2d(c2)
    else:
        self.bn = nn.Identity()
    if act is True:
        self.act = self.default_act
    elif isinstance(act, nn.Module):
        self.act = act
    else:
        self.act = nn.Identity()

forward(x)

์ž…๋ ฅ์— ์ „์น˜ ์ปจ๋ณผ๋ฃจ์…˜, ์ผ๊ด„ ์ •๊ทœํ™” ๋ฐ ํ™œ์„ฑํ™”๋ฅผ ์ ์šฉํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward(self, x):
    """Apply the transposed convolution, then ``self.bn``, then the activation to the input."""
    y = self.conv_transpose(x)
    y = self.bn(y)
    return self.act(y)

forward_fuse(x)

์ž…๋ ฅ์— ํ™œ์„ฑํ™” ๋ฐ ์ปจ๋ณผ๋ฃจ์…˜ ์ „์น˜ ์—ฐ์‚ฐ์„ ์ ์šฉํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward_fuse(self, x):
    """Apply the transposed convolution followed directly by the activation, skipping ``self.bn``."""
    upsampled = self.conv_transpose(x)
    return self.act(upsampled)



ultralytics.nn.modules.conv.Focus

๋ฒ ์ด์Šค: Module

WH 정보를 C-스페이스에 집중하세요.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
class Focus(nn.Module):
    """Focus wh information into c-space."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """Create the Conv applied to the rearranged input, which has four times the input channels ``c1``."""
        super().__init__()
        stacked_channels = c1 * 4
        self.conv = Conv(stacked_channels, c2, k, s, p, g, act=act)

    def forward(self, x):
        """
        Stack the four stride-2 sub-grids of the last two dimensions along dim 1 and convolve the result.

        Input shape is (b,c,w,h); the concatenated tensor passed to ``self.conv`` has shape (b,4c,w/2,h/2).
        """
        even_even = x[..., ::2, ::2]
        odd_even = x[..., 1::2, ::2]
        even_odd = x[..., ::2, 1::2]
        odd_odd = x[..., 1::2, 1::2]
        stacked = torch.cat((even_even, odd_even, even_odd, odd_odd), 1)
        return self.conv(stacked)

__init__(c1, c2, k=1, s=1, p=None, g=1, act=True)

์‚ฌ์šฉ์ž ์ •์˜ ์ฑ„๋„, ์ปจ๋ณผ๋ฃจ์…˜, ํŒจ๋”ฉ, ๊ทธ๋ฃน ๋ฐ ํ™œ์„ฑํ™” ๊ฐ’์œผ๋กœ ํฌ์ปค์Šค ๊ฐœ์ฒด๋ฅผ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
    """Create the Conv applied to the rearranged input, which has four times the input channels ``c1``."""
    super().__init__()
    stacked_channels = c1 * 4
    self.conv = Conv(stacked_channels, c2, k, s, p, g, act=act)

forward(x)

์—ฐ๊ฒฐ๋œ tensor ์— ์ปจ๋ณผ๋ฃจ์…˜์„ ์ ์šฉํ•˜๊ณ  ์ถœ๋ ฅ์„ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.

์ž…๋ ฅ ๋„ํ˜•์€ (b,c,w,h)์ด๊ณ  ์ถœ๋ ฅ ๋„ํ˜•์€ (b,4c,w/2,h/2)์ž…๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward(self, x):
    """
    Stack the four stride-2 sub-grids of the last two dimensions along dim 1 and convolve the result.

    Input shape is (b,c,w,h); the concatenated tensor passed to ``self.conv`` has shape (b,4c,w/2,h/2).
    """
    even_even = x[..., ::2, ::2]
    odd_even = x[..., 1::2, ::2]
    even_odd = x[..., ::2, 1::2]
    odd_odd = x[..., 1::2, 1::2]
    stacked = torch.cat((even_even, odd_even, even_odd, odd_odd), 1)
    return self.conv(stacked)



ultralytics.nn.modules.conv.GhostConv

๋ฒ ์ด์Šค: Module

고스트 컨볼루션 https://github.com/huawei-noah/ghostnet.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
class GhostConv(nn.Module):
    """Ghost Convolution https://github.com/huawei-noah/ghostnet."""

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
        """
        Create the primary Conv and the 5x5 Conv that derives the second half of the output from the first.

        Each branch produces ``c2 // 2`` channels; the second Conv uses as many groups as it has channels.
        """
        super().__init__()
        hidden = c2 // 2  # channels produced by each branch
        self.cv1 = Conv(c1, hidden, k, s, None, g, act=act)
        self.cv2 = Conv(hidden, hidden, 5, 1, None, hidden, act=act)

    def forward(self, x):
        """Return the ``cv1`` output concatenated along dim 1 with ``cv2`` applied to that output."""
        primary = self.cv1(x)
        ghost = self.cv2(primary)
        return torch.cat((primary, ghost), 1)

__init__(c1, c2, k=1, s=1, g=1, act=True)

์ž…๋ ฅ ์ฑ„๋„, ์ถœ๋ ฅ ์ฑ„๋„, ์ปค๋„ ํฌ๊ธฐ, ๋ณดํญ, ๊ทธ๋ฃน ๋ฐ ํ™œ์„ฑํ™”๋กœ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
    """
    Create the primary Conv and the 5x5 Conv that derives the second half of the output from the first.

    Each branch produces ``c2 // 2`` channels; the second Conv uses as many groups as it has channels.
    """
    super().__init__()
    hidden = c2 // 2  # channels produced by each branch
    self.cv1 = Conv(c1, hidden, k, s, None, g, act=act)
    self.cv2 = Conv(hidden, hidden, 5, 1, None, hidden, act=act)

forward(x)

๊ณ ์ŠคํŠธ ๋ณ‘๋ชฉ ๊ณ„์ธต์„ ํ†ตํ•œ ์ˆœ๋ฐฉํ–ฅ ์ „ํŒŒ(์—ฐ๊ฒฐ ๊ฑด๋„ˆ๋›ฐ๊ธฐ).

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward(self, x):
    """Return the ``cv1`` output concatenated along dim 1 with ``cv2`` applied to that output."""
    primary = self.cv1(x)
    ghost = self.cv2(primary)
    return torch.cat((primary, ghost), 1)



ultralytics.nn.modules.conv.RepConv

๋ฒ ์ด์Šค: Module

RepConv๋Š” ๊ต์œก ๋ฐ ๋ฐฐํฌ ์ƒํƒœ๋ฅผ ํฌํ•จํ•œ ๊ธฐ๋ณธ ๋‹ด๋‹น์ž ์Šคํƒ€์ผ ๋ธ”๋ก์ž…๋‹ˆ๋‹ค.

์ด ๋ชจ๋“ˆ์€ RT-DETR ์—์„œ ์‚ฌ์šฉ๋ฉ๋‹ˆ๋‹ค. https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
class RepConv(nn.Module):
    """
    RepConv is a basic rep-style block, including training and deploy status.

    Before fusing, the output is the activated sum of a 3x3 Conv branch, a 1x1 Conv branch and an optional
    BatchNorm identity branch. ``fuse_convs`` collapses the branches into a single biased convolution.

    This module is used in RT-DETR.
    Based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
    """

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
        """
        Initialize the RepConv 3x3 and 1x1 branches, the optional identity batch norm, and the activation.

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Kernel size; must be 3.
            s: Stride.
            p: Padding; must be 1.
            g: Convolution groups.
            d: Accepted but not used by this block.
            act: ``True`` selects ``default_act``, an ``nn.Module`` is used as given, anything else
                results in ``nn.Identity``.
            bn: Enable the BatchNorm identity branch (only created when ``c1 == c2`` and ``s == 1``).
            deploy: Accepted but not used by this block.
        """
        super().__init__()
        assert k == 3 and p == 1
        self.g = g
        self.c1 = c1
        self.c2 = c2
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

        self.bn = nn.BatchNorm2d(num_features=c1) if bn and c2 == c1 and s == 1 else None
        self.conv1 = Conv(c1, c2, k, s, p=p, g=g, act=False)
        self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)

    def forward_fuse(self, x):
        """Forward pass of the fused block: the single convolution created by ``fuse_convs``, then activation."""
        return self.act(self.conv(x))

    def forward(self, x):
        """Forward pass of the unfused block: sum of the 3x3, 1x1 and optional identity branches, then activation."""
        id_out = 0 if self.bn is None else self.bn(x)
        return self.act(self.conv1(x) + self.conv2(x) + id_out)

    def get_equivalent_kernel_bias(self):
        """Returns equivalent kernel and bias by adding 3x3 kernel, 1x1 kernel and identity kernel with their biases."""
        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
        kernelid, biasid = self._fuse_bn_tensor(self.bn)
        return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid

    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
        """Zero-pads a 1x1 kernel by one element on each spatial side; returns 0 when the kernel is None."""
        if kernel1x1 is None:
            return 0
        else:
            return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])

    def _fuse_bn_tensor(self, branch):
        """
        Folds a branch's batch normalization statistics into an equivalent kernel and bias.

        Args:
            branch: A ``Conv`` (its ``conv`` weight and ``bn`` are used), an ``nn.BatchNorm2d`` (paired with an
                identity kernel that is built once and cached as ``self.id_tensor``), or ``None``.

        Returns:
            ``(kernel, bias)``; ``(0, 0)`` when ``branch`` is ``None``.
        """
        if branch is None:
            return 0, 0
        if isinstance(branch, Conv):
            kernel = branch.conv.weight
            running_mean = branch.bn.running_mean
            running_var = branch.bn.running_var
            gamma = branch.bn.weight
            beta = branch.bn.bias
            eps = branch.bn.eps
        elif isinstance(branch, nn.BatchNorm2d):
            if not hasattr(self, "id_tensor"):
                input_dim = self.c1 // self.g
                kernel_value = np.zeros((self.c1, input_dim, 3, 3), dtype=np.float32)
                for i in range(self.c1):
                    kernel_value[i, i % input_dim, 1, 1] = 1  # centre tap maps each channel onto itself
                self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
            kernel = self.id_tensor
            running_mean = branch.running_mean
            running_var = branch.running_var
            gamma = branch.weight
            beta = branch.bias
            eps = branch.eps
        std = (running_var + eps).sqrt()
        t = (gamma / std).reshape(-1, 1, 1, 1)
        return kernel * t, beta - running_mean * gamma / std

    def fuse_convs(self):
        """
        Combines the branches into a single convolution, removes the unused attributes and switches ``forward``.

        After fusing, ``forward`` is rebound to ``forward_fuse`` because the branch modules the unfused forward
        pass relies on have been deleted. Calling this method again is a no-op.
        """
        if hasattr(self, "conv"):
            return
        kernel, bias = self.get_equivalent_kernel_bias()
        self.conv = nn.Conv2d(
            in_channels=self.conv1.conv.in_channels,
            out_channels=self.conv1.conv.out_channels,
            kernel_size=self.conv1.conv.kernel_size,
            stride=self.conv1.conv.stride,
            padding=self.conv1.conv.padding,
            dilation=self.conv1.conv.dilation,
            groups=self.conv1.conv.groups,
            bias=True,
        ).requires_grad_(False)
        self.conv.weight.data = kernel
        self.conv.bias.data = bias
        for para in self.parameters():
            para.detach_()
        self.__delattr__("conv1")
        self.__delattr__("conv2")
        if hasattr(self, "nm"):
            self.__delattr__("nm")
        if hasattr(self, "bn"):
            self.__delattr__("bn")
        if hasattr(self, "id_tensor"):
            self.__delattr__("id_tensor")
        # conv1/conv2/bn no longer exist, so the unfused forward pass cannot run any more.
        self.forward = self.forward_fuse

__init__(c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False)

์ž…๋ ฅ, ์ถœ๋ ฅ ๋ฐ ์„ ํƒ์  ํ™œ์„ฑํ™” ๊ธฐ๋Šฅ์œผ๋กœ ๋ผ์ดํŠธ ์ปจ๋ณผ๋ฃจ์…˜ ๋ ˆ์ด์–ด๋ฅผ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
    """Initialize the RepConv 3x3 and 1x1 branches, the optional identity batch norm, and the activation."""
    super().__init__()
    assert k == 3 and p == 1
    self.g = g
    self.c1 = c1
    self.c2 = c2
    if act is True:
        self.act = self.default_act
    elif isinstance(act, nn.Module):
        self.act = act
    else:
        self.act = nn.Identity()

    # The identity branch only exists when requested and when input and output tensors have matching shapes.
    use_identity_bn = bn and c2 == c1 and s == 1
    self.bn = nn.BatchNorm2d(num_features=c1) if use_identity_bn else None
    self.conv1 = Conv(c1, c2, k, s, p=p, g=g, act=False)
    pointwise_pad = p - k // 2
    self.conv2 = Conv(c1, c2, 1, s, p=pointwise_pad, g=g, act=False)

forward(x)

์ „๋‹ฌ ํ”„๋กœ์„ธ์Šค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward(self, x):
    """Forward pass of the unfused block: sum of the 3x3, 1x1 and optional identity branches, then activation."""
    if self.bn is None:
        id_out = 0
    else:
        id_out = self.bn(x)
    branch_sum = self.conv1(x) + self.conv2(x) + id_out
    return self.act(branch_sum)

forward_fuse(x)

์ „๋‹ฌ ํ”„๋กœ์„ธ์Šค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward_fuse(self, x):
    """Forward pass of the fused block: the single convolution ``self.conv``, then the activation."""
    fused_out = self.conv(x)
    return self.act(fused_out)

fuse_convs()

๋‘ ๊ฐœ์˜ ์ปจ๋ณผ๋ฃจ์…˜ ๋ ˆ์ด์–ด๋ฅผ ๋‹จ์ผ ๋ ˆ์ด์–ด๋กœ ๊ฒฐํ•ฉํ•˜๊ณ  ํด๋ž˜์Šค์—์„œ ์‚ฌ์šฉํ•˜์ง€ ์•Š๋Š” ์†์„ฑ์„ ์ œ๊ฑฐํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def fuse_convs(self):
    """
    Combines the branches into a single convolution, removes the unused attributes and switches ``forward``.

    After fusing, ``forward`` is rebound to ``forward_fuse`` because the branch modules the unfused forward
    pass relies on have been deleted. Calling this method again is a no-op.
    """
    if hasattr(self, "conv"):
        return
    kernel, bias = self.get_equivalent_kernel_bias()
    src = self.conv1.conv  # the fused convolution copies the geometry of the 3x3 branch
    self.conv = nn.Conv2d(
        in_channels=src.in_channels,
        out_channels=src.out_channels,
        kernel_size=src.kernel_size,
        stride=src.stride,
        padding=src.padding,
        dilation=src.dilation,
        groups=src.groups,
        bias=True,
    ).requires_grad_(False)
    self.conv.weight.data = kernel
    self.conv.bias.data = bias
    for para in self.parameters():
        para.detach_()
    self.__delattr__("conv1")
    self.__delattr__("conv2")
    for name in ("nm", "bn", "id_tensor"):
        if hasattr(self, name):
            self.__delattr__(name)
    # conv1/conv2/bn no longer exist, so the unfused forward pass cannot run any more.
    self.forward = self.forward_fuse

get_equivalent_kernel_bias()

3x3 ์ปค๋„, 1x1 ์ปค๋„, ์•„์ด๋ดํ‹ฐํ‹ฐ ์ปค๋„์— ํŽธํ–ฅ์„ ๋”ํ•˜์—ฌ ๋™๋“ฑํ•œ ์ปค๋„๊ณผ ํŽธํ–ฅ์„ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def get_equivalent_kernel_bias(self):
    """Return the kernel and bias obtained by summing the folded 3x3, padded 1x1 and identity branches."""
    kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
    kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
    kernelid, biasid = self._fuse_bn_tensor(self.bn)
    # The 1x1 kernel is padded so it can be added to the 3x3 kernels.
    fused_kernel = kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid
    fused_bias = bias3x3 + bias1x1 + biasid
    return fused_kernel, fused_bias



ultralytics.nn.modules.conv.ChannelAttention

๋ฒ ์ด์Šค: Module

์ฑ„๋„ ๊ด€์‹ฌ ๋ชจ๋“ˆ https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
class ChannelAttention(nn.Module):
    """Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""

    def __init__(self, channels: int) -> None:
        """Create the global average pool, the biased 1x1 convolution and the sigmoid gate."""
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Conv2d(channels, channels, kernel_size=1, stride=1, padding=0, bias=True)
        self.act = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Scale the input by sigmoid weights computed from its pooled, 1x1-convolved summary."""
        pooled = self.pool(x)
        weights = self.act(self.fc(pooled))
        return x * weights

__init__(channels)

ํด๋ž˜์Šค๋ฅผ ์ดˆ๊ธฐํ™”ํ•˜๊ณ  ํ•„์š”ํ•œ ๊ธฐ๋ณธ ๊ตฌ์„ฑ๊ณผ ์ธ์Šคํ„ด์Šค ๋ณ€์ˆ˜๋ฅผ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def __init__(self, channels: int) -> None:
    """Create the global average pool, the biased 1x1 convolution and the sigmoid gate."""
    super().__init__()
    self.pool = nn.AdaptiveAvgPool2d(1)
    self.fc = nn.Conv2d(channels, channels, kernel_size=1, stride=1, padding=0, bias=True)
    self.act = nn.Sigmoid()

forward(x)

์ž…๋ ฅ์˜ ์ปจ๋ณผ๋ฃจ์…˜์— ํ™œ์„ฑํ™”๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ํฌ์›Œ๋“œ ํŒจ์Šค๋ฅผ ์ ์šฉํ•˜๊ณ , ์„ ํƒ์ ์œผ๋กœ ์ผ๊ด„ ์ •๊ทœํ™”๋ฅผ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Scale the input by sigmoid weights computed from its pooled, 1x1-convolved summary."""
    pooled = self.pool(x)
    weights = self.act(self.fc(pooled))
    return x * weights



ultralytics.nn.modules.conv.SpatialAttention

๋ฒ ์ด์Šค: Module

๊ณต๊ฐ„ ์ฃผ์˜ ๋ชจ๋“ˆ.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
class SpatialAttention(nn.Module):
    """Spatial-attention module."""

    def __init__(self, kernel_size=7):
        """Create the bias-free 2-to-1 channel convolution (kernel size 3 or 7) and the sigmoid gate."""
        super().__init__()
        assert kernel_size in {3, 7}, "kernel size must be 3 or 7"
        padding = 1 if kernel_size != 7 else 3
        self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.act = nn.Sigmoid()

    def forward(self, x):
        """Scale the input by a sigmoid map computed from its mean and max over dim 1."""
        mean_map = torch.mean(x, 1, keepdim=True)
        max_map = torch.max(x, 1, keepdim=True)[0]
        gate = self.act(self.cv1(torch.cat([mean_map, max_map], 1)))
        return x * gate

__init__(kernel_size=7)

์ปค๋„ ํฌ๊ธฐ ์ธ์ˆ˜๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๊ณต๊ฐ„ ์ฃผ์˜ ๋ชจ๋“ˆ์„ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def __init__(self, kernel_size=7):
    """Create the bias-free 2-to-1 channel convolution (kernel size 3 or 7) and the sigmoid gate."""
    super().__init__()
    assert kernel_size in {3, 7}, "kernel size must be 3 or 7"
    padding = 1 if kernel_size != 7 else 3
    self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
    self.act = nn.Sigmoid()

forward(x)

๊ธฐ๋Šฅ ์žฌ๋ณด์ •์„ ์œ„ํ•ด ์ž…๋ ฅ์— ์ฑ„๋„ ๋ฐ ๊ณต๊ฐ„ ์ฃผ์˜๋ฅผ ์ ์šฉํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward(self, x):
    """Scale the input by a sigmoid map computed from its mean and max over dim 1."""
    mean_map = torch.mean(x, 1, keepdim=True)
    max_map = torch.max(x, 1, keepdim=True)[0]
    gate = self.act(self.cv1(torch.cat([mean_map, max_map], 1)))
    return x * gate



ultralytics.nn.modules.conv.CBAM

๋ฒ ์ด์Šค: Module

컨볼루션 블록 주의 모듈.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
class CBAM(nn.Module):
    """Convolutional Block Attention Module."""

    def __init__(self, c1, kernel_size=7):
        """Create the channel attention for ``c1`` channels and the spatial attention with ``kernel_size``."""
        super().__init__()
        self.channel_attention = ChannelAttention(c1)
        self.spatial_attention = SpatialAttention(kernel_size)

    def forward(self, x):
        """Apply channel attention first, then spatial attention to its result."""
        channel_refined = self.channel_attention(x)
        return self.spatial_attention(channel_refined)

__init__(c1, kernel_size=7)

์ฃผ์–ด์ง„ ์ž…๋ ฅ ์ฑ„๋„(c1)๊ณผ ์ปค๋„ ํฌ๊ธฐ๋กœ CBAM์„ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def __init__(self, c1, kernel_size=7):
    """Create the channel attention for ``c1`` channels and the spatial attention with ``kernel_size``."""
    super().__init__()
    channel_branch = ChannelAttention(c1)
    spatial_branch = SpatialAttention(kernel_size)
    self.channel_attention = channel_branch
    self.spatial_attention = spatial_branch

forward(x)

C1 ๋ชจ๋“ˆ์„ ํ†ตํ•ด ํฌ์›Œ๋“œ ํŒจ์Šค๋ฅผ ์ ์šฉํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward(self, x):
    """Apply channel attention first, then spatial attention to its result."""
    channel_refined = self.channel_attention(x)
    return self.spatial_attention(channel_refined)



ultralytics.nn.modules.conv.Concat

๋ฒ ์ด์Šค: Module

์ฐจ์›์„ ๋”ฐ๋ผ ํ…์„œ ๋ชฉ๋ก์„ ์—ฐ๊ฒฐํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
class Concat(nn.Module):
    """Concatenate a list of tensors along dimension."""

    def __init__(self, dimension=1):
        """Store the dimension along which ``forward`` concatenates."""
        super().__init__()
        self.d = dimension

    def forward(self, x):
        """Concatenate the tensors in ``x`` along the stored dimension."""
        return torch.cat(x, dim=self.d)

__init__(dimension=1)

์ง€์ •๋œ ์ฐจ์›์„ ๋”ฐ๋ผ ํ…์„œ ๋ชฉ๋ก์„ ์—ฐ๊ฒฐํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def __init__(self, dimension=1):
    """Store the dimension along which ``forward`` concatenates."""
    super().__init__()
    self.d = dimension  # concatenation axis

forward(x)

YOLOv8 ๋งˆ์Šคํฌ ํ”„๋กœํ†  ๋ชจ๋“ˆ์— ๋Œ€ํ•œ ์ „๋‹ฌ ํŒจ์Šค๋ฅผ ์ „๋‹ฌํ•ฉ๋‹ˆ๋‹ค.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def forward(self, x):
    """Concatenate the tensors in ``x`` along the stored dimension ``self.d``."""
    return torch.cat(x, dim=self.d)



ultralytics.nn.modules.conv.autopad(k, p=None, d=1)

패드를 '동일한' 모양 출력으로 전환합니다.

์˜ ์†Œ์Šค ์ฝ”๋“œ ultralytics/nn/modules/conv.py
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """
    Pad to 'same' shape outputs.

    Returns ``p`` unchanged when it is given. Otherwise returns half of the kernel size (integer division),
    using the dilated kernel size ``d * (k - 1) + 1`` when ``d > 1``. A non-int ``k`` is treated as a sequence
    of sizes and yields a list.
    """
    if d > 1:
        # Replace the nominal kernel size by the extent it covers once dilated.
        if isinstance(k, int):
            k = d * (k - 1) + 1
        else:
            k = [d * (x - 1) + 1 for x in k]
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [x // 2 for x in k]





์ƒ์„ฑ 2023-11-12, ์—…๋ฐ์ดํŠธ 2023-11-25
์ž‘์„ฑ์ž: glenn-jocher (3)