Skip to content

Reference for ultralytics/nn/modules/block.py

Note

This file is available at https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/block.py. If you spot a problem please help fix it by contributing a Pull Request 🛠️. Thank you 🙏!



ultralytics.nn.modules.block.DFL

Bases: Module

Integral module of Distribution Focal Loss (DFL).

Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391

Source code in ultralytics/nn/modules/block.py
class DFL(nn.Module):
    """
    Integral module of Distribution Focal Loss (DFL).

    Turns a discrete distribution over ``c1`` bins into its expected value by means of a frozen 1x1 convolution
    whose weights are the bin indices ``0 .. c1 - 1``.

    Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
    """

    def __init__(self, c1=16):
        """
        Build the fixed (non-trainable) projection that takes the expectation over ``c1`` bins.

        Args:
            c1 (int): Number of distribution bins per box side.
        """
        super().__init__()
        self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
        bins = torch.arange(c1, dtype=torch.float)
        # A plain tensor is enough for the in-place copy; wrapping it in nn.Parameter only built a throwaway object.
        self.conv.weight.data[:] = bins.view(1, c1, 1, 1)
        self.c1 = c1

    def forward(self, x):
        """
        Decode per-bin logits into expected bin positions.

        Args:
            x (torch.Tensor): Tensor of shape (batch, 4 * c1, anchors).

        Returns:
            (torch.Tensor): Tensor of shape (batch, 4, anchors) holding the softmax-weighted mean bin index.
        """
        b, _, a = x.shape  # batch, channels, anchors
        # (b, 4 * c1, a) -> (b, 4, c1, a) -> (b, c1, 4, a); softmax runs over the c1 bins (dim 1)
        probs = x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)
        # the frozen conv computes sum_i(i * p_i) over the bin channel
        return self.conv(probs).view(b, 4, a)

__init__(c1=16)

Initialize a convolutional layer with a given number of input channels.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1=16):
    """
    Build the fixed (non-trainable) projection that takes the expectation over ``c1`` bins.

    Args:
        c1 (int): Number of distribution bins per box side.
    """
    super().__init__()
    self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
    bins = torch.arange(c1, dtype=torch.float)
    # A plain tensor is enough for the in-place copy; wrapping it in nn.Parameter only built a throwaway object.
    self.conv.weight.data[:] = bins.view(1, c1, 1, 1)
    self.c1 = c1

forward(x)

Applies a softmax over the c1 distribution bins of input tensor 'x' and returns their expected value with shape (batch, 4, anchors).

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """
    Decode per-bin logits into expected bin positions.

    Args:
        x (torch.Tensor): Tensor of shape (batch, 4 * c1, anchors).

    Returns:
        (torch.Tensor): Tensor of shape (batch, 4, anchors) holding the softmax-weighted mean bin index.
    """
    b, _, a = x.shape  # batch, channels, anchors
    # (b, 4 * c1, a) -> (b, 4, c1, a) -> (b, c1, 4, a); softmax runs over the c1 bins (dim 1)
    probs = x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)
    # the frozen conv computes sum_i(i * p_i) over the bin channel
    return self.conv(probs).view(b, 4, a)



ultralytics.nn.modules.block.Proto

Bases: Module

YOLOv8 mask Proto module for segmentation models.

Source code in ultralytics/nn/modules/block.py
class Proto(nn.Module):
    """YOLOv8 mask Proto module for segmentation models."""

    def __init__(self, c1, c_=256, c2=32):
        """
        Build the prototype head: 3x3 conv, learnable 2x upsampling, 3x3 conv, then a final Conv.

        Args:
            c1 (int): Input channels.
            c_ (int): Number of protos (hidden channels).
            c2 (int): Number of masks (output channels).
        """
        super().__init__()
        self.cv1 = Conv(c1, c_, k=3)
        # transposed conv with kernel 2 / stride 2; alternative: nn.Upsample(scale_factor=2, mode='nearest')
        self.upsample = nn.ConvTranspose2d(c_, c_, kernel_size=2, stride=2, padding=0, bias=True)
        self.cv2 = Conv(c_, c_, k=3)
        self.cv3 = Conv(c_, c2)

    def forward(self, x):
        """Run cv1, the upsampling layer, cv2 and cv3 in sequence and return the result."""
        feats = self.cv1(x)
        feats = self.upsample(feats)
        feats = self.cv2(feats)
        return self.cv3(feats)

__init__(c1, c_=256, c2=32)

Initializes the YOLOv8 mask Proto module with specified number of protos and masks.

Input arguments are ch_in, number of protos, number of masks.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c_=256, c2=32):
    """
    Build the prototype head: 3x3 conv, learnable 2x upsampling, 3x3 conv, then a final Conv.

    Args:
        c1 (int): Input channels.
        c_ (int): Number of protos (hidden channels).
        c2 (int): Number of masks (output channels).
    """
    super().__init__()
    self.cv1 = Conv(c1, c_, k=3)
    # transposed conv with kernel 2 / stride 2; alternative: nn.Upsample(scale_factor=2, mode='nearest')
    self.upsample = nn.ConvTranspose2d(c_, c_, kernel_size=2, stride=2, padding=0, bias=True)
    self.cv2 = Conv(c_, c_, k=3)
    self.cv3 = Conv(c_, c2)

forward(x)

Performs a forward pass through layers using an upsampled input image.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Run cv1, the upsampling layer, cv2 and cv3 in sequence and return the result."""
    feats = self.cv1(x)
    feats = self.upsample(feats)
    feats = self.cv2(feats)
    return self.cv3(feats)



ultralytics.nn.modules.block.HGStem

Bases: Module

StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.

https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py

Source code in ultralytics/nn/modules/block.py
class HGStem(nn.Module):
    """
    StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.

    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
    """

    def __init__(self, c1, cm, c2):
        """
        Initialize the stem with its five ReLU-activated convolutions and the max-pool branch.

        Args:
            c1 (int): Input channels.
            cm (int): Middle channels.
            c2 (int): Output channels.
        """
        super().__init__()
        half = cm // 2
        self.stem1 = Conv(c1, cm, 3, 2, act=nn.ReLU())
        self.stem2a = Conv(cm, half, 2, 1, 0, act=nn.ReLU())
        self.stem2b = Conv(half, cm, 2, 1, 0, act=nn.ReLU())
        self.stem3 = Conv(2 * cm, cm, 3, 2, act=nn.ReLU())  # consumes cat(pool branch, conv branch)
        self.stem4 = Conv(cm, c2, 1, 1, act=nn.ReLU())
        self.pool = nn.MaxPool2d(2, stride=1, padding=0, ceil_mode=True)

    def forward(self, x):
        """Run the stem: stem1, then parallel max-pool and stem2a/stem2b branches, concatenated into stem3/stem4."""
        # pad one column on the right and one row at the bottom before the kernel-2 layers
        stem = F.pad(self.stem1(x), [0, 1, 0, 1])
        branch = self.stem2a(stem)
        branch = self.stem2b(F.pad(branch, [0, 1, 0, 1]))
        pooled = self.pool(stem)
        merged = torch.cat([pooled, branch], dim=1)
        return self.stem4(self.stem3(merged))

__init__(c1, cm, c2)

Initialize the HGStem block with input, middle, and output channels.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, cm, c2):
    """
    Initialize the stem with its five ReLU-activated convolutions and the max-pool branch.

    Args:
        c1 (int): Input channels.
        cm (int): Middle channels.
        c2 (int): Output channels.
    """
    super().__init__()
    half = cm // 2
    self.stem1 = Conv(c1, cm, 3, 2, act=nn.ReLU())
    self.stem2a = Conv(cm, half, 2, 1, 0, act=nn.ReLU())
    self.stem2b = Conv(half, cm, 2, 1, 0, act=nn.ReLU())
    self.stem3 = Conv(2 * cm, cm, 3, 2, act=nn.ReLU())  # consumes cat(pool branch, conv branch)
    self.stem4 = Conv(cm, c2, 1, 1, act=nn.ReLU())
    self.pool = nn.MaxPool2d(2, stride=1, padding=0, ceil_mode=True)

forward(x)

Forward pass of a PPHGNetV2 backbone layer.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Run the stem: stem1, then parallel max-pool and stem2a/stem2b branches, concatenated into stem3/stem4."""
    # pad one column on the right and one row at the bottom before the kernel-2 layers
    stem = F.pad(self.stem1(x), [0, 1, 0, 1])
    branch = self.stem2a(stem)
    branch = self.stem2b(F.pad(branch, [0, 1, 0, 1]))
    pooled = self.pool(stem)
    merged = torch.cat([pooled, branch], dim=1)
    return self.stem4(self.stem3(merged))



ultralytics.nn.modules.block.HGBlock

Bases: Module

HG_Block of PPHGNetV2 with 2 convolutions and LightConv.

https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py

Source code in ultralytics/nn/modules/block.py
class HGBlock(nn.Module):
    """
    HG_Block of PPHGNetV2 with 2 convolutions and LightConv.

    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
    """

    def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
        """
        Initialize the HGBlock with a chain of n conv layers followed by squeeze and excitation convs.

        Args:
            c1 (int): Input channels.
            cm (int): Channels produced by every layer of the chain.
            c2 (int): Output channels.
            k (int): Kernel size of the chained layers.
            n (int): Number of chained layers.
            lightconv (bool): Use LightConv instead of Conv for the chain.
            shortcut (bool): Add the input to the output (only effective when c1 == c2).
            act (nn.Module): Activation passed to every conv.
        """
        super().__init__()
        layer_cls = LightConv if lightconv else Conv
        layers = []
        for i in range(n):
            cin = cm if i else c1  # only the first layer sees the block input
            layers.append(layer_cls(cin, cm, k=k, act=act))
        self.m = nn.ModuleList(layers)
        squeezed = c2 // 2
        self.sc = Conv(c1 + n * cm, squeezed, 1, 1, act=act)  # squeeze conv
        self.ec = Conv(squeezed, c2, 1, 1, act=act)  # excitation conv
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Chain the layers, concatenate the input with every intermediate output, then squeeze and excite."""
        feats = [x]
        for layer in self.m:
            feats.append(layer(feats[-1]))
        out = self.ec(self.sc(torch.cat(feats, 1)))
        if self.add:
            return out + x
        return out

__init__(c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU())

Initializes an HGBlock with the given input, middle, and output channels, kernel size, number of layers, and options.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
    """
    Initialize the HGBlock with a chain of n conv layers followed by squeeze and excitation convs.

    Args:
        c1 (int): Input channels.
        cm (int): Channels produced by every layer of the chain.
        c2 (int): Output channels.
        k (int): Kernel size of the chained layers.
        n (int): Number of chained layers.
        lightconv (bool): Use LightConv instead of Conv for the chain.
        shortcut (bool): Add the input to the output (only effective when c1 == c2).
        act (nn.Module): Activation passed to every conv.
    """
    super().__init__()
    layer_cls = LightConv if lightconv else Conv
    layers = []
    for i in range(n):
        cin = cm if i else c1  # only the first layer sees the block input
        layers.append(layer_cls(cin, cm, k=k, act=act))
    self.m = nn.ModuleList(layers)
    squeezed = c2 // 2
    self.sc = Conv(c1 + n * cm, squeezed, 1, 1, act=act)  # squeeze conv
    self.ec = Conv(squeezed, c2, 1, 1, act=act)  # excitation conv
    self.add = shortcut and c1 == c2

forward(x)

Forward pass of a PPHGNetV2 backbone layer.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Chain the layers, concatenate the input with every intermediate output, then squeeze and excite."""
    feats = [x]
    for layer in self.m:
        feats.append(layer(feats[-1]))
    out = self.ec(self.sc(torch.cat(feats, 1)))
    if self.add:
        return out + x
    return out



ultralytics.nn.modules.block.SPP

Bases: Module

Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729.

Source code in ultralytics/nn/modules/block.py
class SPP(nn.Module):
    """Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        """
        Initialize the SPP layer.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            k (tuple[int]): Kernel sizes of the parallel stride-1 max-pool layers.
        """
        super().__init__()
        hidden = c1 // 2
        self.cv1 = Conv(c1, hidden, 1, 1)
        # cv2 sees the cv1 output plus one pooled copy per kernel size
        self.cv2 = Conv(hidden * (len(k) + 1), c2, 1, 1)
        pools = [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
        self.m = nn.ModuleList(pools)

    def forward(self, x):
        """Project the input, pool it with every kernel size, and fuse the concatenated maps with cv2."""
        x = self.cv1(x)
        pooled = [pool(x) for pool in self.m]
        return self.cv2(torch.cat([x, *pooled], 1))

__init__(c1, c2, k=(5, 9, 13))

Initialize the SPP layer with input/output channels and pooling kernel sizes.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, k=(5, 9, 13)):
    """
    Initialize the SPP layer.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        k (tuple[int]): Kernel sizes of the parallel stride-1 max-pool layers.
    """
    super().__init__()
    hidden = c1 // 2
    self.cv1 = Conv(c1, hidden, 1, 1)
    # cv2 sees the cv1 output plus one pooled copy per kernel size
    self.cv2 = Conv(hidden * (len(k) + 1), c2, 1, 1)
    pools = [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
    self.m = nn.ModuleList(pools)

forward(x)

Forward pass of the SPP layer, performing spatial pyramid pooling.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Project the input, pool it with every kernel size, and fuse the concatenated maps with cv2."""
    x = self.cv1(x)
    pooled = [pool(x) for pool in self.m]
    return self.cv2(torch.cat([x, *pooled], 1))



ultralytics.nn.modules.block.SPPF

Bases: Module

Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher.

Source code in ultralytics/nn/modules/block.py
class SPPF(nn.Module):
    """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""

    def __init__(self, c1, c2, k=5):
        """
        Initialize the SPPF layer.

        With the default k=5 this module is equivalent to SPP(k=(5, 9, 13)).

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            k (int): Kernel size of the single max-pool that is applied repeatedly.
        """
        super().__init__()
        hidden = c1 // 2
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(4 * hidden, c2, 1, 1)  # cv1 output + three pooled maps
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        """Project the input, apply the same max-pool three times in a chain, and fuse all four maps with cv2."""
        feats = [self.cv1(x)]
        for _ in range(3):
            feats.append(self.m(feats[-1]))
        return self.cv2(torch.cat(feats, 1))

__init__(c1, c2, k=5)

Initializes the SPPF layer with given input/output channels and kernel size.

This module is equivalent to SPP(k=(5, 9, 13)).

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, k=5):
    """
    Initialize the SPPF layer.

    With the default k=5 this module is equivalent to SPP(k=(5, 9, 13)).

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        k (int): Kernel size of the single max-pool that is applied repeatedly.
    """
    super().__init__()
    hidden = c1 // 2
    self.cv1 = Conv(c1, hidden, 1, 1)
    self.cv2 = Conv(4 * hidden, c2, 1, 1)  # cv1 output + three pooled maps
    self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

forward(x)

Forward pass through the SPPF layer: the projected input is max-pooled three times in a chain and all four maps are concatenated and fused.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Project the input, apply the same max-pool three times in a chain, and fuse all four maps with cv2."""
    feats = [self.cv1(x)]
    for _ in range(3):
        feats.append(self.m(feats[-1]))
    return self.cv2(torch.cat(feats, 1))



ultralytics.nn.modules.block.C1

Bases: Module

CSP Bottleneck with 1 convolution.

Source code in ultralytics/nn/modules/block.py
class C1(nn.Module):
    """CSP Bottleneck with 1 convolution."""

    def __init__(self, c1, c2, n=1):
        """
        Initialize the block with one projection conv and a stack of n convs.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            n (int): Number of convs in the residual stack.
        """
        super().__init__()
        self.cv1 = Conv(c1, c2, 1, 1)
        self.m = nn.Sequential(*[Conv(c2, c2, 3) for _ in range(n)])

    def forward(self, x):
        """Apply cv1, then add the output of the conv stack to the cv1 output."""
        base = self.cv1(x)
        return self.m(base) + base

__init__(c1, c2, n=1)

Initializes the CSP Bottleneck with configurations for 1 convolution with arguments ch_in, ch_out, number.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, n=1):
    """
    Initialize the block with one projection conv and a stack of n convs.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        n (int): Number of convs in the residual stack.
    """
    super().__init__()
    self.cv1 = Conv(c1, c2, 1, 1)
    self.m = nn.Sequential(*[Conv(c2, c2, 3) for _ in range(n)])

forward(x)

Applies cv1 to the input, then adds the output of the convolution stack to the cv1 output.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Apply cv1, then add the output of the conv stack to the cv1 output."""
    base = self.cv1(x)
    return self.m(base) + base



ultralytics.nn.modules.block.C2

Bases: Module

CSP Bottleneck with 2 convolutions.

Source code in ultralytics/nn/modules/block.py
class C2(nn.Module):
    """CSP Bottleneck with 2 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Initialize the CSP Bottleneck with 2 convolutions.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            n (int): Number of Bottleneck blocks.
            shortcut (bool): Forwarded to every Bottleneck.
            g (int): Groups, forwarded to every Bottleneck.
            e (float): Expansion ratio used to derive the hidden channels.
        """
        super().__init__()
        hidden = int(c2 * e)
        self.c = hidden  # hidden channels
        self.cv1 = Conv(c1, 2 * hidden, 1, 1)
        self.cv2 = Conv(2 * hidden, c2, 1)  # optional act=FReLU(c2)
        # self.attention = ChannelAttention(2 * self.c)  # or SpatialAttention()
        stack = [Bottleneck(hidden, hidden, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*stack)

    def forward(self, x):
        """Split the cv1 output in two along channels, run one half through the bottlenecks, and fuse both halves."""
        first, second = self.cv1(x).chunk(2, 1)
        merged = torch.cat((self.m(first), second), 1)
        return self.cv2(merged)

__init__(c1, c2, n=1, shortcut=True, g=1, e=0.5)

Initializes the CSP Bottleneck with 2 convolutions module with arguments ch_in, ch_out, number, shortcut, groups, expansion.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
    """
    Initialize the CSP Bottleneck with 2 convolutions.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        n (int): Number of Bottleneck blocks.
        shortcut (bool): Forwarded to every Bottleneck.
        g (int): Groups, forwarded to every Bottleneck.
        e (float): Expansion ratio used to derive the hidden channels.
    """
    super().__init__()
    hidden = int(c2 * e)
    self.c = hidden  # hidden channels
    self.cv1 = Conv(c1, 2 * hidden, 1, 1)
    self.cv2 = Conv(2 * hidden, c2, 1)  # optional act=FReLU(c2)
    # self.attention = ChannelAttention(2 * self.c)  # or SpatialAttention()
    stack = [Bottleneck(hidden, hidden, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)]
    self.m = nn.Sequential(*stack)

forward(x)

Forward pass through the CSP bottleneck with 2 convolutions.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Split the cv1 output in two along channels, run one half through the bottlenecks, and fuse both halves."""
    first, second = self.cv1(x).chunk(2, 1)
    merged = torch.cat((self.m(first), second), 1)
    return self.cv2(merged)



ultralytics.nn.modules.block.C2f

Bases: Module

Faster Implementation of CSP Bottleneck with 2 convolutions.

Source code in ultralytics/nn/modules/block.py
class C2f(nn.Module):
    """Faster Implementation of CSP Bottleneck with 2 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
        """
        Initialize the C2f layer.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            n (int): Number of Bottleneck blocks.
            shortcut (bool): Forwarded to every Bottleneck.
            g (int): Groups, forwarded to every Bottleneck.
            e (float): Expansion ratio used to derive the hidden channels.
        """
        super().__init__()
        hidden = int(c2 * e)
        self.c = hidden  # hidden channels
        self.cv1 = Conv(c1, 2 * hidden, 1, 1)
        # cv2 sees both halves of cv1 plus the output of every bottleneck
        self.cv2 = Conv((2 + n) * hidden, c2, 1)  # optional act=FReLU(c2)
        stack = [Bottleneck(hidden, hidden, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)]
        self.m = nn.ModuleList(stack)

    def forward(self, x):
        """Forward pass through C2f layer using chunk() to split the cv1 output."""
        feats = list(self.cv1(x).chunk(2, 1))
        for block in self.m:
            feats.append(block(feats[-1]))  # each bottleneck consumes the latest feature map
        return self.cv2(torch.cat(feats, 1))

    def forward_split(self, x):
        """Forward pass using split() instead of chunk()."""
        feats = list(self.cv1(x).split((self.c, self.c), 1))
        for block in self.m:
            feats.append(block(feats[-1]))  # each bottleneck consumes the latest feature map
        return self.cv2(torch.cat(feats, 1))

__init__(c1, c2, n=1, shortcut=False, g=1, e=0.5)

Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut, groups, expansion.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
    """
    Initialize the C2f layer.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        n (int): Number of Bottleneck blocks.
        shortcut (bool): Forwarded to every Bottleneck.
        g (int): Groups, forwarded to every Bottleneck.
        e (float): Expansion ratio used to derive the hidden channels.
    """
    super().__init__()
    hidden = int(c2 * e)
    self.c = hidden  # hidden channels
    self.cv1 = Conv(c1, 2 * hidden, 1, 1)
    # cv2 sees both halves of cv1 plus the output of every bottleneck
    self.cv2 = Conv((2 + n) * hidden, c2, 1)  # optional act=FReLU(c2)
    stack = [Bottleneck(hidden, hidden, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)]
    self.m = nn.ModuleList(stack)

forward(x)

Forward pass through C2f layer.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Forward pass through C2f layer using chunk() to split the cv1 output."""
    feats = list(self.cv1(x).chunk(2, 1))
    for block in self.m:
        feats.append(block(feats[-1]))  # each bottleneck consumes the latest feature map
    return self.cv2(torch.cat(feats, 1))

forward_split(x)

Forward pass using split() instead of chunk().

Source code in ultralytics/nn/modules/block.py
def forward_split(self, x):
    """Forward pass using split() instead of chunk()."""
    feats = list(self.cv1(x).split((self.c, self.c), 1))
    for block in self.m:
        feats.append(block(feats[-1]))  # each bottleneck consumes the latest feature map
    return self.cv2(torch.cat(feats, 1))



ultralytics.nn.modules.block.C3

Bases: Module

CSP Bottleneck with 3 convolutions.

Source code in ultralytics/nn/modules/block.py
class C3(nn.Module):
    """CSP Bottleneck with 3 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Initialize the CSP Bottleneck with 3 convolutions.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            n (int): Number of Bottleneck blocks.
            shortcut (bool): Forwarded to every Bottleneck.
            g (int): Groups, forwarded to every Bottleneck.
            e (float): Expansion ratio used to derive the hidden channels.
        """
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)  # optional act=FReLU(c2)
        stack = [Bottleneck(hidden, hidden, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*stack)

    def forward(self, x):
        """Forward pass through the CSP bottleneck with 3 convolutions."""
        main = self.m(self.cv1(x))
        skip = self.cv2(x)
        return self.cv3(torch.cat((main, skip), 1))

__init__(c1, c2, n=1, shortcut=True, g=1, e=0.5)

Initialize the CSP Bottleneck with given channels, number, shortcut, groups, and expansion values.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
    """
    Initialize the CSP Bottleneck with 3 convolutions.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        n (int): Number of Bottleneck blocks.
        shortcut (bool): Forwarded to every Bottleneck.
        g (int): Groups, forwarded to every Bottleneck.
        e (float): Expansion ratio used to derive the hidden channels.
    """
    super().__init__()
    hidden = int(c2 * e)
    self.cv1 = Conv(c1, hidden, 1, 1)
    self.cv2 = Conv(c1, hidden, 1, 1)
    self.cv3 = Conv(2 * hidden, c2, 1)  # optional act=FReLU(c2)
    stack = [Bottleneck(hidden, hidden, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n)]
    self.m = nn.Sequential(*stack)

forward(x)

Forward pass through the CSP bottleneck with 3 convolutions.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Forward pass through the CSP bottleneck with 3 convolutions."""
    main = self.m(self.cv1(x))
    skip = self.cv2(x)
    return self.cv3(torch.cat((main, skip), 1))



ultralytics.nn.modules.block.C3x

Bases: C3

C3 module with cross-convolutions.

Source code in ultralytics/nn/modules/block.py
class C3x(C3):
    """C3 module with cross-convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Initialize C3x, replacing the C3 bottleneck stack with (1, 3) / (3, 1) kernel bottlenecks."""
        super().__init__(c1, c2, n, shortcut, g, e)
        self.c_ = int(c2 * e)
        cross = [Bottleneck(self.c_, self.c_, shortcut, g, k=((1, 3), (3, 1)), e=1) for _ in range(n)]
        self.m = nn.Sequential(*cross)

__init__(c1, c2, n=1, shortcut=True, g=1, e=0.5)

Initialize C3x instance, replacing the C3 bottleneck stack with cross-convolution (1x3 and 3x1) bottlenecks.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
    """Initialize C3x, replacing the C3 bottleneck stack with (1, 3) / (3, 1) kernel bottlenecks."""
    super().__init__(c1, c2, n, shortcut, g, e)
    self.c_ = int(c2 * e)
    cross = [Bottleneck(self.c_, self.c_, shortcut, g, k=((1, 3), (3, 1)), e=1) for _ in range(n)]
    self.m = nn.Sequential(*cross)



ultralytics.nn.modules.block.RepC3

Bases: Module

Rep C3.

Source code in ultralytics/nn/modules/block.py
class RepC3(nn.Module):
    """Rep C3: two parallel 1x1 convs, one followed by a RepConv stack, summed and optionally projected by cv3."""

    def __init__(self, c1, c2, n=3, e=1.0):
        """
        Initialize RepC3.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            n (int): Number of RepConv layers.
            e (float): Expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        # Both branches must emit c_ channels: the RepConv stack and cv3 consume c_, so emitting c2 here broke
        # every configuration with e != 1.0. With the default e == 1.0 (c_ == c2) nothing changes.
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.m = nn.Sequential(*[RepConv(c_, c_) for _ in range(n)])
        self.cv3 = Conv(c_, c2, 1, 1) if c_ != c2 else nn.Identity()

    def forward(self, x):
        """Forward pass of RT-DETR neck layer: sum the RepConv branch and the cv2 branch, then apply cv3."""
        return self.cv3(self.m(self.cv1(x)) + self.cv2(x))

__init__(c1, c2, n=3, e=1.0)

Initialize RepC3 with input channels, output channels, number of RepConv layers, and expansion ratio.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, n=3, e=1.0):
    """
    Initialize RepC3.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        n (int): Number of RepConv layers.
        e (float): Expansion ratio; hidden channels are int(c2 * e).
    """
    super().__init__()
    c_ = int(c2 * e)  # hidden channels
    # Both branches must emit c_ channels: the RepConv stack and cv3 consume c_, so emitting c2 here broke
    # every configuration with e != 1.0. With the default e == 1.0 (c_ == c2) nothing changes.
    self.cv1 = Conv(c1, c_, 1, 1)
    self.cv2 = Conv(c1, c_, 1, 1)
    self.m = nn.Sequential(*[RepConv(c_, c_) for _ in range(n)])
    self.cv3 = Conv(c_, c2, 1, 1) if c_ != c2 else nn.Identity()

forward(x)

Forward pass of RT-DETR neck layer.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Forward pass of RT-DETR neck layer: sum the RepConv branch and the cv2 branch, then apply cv3."""
    rep_branch = self.m(self.cv1(x))
    skip_branch = self.cv2(x)
    return self.cv3(rep_branch + skip_branch)



ultralytics.nn.modules.block.C3TR

Bases: C3

C3 module with TransformerBlock().

Source code in ultralytics/nn/modules/block.py
class C3TR(C3):
    """C3 module with TransformerBlock()."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Initialize C3TR, replacing the C3 bottleneck stack with a TransformerBlock."""
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        # NOTE(review): the literal 4 is presumably the number of attention heads - confirm against TransformerBlock
        self.m = TransformerBlock(hidden, hidden, 4, n)

__init__(c1, c2, n=1, shortcut=True, g=1, e=0.5)

Initialize C3TR module with TransformerBlock().

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
    """Initialize C3TR, replacing the C3 bottleneck stack with a TransformerBlock."""
    super().__init__(c1, c2, n, shortcut, g, e)
    hidden = int(c2 * e)
    # NOTE(review): the literal 4 is presumably the number of attention heads - confirm against TransformerBlock
    self.m = TransformerBlock(hidden, hidden, 4, n)



ultralytics.nn.modules.block.C3Ghost

Bases: C3

C3 module with GhostBottleneck().

Source code in ultralytics/nn/modules/block.py
class C3Ghost(C3):
    """C3 module with GhostBottleneck()."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Initialize C3Ghost, replacing the C3 bottleneck stack with n GhostBottleneck blocks."""
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        ghosts = [GhostBottleneck(hidden, hidden) for _ in range(n)]
        self.m = nn.Sequential(*ghosts)

__init__(c1, c2, n=1, shortcut=True, g=1, e=0.5)

Initialize C3Ghost module with GhostBottleneck().

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
    """Initialize C3Ghost, replacing the C3 bottleneck stack with n GhostBottleneck blocks."""
    super().__init__(c1, c2, n, shortcut, g, e)
    hidden = int(c2 * e)
    ghosts = [GhostBottleneck(hidden, hidden) for _ in range(n)]
    self.m = nn.Sequential(*ghosts)



ultralytics.nn.modules.block.GhostBottleneck

Bases: Module

Ghost Bottleneck https://github.com/huawei-noah/ghostnet.

Source code in ultralytics/nn/modules/block.py
class GhostBottleneck(nn.Module):
    """Ghost Bottleneck https://github.com/huawei-noah/ghostnet."""

    def __init__(self, c1, c2, k=3, s=1):
        """
        Initialize the GhostBottleneck.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            k (int): Kernel size of the depth-wise convs (used only when s == 2).
            s (int): Stride; depth-wise convs are inserted in both branches only when it equals 2.
        """
        super().__init__()
        mid = c2 // 2
        downsample = s == 2
        pw = GhostConv(c1, mid, 1, 1)  # pw
        dw = DWConv(mid, mid, k, s, act=False) if downsample else nn.Identity()  # dw
        pw_linear = GhostConv(mid, c2, 1, 1, act=False)  # pw-linear
        self.conv = nn.Sequential(pw, dw, pw_linear)
        if downsample:
            self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False))
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Return the element-wise sum of the ghost-conv branch and the shortcut branch."""
        main = self.conv(x)
        return main + self.shortcut(x)

__init__(c1, c2, k=3, s=1)

Initializes GhostBottleneck module with arguments ch_in, ch_out, kernel, stride.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, k=3, s=1):
    """
    Initialize the GhostBottleneck.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        k (int): Kernel size of the depth-wise convs (used only when s == 2).
        s (int): Stride; depth-wise convs are inserted in both branches only when it equals 2.
    """
    super().__init__()
    mid = c2 // 2
    downsample = s == 2
    pw = GhostConv(c1, mid, 1, 1)  # pw
    dw = DWConv(mid, mid, k, s, act=False) if downsample else nn.Identity()  # dw
    pw_linear = GhostConv(mid, c2, 1, 1, act=False)  # pw-linear
    self.conv = nn.Sequential(pw, dw, pw_linear)
    if downsample:
        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False))
    else:
        self.shortcut = nn.Identity()

forward(x)

Applies the ghost convolution branch to the input tensor and adds the shortcut branch to it.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Return the element-wise sum of the ghost-conv branch and the shortcut branch."""
    main = self.conv(x)
    return main + self.shortcut(x)



ultralytics.nn.modules.block.Bottleneck

Bases: Module

Standard bottleneck.

Source code in ultralytics/nn/modules/block.py
class Bottleneck(nn.Module):
    """Standard bottleneck."""

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
        """
        Initialize the bottleneck with two convolutions and an optional residual connection.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            shortcut (bool): Add the input to the output (only effective when c1 == c2).
            g (int): Groups of the second convolution.
            k (tuple): Kernel sizes of the first and second convolution.
            e (float): Expansion ratio used to derive the hidden channels.
        """
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, k[0], 1)
        self.cv2 = Conv(hidden, c2, k[1], 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Apply cv1 then cv2, adding the input back when the residual connection is enabled."""
        out = self.cv2(self.cv1(x))
        return x + out if self.add else out

__init__(c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5)

Initializes a bottleneck module with given input/output channels, shortcut option, group, kernels, and expansion.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
    """
    Initialize the bottleneck with two convolutions and an optional residual connection.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        shortcut (bool): Add the input to the output (only effective when c1 == c2).
        g (int): Groups of the second convolution.
        k (tuple): Kernel sizes of the first and second convolution.
        e (float): Expansion ratio used to derive the hidden channels.
    """
    super().__init__()
    hidden = int(c2 * e)
    self.cv1 = Conv(c1, hidden, k[0], 1)
    self.cv2 = Conv(hidden, c2, k[1], 1, g=g)
    self.add = shortcut and c1 == c2

forward(x)

'forward()' applies the two convolutions to the input data, adding the input back when the shortcut is enabled.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Apply cv1 then cv2, adding the input back when the residual connection is enabled."""
    out = self.cv2(self.cv1(x))
    return x + out if self.add else out



ultralytics.nn.modules.block.BottleneckCSP

Bases: Module

CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks.

Source code in ultralytics/nn/modules/block.py
class BottleneckCSP(nn.Module):
    """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Initialize the CSP Bottleneck.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            n (int): Number of Bottleneck blocks.
            shortcut (bool): Forwarded to every Bottleneck.
            g (int): Groups, forwarded to every Bottleneck.
            e (float): Expansion ratio used to derive the hidden channels.
        """
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = nn.Conv2d(c1, hidden, kernel_size=1, stride=1, bias=False)
        self.cv3 = nn.Conv2d(hidden, hidden, kernel_size=1, stride=1, bias=False)
        self.cv4 = Conv(2 * hidden, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * hidden)  # applied to cat(cv3 output, cv2 output)
        self.act = nn.SiLU()
        stack = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*stack)

    def forward(self, x):
        """Concatenate the bottleneck branch (cv1 -> m -> cv3) with the cv2 branch, then apply BN, SiLU and cv4."""
        deep = self.cv3(self.m(self.cv1(x)))
        shallow = self.cv2(x)
        fused = torch.cat((deep, shallow), 1)
        return self.cv4(self.act(self.bn(fused)))

__init__(c1, c2, n=1, shortcut=True, g=1, e=0.5)

Initializes the CSP Bottleneck given arguments for ch_in, ch_out, number, shortcut, groups, expansion.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
    """
    Initialize the CSP Bottleneck.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        n (int): Number of Bottleneck blocks.
        shortcut (bool): Forwarded to every Bottleneck.
        g (int): Groups, forwarded to every Bottleneck.
        e (float): Expansion ratio used to derive the hidden channels.
    """
    super().__init__()
    hidden = int(c2 * e)
    self.cv1 = Conv(c1, hidden, 1, 1)
    self.cv2 = nn.Conv2d(c1, hidden, kernel_size=1, stride=1, bias=False)
    self.cv3 = nn.Conv2d(hidden, hidden, kernel_size=1, stride=1, bias=False)
    self.cv4 = Conv(2 * hidden, c2, 1, 1)
    self.bn = nn.BatchNorm2d(2 * hidden)  # applied to cat(cv3 output, cv2 output)
    self.act = nn.SiLU()
    stack = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
    self.m = nn.Sequential(*stack)

forward(x)

Applies the CSP bottleneck: the bottleneck branch and the cv2 branch are concatenated, batch-normalized, activated, and projected by cv4.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Concatenate the bottleneck branch (cv1 -> m -> cv3) with the cv2 branch, then apply BN, SiLU and cv4."""
    deep = self.cv3(self.m(self.cv1(x)))
    shallow = self.cv2(x)
    fused = torch.cat((deep, shallow), 1)
    return self.cv4(self.act(self.bn(fused)))



ultralytics.nn.modules.block.ResNetBlock

Bases: Module

ResNet block with standard convolution layers.

Source code in ultralytics/nn/modules/block.py
class ResNetBlock(nn.Module):
    """ResNet block with standard convolution layers."""

    def __init__(self, c1, c2, s=1, e=4):
        """
        Initialize the ResNet block (1x1 conv, 3x3 conv, 1x1 conv plus a shortcut branch).

        Args:
            c1 (int): Input channels.
            c2 (int): Base channels; the block outputs e * c2 channels.
            s (int): Stride of the 3x3 conv and of the projection shortcut.
            e (int): Expansion factor of the output channels.
        """
        super().__init__()
        out_channels = e * c2
        self.cv1 = Conv(c1, c2, k=1, s=1, act=True)
        self.cv2 = Conv(c2, c2, k=3, s=s, p=1, act=True)
        self.cv3 = Conv(c2, out_channels, k=1, act=False)
        # a projection is needed whenever the spatial size or the channel count changes
        needs_projection = s != 1 or c1 != out_channels
        if needs_projection:
            self.shortcut = nn.Sequential(Conv(c1, out_channels, k=1, s=s, act=False))
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Forward pass through the ResNet block: ReLU of the conv branch plus the shortcut branch."""
        residual = self.cv3(self.cv2(self.cv1(x)))
        return F.relu(residual + self.shortcut(x))

__init__(c1, c2, s=1, e=4)

Initialize the ResNet block with input channels, base channels, stride, and expansion factor.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, s=1, e=4):
    """
    Initialize the ResNet block (1x1 conv, 3x3 conv, 1x1 conv plus a shortcut branch).

    Args:
        c1 (int): Input channels.
        c2 (int): Base channels; the block outputs e * c2 channels.
        s (int): Stride of the 3x3 conv and of the projection shortcut.
        e (int): Expansion factor of the output channels.
    """
    super().__init__()
    out_channels = e * c2
    self.cv1 = Conv(c1, c2, k=1, s=1, act=True)
    self.cv2 = Conv(c2, c2, k=3, s=s, p=1, act=True)
    self.cv3 = Conv(c2, out_channels, k=1, act=False)
    # a projection is needed whenever the spatial size or the channel count changes
    needs_projection = s != 1 or c1 != out_channels
    if needs_projection:
        self.shortcut = nn.Sequential(Conv(c1, out_channels, k=1, s=s, act=False))
    else:
        self.shortcut = nn.Identity()

forward(x)

Forward pass through the ResNet block.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Forward pass through the ResNet block: ReLU of the conv branch plus the shortcut branch."""
    residual = self.cv3(self.cv2(self.cv1(x)))
    return F.relu(residual + self.shortcut(x))



ultralytics.nn.modules.block.ResNetLayer

Bases: Module

ResNet layer with multiple ResNet blocks.

Source code in ultralytics/nn/modules/block.py
class ResNetLayer(nn.Module):
    """Either a stem (k=7 convolution + max pooling) or a sequence of n ResNetBlock modules."""

    def __init__(self, c1, c2, s=1, is_first=False, n=1, e=4):
        """
        Build the layer.

        Args:
            c1 (int): Input channels.
            c2 (int): Base channel count handed to the stem convolution or to every ResNetBlock.
            s (int): Stride of the first ResNetBlock (unused when is_first is True).
            is_first (bool): If True, build the stem instead of residual blocks.
            n (int): Number of ResNetBlock modules (unused when is_first is True).
            e (int): Expansion factor forwarded to ResNetBlock (unused when is_first is True).
        """
        super().__init__()
        self.is_first = is_first

        if not self.is_first:
            # The first block takes c1 inputs and stride s; later blocks consume the e * c2 outputs.
            stages = [ResNetBlock(c1, c2, s, e=e)]
            for _ in range(n - 1):
                stages.append(ResNetBlock(e * c2, c2, 1, e=e))
            self.layer = nn.Sequential(*stages)
        else:
            stem = Conv(c1, c2, k=7, s=2, p=3, act=True)
            pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
            self.layer = nn.Sequential(stem, pool)

    def forward(self, x):
        """Apply the stacked sub-modules to x."""
        out = self.layer(x)
        return out

__init__(c1, c2, s=1, is_first=False, n=1, e=4)

Initializes the ResNetLayer given arguments.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, s=1, is_first=False, n=1, e=4):
    """
    Build the layer: a stem (k=7 convolution + max pooling) when is_first is True, else n ResNetBlock modules.

    Args:
        c1 (int): Input channels.
        c2 (int): Base channel count handed to the stem convolution or to every ResNetBlock.
        s (int): Stride of the first ResNetBlock (unused when is_first is True).
        is_first (bool): If True, build the stem instead of residual blocks.
        n (int): Number of ResNetBlock modules (unused when is_first is True).
        e (int): Expansion factor forwarded to ResNetBlock (unused when is_first is True).
    """
    super().__init__()
    self.is_first = is_first

    if not self.is_first:
        # The first block takes c1 inputs and stride s; later blocks consume the e * c2 outputs.
        stages = [ResNetBlock(c1, c2, s, e=e)]
        for _ in range(n - 1):
            stages.append(ResNetBlock(e * c2, c2, 1, e=e))
        self.layer = nn.Sequential(*stages)
    else:
        stem = Conv(c1, c2, k=7, s=2, p=3, act=True)
        pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer = nn.Sequential(stem, pool)

forward(x)

Forward pass through the ResNet layer.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Apply the stacked sub-modules held in self.layer to x."""
    out = self.layer(x)
    return out



ultralytics.nn.modules.block.MaxSigmoidAttnBlock

Bases: Module

Max Sigmoid attention block.

Source code in ultralytics/nn/modules/block.py
class MaxSigmoidAttnBlock(nn.Module):
    """Attention block that gates projected features with a per-head sigmoid of the best guide similarity."""

    def __init__(self, c1, c2, nh=1, ec=128, gc=512, scale=False):
        """
        Set up the embedding, guide and projection layers.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels of the projection convolution.
            nh (int): Number of heads.
            ec (int): Embedding channels used when comparing image features with the guide.
            gc (int): Feature size of the guide input.
            scale (bool): If True, learn a per-head scale; otherwise use the constant 1.0.
        """
        super().__init__()
        self.nh = nh
        self.hc = c2 // nh
        # Skip the embedding convolution when the input already has ec channels.
        self.ec = None if c1 == ec else Conv(c1, ec, k=1, act=False)
        self.gl = nn.Linear(gc, ec)
        self.bias = nn.Parameter(torch.zeros(nh))
        self.proj_conv = Conv(c1, c2, k=3, s=1, act=False)
        if scale:
            self.scale = nn.Parameter(torch.ones(1, nh, 1, 1))
        else:
            self.scale = 1.0

    def forward(self, x, guide):
        """
        Gate the projected features of x with attention weights derived from guide.

        For every head and spatial position the similarity to each guide entry is computed, the maximum over
        guide entries is taken, scaled by 1/sqrt(hc), shifted by the per-head bias and squashed with a sigmoid.
        """
        batch, _, height, width = x.shape

        guide_emb = self.gl(guide).view(batch, -1, self.nh, self.hc)
        image_emb = x if self.ec is None else self.ec(x)
        image_emb = image_emb.view(batch, self.nh, self.hc, height, width)

        scores = torch.einsum("bmchw,bnmc->bmhwn", image_emb, guide_emb)
        best = scores.max(dim=-1)[0] / (self.hc**0.5)
        gate = (best + self.bias[None, :, None, None]).sigmoid() * self.scale

        projected = self.proj_conv(x).view(batch, self.nh, -1, height, width)
        gated = projected * gate.unsqueeze(2)
        return gated.view(batch, -1, height, width)

__init__(c1, c2, nh=1, ec=128, gc=512, scale=False)

Initializes MaxSigmoidAttnBlock with specified arguments.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, nh=1, ec=128, gc=512, scale=False):
    """
    Set up the embedding, guide and projection layers.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels of the projection convolution.
        nh (int): Number of heads.
        ec (int): Embedding channels used when comparing image features with the guide.
        gc (int): Feature size of the guide input.
        scale (bool): If True, learn a per-head scale; otherwise use the constant 1.0.
    """
    super().__init__()
    self.nh = nh
    self.hc = c2 // nh
    # Skip the embedding convolution when the input already has ec channels.
    self.ec = None if c1 == ec else Conv(c1, ec, k=1, act=False)
    self.gl = nn.Linear(gc, ec)
    self.bias = nn.Parameter(torch.zeros(nh))
    self.proj_conv = Conv(c1, c2, k=3, s=1, act=False)
    if scale:
        self.scale = nn.Parameter(torch.ones(1, nh, 1, 1))
    else:
        self.scale = 1.0

forward(x, guide)

Forward process.

Source code in ultralytics/nn/modules/block.py
def forward(self, x, guide):
    """
    Gate the projected features of x with attention weights derived from guide.

    For every head and spatial position the similarity to each guide entry is computed, the maximum over
    guide entries is taken, scaled by 1/sqrt(hc), shifted by the per-head bias and squashed with a sigmoid.
    """
    batch, _, height, width = x.shape

    guide_emb = self.gl(guide).view(batch, -1, self.nh, self.hc)
    image_emb = x if self.ec is None else self.ec(x)
    image_emb = image_emb.view(batch, self.nh, self.hc, height, width)

    scores = torch.einsum("bmchw,bnmc->bmhwn", image_emb, guide_emb)
    best = scores.max(dim=-1)[0] / (self.hc**0.5)
    gate = (best + self.bias[None, :, None, None]).sigmoid() * self.scale

    projected = self.proj_conv(x).view(batch, self.nh, -1, height, width)
    gated = projected * gate.unsqueeze(2)
    return gated.view(batch, -1, height, width)



ultralytics.nn.modules.block.C2fAttn

Bases: Module

C2f module with an additional attn module.

Source code in ultralytics/nn/modules/block.py
class C2fAttn(nn.Module):
    """C2f-style block whose concatenated features include one extra guide-conditioned attention output."""

    def __init__(self, c1, c2, n=1, ec=128, nh=1, gc=512, shortcut=False, g=1, e=0.5):
        """
        Build the split convolution, the bottleneck chain, the attention block and the fusing convolution.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            n (int): Number of Bottleneck modules.
            ec (int): Embedding channels forwarded to the attention block.
            nh (int): Number of attention heads.
            gc (int): Guide feature size forwarded to the attention block.
            shortcut (bool): Shortcut flag forwarded to every Bottleneck.
            g (int): Groups argument forwarded to every Bottleneck.
            e (float): Ratio of hidden channels to c2.
        """
        super().__init__()
        hidden = int(c2 * e)
        self.c = hidden  # hidden channels
        self.cv1 = Conv(c1, 2 * hidden, 1, 1)
        # cv2 receives the two halves of cv1, n bottleneck outputs and one attention output.
        self.cv2 = Conv((3 + n) * hidden, c2, 1)  # optional act=FReLU(c2)
        bottlenecks = [Bottleneck(hidden, hidden, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)]
        self.m = nn.ModuleList(bottlenecks)
        self.attn = MaxSigmoidAttnBlock(hidden, hidden, gc=gc, ec=ec, nh=nh)

    def forward(self, x, guide):
        """Forward pass: chunk cv1(x) in two, chain the bottlenecks, append the attention output, fuse with cv2."""
        feats = list(self.cv1(x).chunk(2, 1))
        for block in self.m:
            # Each bottleneck consumes the most recently produced feature map.
            feats.append(block(feats[-1]))
        feats.append(self.attn(feats[-1], guide))
        return self.cv2(torch.cat(feats, 1))

    def forward_split(self, x, guide):
        """Same computation as forward(), but divides cv1(x) with split() instead of chunk()."""
        feats = list(self.cv1(x).split((self.c, self.c), 1))
        for block in self.m:
            feats.append(block(feats[-1]))
        feats.append(self.attn(feats[-1], guide))
        return self.cv2(torch.cat(feats, 1))

__init__(c1, c2, n=1, ec=128, nh=1, gc=512, shortcut=False, g=1, e=0.5)

Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut, groups, expansion.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, n=1, ec=128, nh=1, gc=512, shortcut=False, g=1, e=0.5):
    """
    Build the split convolution, the bottleneck chain, the attention block and the fusing convolution.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        n (int): Number of Bottleneck modules.
        ec (int): Embedding channels forwarded to the attention block.
        nh (int): Number of attention heads.
        gc (int): Guide feature size forwarded to the attention block.
        shortcut (bool): Shortcut flag forwarded to every Bottleneck.
        g (int): Groups argument forwarded to every Bottleneck.
        e (float): Ratio of hidden channels to c2.
    """
    super().__init__()
    hidden = int(c2 * e)
    self.c = hidden  # hidden channels
    self.cv1 = Conv(c1, 2 * hidden, 1, 1)
    # cv2 receives the two halves of cv1, n bottleneck outputs and one attention output.
    self.cv2 = Conv((3 + n) * hidden, c2, 1)  # optional act=FReLU(c2)
    bottlenecks = [Bottleneck(hidden, hidden, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)]
    self.m = nn.ModuleList(bottlenecks)
    self.attn = MaxSigmoidAttnBlock(hidden, hidden, gc=gc, ec=ec, nh=nh)

forward(x, guide)

Forward pass through the C2fAttn layer.

Source code in ultralytics/nn/modules/block.py
def forward(self, x, guide):
    """Forward pass: chunk cv1(x) in two, chain the modules in self.m, append the attention output, fuse with cv2."""
    feats = list(self.cv1(x).chunk(2, 1))
    for block in self.m:
        # Each module consumes the most recently produced feature map.
        feats.append(block(feats[-1]))
    feats.append(self.attn(feats[-1], guide))
    return self.cv2(torch.cat(feats, 1))

forward_split(x, guide)

Forward pass using split() instead of chunk().

Source code in ultralytics/nn/modules/block.py
def forward_split(self, x, guide):
    """Forward pass that divides cv1(x) with split((c, c)) instead of chunk(2) before chaining and fusing."""
    feats = list(self.cv1(x).split((self.c, self.c), 1))
    for block in self.m:
        # Each module consumes the most recently produced feature map.
        feats.append(block(feats[-1]))
    feats.append(self.attn(feats[-1], guide))
    return self.cv2(torch.cat(feats, 1))



ultralytics.nn.modules.block.ImagePoolingAttn

Bases: Module

ImagePoolingAttn: Enhance the text embeddings with image-aware information.

Source code in ultralytics/nn/modules/block.py
class ImagePoolingAttn(nn.Module):
    """ImagePoolingAttn: refine text embeddings by attending over pooled image features."""

    def __init__(self, ec=256, ch=(), ct=512, nh=8, k=3, scale=False):
        """
        Create the attention projections and the per-feature-map pooling branches.

        Args:
            ec (int): Embedding channels used inside the attention.
            ch (tuple): Channel count of every input feature map.
            ct (int): Channel count of the text embeddings.
            nh (int): Number of attention heads.
            k (int): Each feature map is max-pooled to a k x k grid.
            scale (bool): If True, learn a scale (initialised to 0.0) for the attention output; else use 1.0.
        """
        super().__init__()

        num_feats = len(ch)
        self.query = nn.Sequential(nn.LayerNorm(ct), nn.Linear(ct, ec))
        self.key = nn.Sequential(nn.LayerNorm(ec), nn.Linear(ec, ec))
        self.value = nn.Sequential(nn.LayerNorm(ec), nn.Linear(ec, ec))
        self.proj = nn.Linear(ec, ct)
        if scale:
            self.scale = nn.Parameter(torch.tensor([0.0]), requires_grad=True)
        else:
            self.scale = 1.0
        self.projections = nn.ModuleList([nn.Conv2d(c_in, ec, kernel_size=1) for c_in in ch])
        self.im_pools = nn.ModuleList([nn.AdaptiveMaxPool2d((k, k)) for _ in range(num_feats)])
        self.ec = ec
        self.nh = nh
        self.nf = num_feats
        self.hc = ec // nh
        self.k = k

    def forward(self, x, text):
        """
        Attend from the text embeddings (queries) to pooled image patches (keys/values).

        Args:
            x (list): One feature map per entry of `ch`; its length must equal self.nf.
            text (torch.Tensor): Text embeddings whose last dimension is ct.

        Returns:
            (torch.Tensor): `text` plus the scaled, projected attention output.
        """
        bs = x[0].shape[0]
        assert len(x) == self.nf
        num_patches = self.k**2

        pooled = []
        for feat, proj, pool in zip(x, self.projections, self.im_pools):
            pooled.append(pool(proj(feat)).view(bs, -1, num_patches))
        patches = torch.cat(pooled, dim=-1).transpose(1, 2)

        q = self.query(text)
        k = self.key(patches)
        v = self.value(patches)

        # Split the embedding dimension into (heads, channels per head).
        q = q.reshape(bs, -1, self.nh, self.hc)
        k = k.reshape(bs, -1, self.nh, self.hc)
        v = v.reshape(bs, -1, self.nh, self.hc)

        attn = torch.einsum("bnmc,bkmc->bmnk", q, k) / (self.hc**0.5)
        attn = F.softmax(attn, dim=-1)

        out = torch.einsum("bmnk,bkmc->bnmc", attn, v)
        out = self.proj(out.reshape(bs, -1, self.ec))
        return out * self.scale + text

__init__(ec=256, ch=(), ct=512, nh=8, k=3, scale=False)

Initializes ImagePoolingAttn with specified arguments.

Source code in ultralytics/nn/modules/block.py
def __init__(self, ec=256, ch=(), ct=512, nh=8, k=3, scale=False):
    """
    Create the attention projections and the per-feature-map pooling branches.

    Args:
        ec (int): Embedding channels used inside the attention.
        ch (tuple): Channel count of every input feature map.
        ct (int): Channel count of the text embeddings.
        nh (int): Number of attention heads.
        k (int): Each feature map is max-pooled to a k x k grid.
        scale (bool): If True, learn a scale (initialised to 0.0) for the attention output; else use 1.0.
    """
    super().__init__()

    num_feats = len(ch)
    self.query = nn.Sequential(nn.LayerNorm(ct), nn.Linear(ct, ec))
    self.key = nn.Sequential(nn.LayerNorm(ec), nn.Linear(ec, ec))
    self.value = nn.Sequential(nn.LayerNorm(ec), nn.Linear(ec, ec))
    self.proj = nn.Linear(ec, ct)
    if scale:
        self.scale = nn.Parameter(torch.tensor([0.0]), requires_grad=True)
    else:
        self.scale = 1.0
    self.projections = nn.ModuleList([nn.Conv2d(c_in, ec, kernel_size=1) for c_in in ch])
    self.im_pools = nn.ModuleList([nn.AdaptiveMaxPool2d((k, k)) for _ in range(num_feats)])
    self.ec = ec
    self.nh = nh
    self.nf = num_feats
    self.hc = ec // nh
    self.k = k

forward(x, text)

Executes the attention mechanism between the list of image feature maps x and the text embeddings.

Source code in ultralytics/nn/modules/block.py
def forward(self, x, text):
    """
    Attend from the text embeddings (queries) to pooled image patches (keys/values).

    Args:
        x (list): Image feature maps; the length must equal self.nf.
        text (torch.Tensor): Text embeddings fed to the query projection.

    Returns:
        (torch.Tensor): `text` plus the scaled, projected attention output.
    """
    bs = x[0].shape[0]
    assert len(x) == self.nf
    num_patches = self.k**2

    pooled = []
    for feat, proj, pool in zip(x, self.projections, self.im_pools):
        pooled.append(pool(proj(feat)).view(bs, -1, num_patches))
    patches = torch.cat(pooled, dim=-1).transpose(1, 2)

    q = self.query(text)
    k = self.key(patches)
    v = self.value(patches)

    # Split the embedding dimension into (heads, channels per head).
    q = q.reshape(bs, -1, self.nh, self.hc)
    k = k.reshape(bs, -1, self.nh, self.hc)
    v = v.reshape(bs, -1, self.nh, self.hc)

    attn = torch.einsum("bnmc,bkmc->bmnk", q, k) / (self.hc**0.5)
    attn = F.softmax(attn, dim=-1)

    out = torch.einsum("bmnk,bkmc->bnmc", attn, v)
    out = self.proj(out.reshape(bs, -1, self.ec))
    return out * self.scale + text



ultralytics.nn.modules.block.ContrastiveHead

Bases: Module

Contrastive head for YOLO-World that computes region-text scores from the similarity between image and text features.

Source code in ultralytics/nn/modules/block.py
class ContrastiveHead(nn.Module):
    """Contrastive head for YOLO-World that computes region-text scores from the similarity between image and
    text features.
    """

    def __init__(self):
        """Create the learnable bias (initialised to -10.0) and log-scale (initialised to log(1 / 0.07))."""
        super().__init__()
        # NOTE: use -10.0 to keep the init cls loss consistency with other losses
        self.bias = nn.Parameter(torch.tensor([-10.0]))
        initial_log_scale = torch.tensor(1 / 0.07).log()
        self.logit_scale = nn.Parameter(torch.ones([]) * initial_log_scale)

    def forward(self, x, w):
        """
        Score every spatial position of x against every embedding in w.

        x is L2-normalised over dim 1 and w over its last dim; their dot products are multiplied by
        exp(logit_scale) and shifted by the bias.
        """
        image_feats = F.normalize(x, p=2, dim=1)
        text_feats = F.normalize(w, p=2, dim=-1)
        similarity = torch.einsum("bchw,bkc->bkhw", image_feats, text_feats)
        return similarity * self.logit_scale.exp() + self.bias

__init__()

Initializes ContrastiveHead with specified region-text similarity parameters.

Source code in ultralytics/nn/modules/block.py
def __init__(self):
    """Create the learnable bias (initialised to -10.0) and log-scale (initialised to log(1 / 0.07))."""
    super().__init__()
    # NOTE: use -10.0 to keep the init cls loss consistency with other losses
    self.bias = nn.Parameter(torch.tensor([-10.0]))
    initial_log_scale = torch.tensor(1 / 0.07).log()
    self.logit_scale = nn.Parameter(torch.ones([]) * initial_log_scale)

forward(x, w)

Forward function of contrastive learning.

Source code in ultralytics/nn/modules/block.py
def forward(self, x, w):
    """
    Score every spatial position of x against every embedding in w.

    x is L2-normalised over dim 1 and w over its last dim; their dot products are multiplied by
    exp(logit_scale) and shifted by the bias.
    """
    image_feats = F.normalize(x, p=2, dim=1)
    text_feats = F.normalize(w, p=2, dim=-1)
    similarity = torch.einsum("bchw,bkc->bkhw", image_feats, text_feats)
    return similarity * self.logit_scale.exp() + self.bias



ultralytics.nn.modules.block.BNContrastiveHead

Bases: Module

Batch Norm Contrastive Head for YOLO-World using batch norm instead of l2-normalization.

Parameters:

Name Type Description Default
embed_dims int

Embed dimensions of text and image features.

required
Source code in ultralytics/nn/modules/block.py
class BNContrastiveHead(nn.Module):
    """
    Contrastive head for YOLO-World that batch-normalises the image features instead of L2-normalising them.

    Args:
        embed_dims (int): Embed dimensions of text and image features.
    """

    def __init__(self, embed_dims: int):
        """Create the BatchNorm2d layer plus the learnable bias (-10.0) and log-scale (-1.0)."""
        super().__init__()
        self.norm = nn.BatchNorm2d(embed_dims)
        # NOTE: use -10.0 to keep the init cls loss consistency with other losses
        self.bias = nn.Parameter(torch.tensor([-10.0]))
        # Starting the log-scale at -1.0 is more stable.
        self.logit_scale = nn.Parameter(torch.ones([]) * -1.0)

    def forward(self, x, w):
        """
        Score every spatial position of x against every embedding in w.

        x goes through batch norm, w is L2-normalised over its last dim; their dot products are multiplied
        by exp(logit_scale) and shifted by the bias.
        """
        image_feats = self.norm(x)
        text_feats = F.normalize(w, p=2, dim=-1)
        similarity = torch.einsum("bchw,bkc->bkhw", image_feats, text_feats)
        return similarity * self.logit_scale.exp() + self.bias

__init__(embed_dims)

Initialize BNContrastiveHead with a batch-norm layer and region-text similarity parameters.

Source code in ultralytics/nn/modules/block.py
def __init__(self, embed_dims: int):
    """Create the BatchNorm2d layer plus the learnable bias (-10.0) and log-scale (-1.0)."""
    super().__init__()
    self.norm = nn.BatchNorm2d(embed_dims)
    # NOTE: use -10.0 to keep the init cls loss consistency with other losses
    self.bias = nn.Parameter(torch.tensor([-10.0]))
    # Starting the log-scale at -1.0 is more stable.
    self.logit_scale = nn.Parameter(torch.ones([]) * -1.0)

forward(x, w)

Forward function of contrastive learning.

Source code in ultralytics/nn/modules/block.py
def forward(self, x, w):
    """
    Score every spatial position of x against every embedding in w.

    x goes through self.norm, w is L2-normalised over its last dim; their dot products are multiplied by
    exp(logit_scale) and shifted by the bias.
    """
    image_feats = self.norm(x)
    text_feats = F.normalize(w, p=2, dim=-1)
    similarity = torch.einsum("bchw,bkc->bkhw", image_feats, text_feats)
    return similarity * self.logit_scale.exp() + self.bias



ultralytics.nn.modules.block.RepBottleneck

Bases: Bottleneck

Rep bottleneck.

Source code in ultralytics/nn/modules/block.py
class RepBottleneck(Bottleneck):
    """Bottleneck variant whose first convolution is a RepConv."""

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
        """
        Initialise the parent Bottleneck, then replace its cv1 with a RepConv.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            shortcut (bool): Shortcut flag forwarded to Bottleneck.
            g (int): Groups argument forwarded to Bottleneck.
            k (tuple): Kernel sizes; k[0] is used for the RepConv.
            e (float): Ratio of hidden channels to c2.
        """
        super().__init__(c1, c2, shortcut, g, k, e)
        hidden = int(c2 * e)  # hidden channels
        first_kernel = k[0]
        self.cv1 = RepConv(c1, hidden, first_kernel, 1)

__init__(c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5)

Initializes a RepBottleneck module with customizable in/out channels, shortcut option, groups and expansion ratio.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
    """
    Run the parent initialiser, then set cv1 to a RepConv with int(c2 * e) output channels and kernel k[0].

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        shortcut (bool): Shortcut flag forwarded to the parent initialiser.
        g (int): Groups argument forwarded to the parent initialiser.
        k (tuple): Kernel sizes; k[0] is used for the RepConv.
        e (float): Ratio of hidden channels to c2.
    """
    super().__init__(c1, c2, shortcut, g, k, e)
    hidden = int(c2 * e)  # hidden channels
    first_kernel = k[0]
    self.cv1 = RepConv(c1, hidden, first_kernel, 1)



ultralytics.nn.modules.block.RepCSP

Bases: C3

Rep CSP Bottleneck with 3 convolutions.

Source code in ultralytics/nn/modules/block.py
class RepCSP(C3):
    """C3 variant whose inner module chain is built from RepBottleneck blocks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Initialise the parent C3, then replace self.m with n sequential RepBottleneck blocks.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            n (int): Number of RepBottleneck blocks.
            shortcut (bool): Shortcut flag forwarded to every RepBottleneck.
            g (int): Groups argument forwarded to every RepBottleneck.
            e (float): Ratio of hidden channels to c2.
        """
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)  # hidden channels
        rep_blocks = [RepBottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*rep_blocks)

__init__(c1, c2, n=1, shortcut=True, g=1, e=0.5)

Initializes RepCSP layer with given channels, repetitions, shortcut, groups and expansion ratio.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
    """
    Run the parent initialiser, then set self.m to n sequential RepBottleneck blocks.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        n (int): Number of RepBottleneck blocks.
        shortcut (bool): Shortcut flag forwarded to every RepBottleneck.
        g (int): Groups argument forwarded to every RepBottleneck.
        e (float): Ratio of hidden channels to c2.
    """
    super().__init__(c1, c2, n, shortcut, g, e)
    hidden = int(c2 * e)  # hidden channels
    rep_blocks = [RepBottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
    self.m = nn.Sequential(*rep_blocks)



ultralytics.nn.modules.block.RepNCSPELAN4

Bases: Module

CSP-ELAN.

Source code in ultralytics/nn/modules/block.py
class RepNCSPELAN4(nn.Module):
    """CSP-ELAN block: split cv1's output in two, chain two RepCSP+Conv branches, fuse everything with cv4."""

    def __init__(self, c1, c2, c3, c4, n=1):
        """
        Build the four stages of the block.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            c3 (int): Output channels of cv1 (split into two halves in forward).
            c4 (int): Output channels of each branch.
            n (int): Repetition count forwarded to every RepCSP.
        """
        super().__init__()
        half = c3 // 2
        self.c = half
        self.cv1 = Conv(c1, c3, 1, 1)
        self.cv2 = nn.Sequential(RepCSP(half, c4, n), Conv(c4, c4, 3, 1))
        self.cv3 = nn.Sequential(RepCSP(c4, c4, n), Conv(c4, c4, 3, 1))
        # cv4 receives both halves of cv1 (c3 channels in total) plus the two branch outputs.
        self.cv4 = Conv(c3 + (2 * c4), c2, 1, 1)

    def forward(self, x):
        """Forward pass: chunk cv1(x) in two, apply cv2 then cv3 in sequence, fuse all four maps with cv4."""
        feats = list(self.cv1(x).chunk(2, 1))
        for branch in (self.cv2, self.cv3):
            # Each branch consumes the most recently produced feature map.
            feats.append(branch(feats[-1]))
        return self.cv4(torch.cat(feats, 1))

    def forward_split(self, x):
        """Same computation as forward(), but divides cv1(x) with split() instead of chunk()."""
        feats = list(self.cv1(x).split((self.c, self.c), 1))
        for branch in (self.cv2, self.cv3):
            feats.append(branch(feats[-1]))
        return self.cv4(torch.cat(feats, 1))

__init__(c1, c2, c3, c4, n=1)

Initializes CSP-ELAN layer with specified channel sizes, repetitions, and convolutions.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, c3, c4, n=1):
    """
    Build the four stages of the CSP-ELAN block.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        c3 (int): Output channels of cv1.
        c4 (int): Output channels of each branch.
        n (int): Repetition count forwarded to every RepCSP.
    """
    super().__init__()
    half = c3 // 2
    self.c = half
    self.cv1 = Conv(c1, c3, 1, 1)
    self.cv2 = nn.Sequential(RepCSP(half, c4, n), Conv(c4, c4, 3, 1))
    self.cv3 = nn.Sequential(RepCSP(c4, c4, n), Conv(c4, c4, 3, 1))
    # cv4 is sized for c3 channels plus the two c4-channel branch outputs.
    self.cv4 = Conv(c3 + (2 * c4), c2, 1, 1)

forward(x)

Forward pass through RepNCSPELAN4 layer.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Forward pass: chunk cv1(x) in two, apply cv2 then cv3 in sequence, fuse all four maps with cv4."""
    feats = list(self.cv1(x).chunk(2, 1))
    for branch in (self.cv2, self.cv3):
        # Each branch consumes the most recently produced feature map.
        feats.append(branch(feats[-1]))
    return self.cv4(torch.cat(feats, 1))

forward_split(x)

Forward pass using split() instead of chunk().

Source code in ultralytics/nn/modules/block.py
def forward_split(self, x):
    """Forward pass that divides cv1(x) with split((c, c)) instead of chunk(2) before chaining and fusing."""
    feats = list(self.cv1(x).split((self.c, self.c), 1))
    for branch in (self.cv2, self.cv3):
        # Each branch consumes the most recently produced feature map.
        feats.append(branch(feats[-1]))
    return self.cv4(torch.cat(feats, 1))



ultralytics.nn.modules.block.ELAN1

Bases: RepNCSPELAN4

ELAN1 module with 4 convolutions.

Source code in ultralytics/nn/modules/block.py
class ELAN1(RepNCSPELAN4):
    """ELAN1 module with 4 convolutions; reuses the forward methods of RepNCSPELAN4."""

    def __init__(self, c1, c2, c3, c4):
        """
        Initialise the parent block, then overwrite cv1-cv4 with plain Conv modules.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            c3 (int): Output channels of cv1.
            c4 (int): Output channels of cv2 and cv3.
        """
        super().__init__(c1, c2, c3, c4)
        half = c3 // 2
        self.c = half
        self.cv1 = Conv(c1, c3, 1, 1)
        # cv2 and cv3 replace the modules assigned by the parent initialiser.
        self.cv2 = Conv(half, c4, 3, 1)
        self.cv3 = Conv(c4, c4, 3, 1)
        self.cv4 = Conv(c3 + (2 * c4), c2, 1, 1)

__init__(c1, c2, c3, c4)

Initializes ELAN1 layer with specified channel sizes.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, c3, c4):
    """
    Run the parent initialiser, then assign cv1-cv4 as plain Conv modules.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        c3 (int): Output channels of cv1.
        c4 (int): Output channels of cv2 and cv3.
    """
    super().__init__(c1, c2, c3, c4)
    half = c3 // 2
    self.c = half
    self.cv1 = Conv(c1, c3, 1, 1)
    # These assignments overwrite whatever the parent initialiser stored under the same names.
    self.cv2 = Conv(half, c4, 3, 1)
    self.cv3 = Conv(c4, c4, 3, 1)
    self.cv4 = Conv(c3 + (2 * c4), c2, 1, 1)



ultralytics.nn.modules.block.AConv

Bases: Module

AConv.

Source code in ultralytics/nn/modules/block.py
class AConv(nn.Module):
    """AConv: 2x2 average pooling (stride 1) followed by a single convolution."""

    def __init__(self, c1, c2):
        """Create the convolution mapping c1 to c2 channels (positional Conv arguments 3, 2, 1)."""
        super().__init__()
        self.cv1 = Conv(c1, c2, 3, 2, 1)

    def forward(self, x):
        """Average-pool x with a 2x2 window at stride 1 and no padding, then apply cv1."""
        pooled = torch.nn.functional.avg_pool2d(
            x, kernel_size=2, stride=1, padding=0, ceil_mode=False, count_include_pad=True
        )
        return self.cv1(pooled)

__init__(c1, c2)

Initializes AConv module with convolution layers.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2):
    """Create the single convolution cv1 mapping c1 to c2 channels (positional Conv arguments 3, 2, 1)."""
    super().__init__()
    downsample_conv = Conv(c1, c2, 3, 2, 1)
    self.cv1 = downsample_conv

forward(x)

Forward pass through AConv layer.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Average-pool x with a 2x2 window at stride 1 and no padding, then apply cv1."""
    pooled = torch.nn.functional.avg_pool2d(
        x, kernel_size=2, stride=1, padding=0, ceil_mode=False, count_include_pad=True
    )
    return self.cv1(pooled)



ultralytics.nn.modules.block.ADown

Bases: Module

ADown.

Source code in ultralytics/nn/modules/block.py
class ADown(nn.Module):
    """ADown: average-pool, split the channels in two, process each half on its own branch, concatenate."""

    def __init__(self, c1, c2):
        """Create the two branch convolutions; each maps c1 // 2 input channels to c2 // 2 output channels."""
        super().__init__()
        half_in = c1 // 2
        self.c = c2 // 2
        self.cv1 = Conv(half_in, self.c, 3, 2, 1)
        self.cv2 = Conv(half_in, self.c, 1, 1, 0)

    def forward(self, x):
        """
        Forward pass through ADown layer.

        x is average-pooled (2x2, stride 1) and chunked in two along dim 1. The first half goes through cv1;
        the second half is max-pooled (3x3, stride 2, padding 1) and then goes through cv2.
        """
        pooled = torch.nn.functional.avg_pool2d(
            x, kernel_size=2, stride=1, padding=0, ceil_mode=False, count_include_pad=True
        )
        first, second = pooled.chunk(2, 1)
        first = self.cv1(first)
        second = torch.nn.functional.max_pool2d(second, kernel_size=3, stride=2, padding=1)
        second = self.cv2(second)
        return torch.cat((first, second), 1)

__init__(c1, c2)

Initializes ADown module with convolution layers to downsample input from channels c1 to c2.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2):
    """Create the two branch convolutions; each maps c1 // 2 input channels to c2 // 2 output channels."""
    super().__init__()
    half_in = c1 // 2
    self.c = c2 // 2
    self.cv1 = Conv(half_in, self.c, 3, 2, 1)
    self.cv2 = Conv(half_in, self.c, 1, 1, 0)

forward(x)

Forward pass through ADown layer.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """
    Forward pass through ADown layer.

    x is average-pooled (2x2, stride 1) and chunked in two along dim 1. The first half goes through cv1;
    the second half is max-pooled (3x3, stride 2, padding 1) and then goes through cv2.
    """
    pooled = torch.nn.functional.avg_pool2d(
        x, kernel_size=2, stride=1, padding=0, ceil_mode=False, count_include_pad=True
    )
    first, second = pooled.chunk(2, 1)
    first = self.cv1(first)
    second = torch.nn.functional.max_pool2d(second, kernel_size=3, stride=2, padding=1)
    second = self.cv2(second)
    return torch.cat((first, second), 1)



ultralytics.nn.modules.block.SPPELAN

Bases: Module

SPP-ELAN.

Source code in ultralytics/nn/modules/block.py
class SPPELAN(nn.Module):
    """SPP-ELAN: a convolution followed by three chained max-pooling stages whose outputs are all fused."""

    def __init__(self, c1, c2, c3, k=5):
        """
        Build the input convolution, three identical stride-1 max-pooling layers and the fusing convolution.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            c3 (int): Channels produced by cv1 (and kept by every pooling stage).
            k (int): Max-pooling kernel size; padding is k // 2.
        """
        super().__init__()
        self.c = c3
        pad = k // 2
        self.cv1 = Conv(c1, c3, 1, 1)
        self.cv2 = nn.MaxPool2d(kernel_size=k, stride=1, padding=pad)
        self.cv3 = nn.MaxPool2d(kernel_size=k, stride=1, padding=pad)
        self.cv4 = nn.MaxPool2d(kernel_size=k, stride=1, padding=pad)
        # cv5 fuses cv1's output together with the three pooled maps.
        self.cv5 = Conv(4 * c3, c2, 1, 1)

    def forward(self, x):
        """Forward pass: apply cv1, pool three times in sequence, concatenate the four maps and apply cv5."""
        feats = [self.cv1(x)]
        for pool in (self.cv2, self.cv3, self.cv4):
            # Each pooling stage consumes the previous stage's output.
            feats.append(pool(feats[-1]))
        return self.cv5(torch.cat(feats, 1))

__init__(c1, c2, c3, k=5)

Initializes SPP-ELAN block with convolution and max pooling layers for spatial pyramid pooling.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, c3, k=5):
    """
    Build the input convolution, three identical stride-1 max-pooling layers and the fusing convolution.

    Args:
        c1 (int): Input channels.
        c2 (int): Output channels.
        c3 (int): Channels produced by cv1.
        k (int): Max-pooling kernel size; padding is k // 2.
    """
    super().__init__()
    self.c = c3
    pad = k // 2
    self.cv1 = Conv(c1, c3, 1, 1)
    self.cv2 = nn.MaxPool2d(kernel_size=k, stride=1, padding=pad)
    self.cv3 = nn.MaxPool2d(kernel_size=k, stride=1, padding=pad)
    self.cv4 = nn.MaxPool2d(kernel_size=k, stride=1, padding=pad)
    # cv5 is sized for four c3-channel maps.
    self.cv5 = Conv(4 * c3, c2, 1, 1)

forward(x)

Forward pass through SPPELAN layer.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Forward pass: apply cv1, then cv2, cv3, cv4 in sequence, concatenate the four maps and apply cv5."""
    feats = [self.cv1(x)]
    for pool in (self.cv2, self.cv3, self.cv4):
        # Each stage consumes the previous stage's output.
        feats.append(pool(feats[-1]))
    return self.cv5(torch.cat(feats, 1))



ultralytics.nn.modules.block.CBLinear

Bases: Module

CBLinear.

Source code in ultralytics/nn/modules/block.py
class CBLinear(nn.Module):
    """CBLinear: one convolution whose output is split along the channel dim into chunks of sizes c2s."""

    def __init__(self, c1, c2s, k=1, s=1, p=None, g=1):
        """
        Create the convolution producing sum(c2s) output channels.

        Args:
            c1 (int): Input channels.
            c2s (list | tuple): Channel count of every output chunk.
            k (int): Kernel size.
            s (int): Stride.
            p (int | None): Padding handed to autopad together with k.
            g (int): Convolution groups.
        """
        super().__init__()
        self.c2s = c2s
        total_out = sum(c2s)
        self.conv = nn.Conv2d(
            c1, total_out, kernel_size=k, stride=s, padding=autopad(k, p), groups=g, bias=True
        )

    def forward(self, x):
        """Apply the convolution and return its output split into chunks of sizes c2s along dim 1."""
        features = self.conv(x)
        return features.split(self.c2s, dim=1)

__init__(c1, c2s, k=1, s=1, p=None, g=1)

Initializes the CBLinear module with a single convolution that produces sum(c2s) output channels.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2s, k=1, s=1, p=None, g=1):
    """
    Create the convolution producing sum(c2s) output channels and remember the chunk sizes.

    Args:
        c1 (int): Input channels.
        c2s (list | tuple): Channel count of every output chunk.
        k (int): Kernel size.
        s (int): Stride.
        p (int | None): Padding handed to autopad together with k.
        g (int): Convolution groups.
    """
    super(CBLinear, self).__init__()
    self.c2s = c2s
    total_out = sum(c2s)
    self.conv = nn.Conv2d(
        c1, total_out, kernel_size=k, stride=s, padding=autopad(k, p), groups=g, bias=True
    )

forward(x)

Forward pass through CBLinear layer.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """Apply the convolution and return its output split into chunks of sizes c2s along dim 1."""
    features = self.conv(x)
    return features.split(self.c2s, dim=1)



ultralytics.nn.modules.block.CBFuse

Bases: Module

CBFuse.

Source code in ultralytics/nn/modules/block.py
class CBFuse(nn.Module):
    """CBFuse: resize selected feature maps to the last input's spatial size and sum them with it."""

    def __init__(self, idx):
        """
        Store the selection indices.

        Args:
            idx (list): For input i (all but the last), idx[i] picks which element of xs[i] is fused.
        """
        super().__init__()
        self.idx = idx

    def forward(self, xs):
        """
        Fuse the selected feature maps.

        Args:
            xs (list | tuple): Every entry except the last is an indexable collection of tensors; the last
                entry is the tensor that defines the target spatial size.

        Returns:
            (torch.Tensor): Element-wise sum of xs[-1] and the nearest-neighbour resized selections.
        """
        target_size = xs[-1].shape[2:]
        fused = [
            F.interpolate(x[self.idx[i]], size=target_size, mode="nearest") for i, x in enumerate(xs[:-1])
        ]
        # Append instead of `fused + xs[-1:]` so that a tuple `xs` works as well as a list.
        fused.append(xs[-1])
        return torch.sum(torch.stack(fused), dim=0)

__init__(idx)

Initializes CBFuse module with layer index for selective feature fusion.

Source code in ultralytics/nn/modules/block.py
def __init__(self, idx):
    """
    Store the selection indices used by forward().

    Args:
        idx (list): For input i (all but the last), idx[i] picks which element of that input is fused.
    """
    super(CBFuse, self).__init__()
    selection = idx
    self.idx = selection

forward(xs)

Forward pass through CBFuse layer.

Source code in ultralytics/nn/modules/block.py
def forward(self, xs):
    """
    Fuse the selected feature maps.

    Args:
        xs (list | tuple): Every entry except the last is an indexable collection of tensors; the last
            entry is the tensor that defines the target spatial size.

    Returns:
        (torch.Tensor): Element-wise sum of xs[-1] and the nearest-neighbour resized selections.
    """
    target_size = xs[-1].shape[2:]
    fused = [F.interpolate(x[self.idx[i]], size=target_size, mode="nearest") for i, x in enumerate(xs[:-1])]
    # Append instead of `fused + xs[-1:]` so that a tuple `xs` works as well as a list.
    fused.append(xs[-1])
    return torch.sum(torch.stack(fused), dim=0)



ultralytics.nn.modules.block.RepVGGDW

Bases: Module

RepVGGDW is a class that represents a depth wise separable convolutional block in RepVGG architecture.

Source code in ultralytics/nn/modules/block.py
class RepVGGDW(torch.nn.Module):
    """RepVGGDW block: two parallel grouped convolutions (kernel 7 and kernel 3) that can be fused into one."""

    def __init__(self, ed) -> None:
        """
        Create the two parallel convolutions and the activation.

        Args:
            ed (int): Channel count; used as input channels, output channels and group count of both branches.
        """
        super().__init__()
        self.conv = Conv(ed, ed, 7, 1, 3, g=ed, act=False)
        self.conv1 = Conv(ed, ed, 3, 1, 1, g=ed, act=False)
        self.dim = ed
        self.act = nn.SiLU()

    def forward(self, x):
        """
        Performs a forward pass of the unfused RepVGGDW block.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Activation applied to the sum of both convolution branches.
        """
        return self.act(self.conv(x) + self.conv1(x))

    def forward_fuse(self, x):
        """
        Performs a forward pass using only `self.conv`, i.e. the single convolution left after `fuse()`.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Activation applied to the output of `self.conv`.
        """
        return self.act(self.conv(x))

    @torch.no_grad()
    def fuse(self):
        """
        Fuses both branches into a single convolution stored in `self.conv` and removes `self.conv1`.

        Each branch's convolution is first fused with its batch norm via `fuse_conv_and_bn`; the smaller kernel
        is then zero-padded to the larger kernel's size and the weights and biases are summed. Calling this
        method again after fusion is a no-op.
        """
        if not hasattr(self, "conv1"):
            return  # already fused: self.conv no longer has .conv/.bn and conv1 is gone

        conv = fuse_conv_and_bn(self.conv.conv, self.conv.bn)
        conv1 = fuse_conv_and_bn(self.conv1.conv, self.conv1.bn)

        conv_w = conv.weight
        conv_b = conv.bias
        conv1_w = conv1.weight
        conv1_b = conv1.bias

        # Pad the kernel-3 weights by 2 on each spatial side so they line up with the kernel-7 weights.
        conv1_w = torch.nn.functional.pad(conv1_w, [2, 2, 2, 2])

        final_conv_w = conv_w + conv1_w
        final_conv_b = conv_b + conv1_b

        conv.weight.data.copy_(final_conv_w)
        conv.bias.data.copy_(final_conv_b)

        self.conv = conv
        del self.conv1

__init__(ed)

Initializes RepVGGDW with depthwise separable convolutional layers for efficient processing.

Source code in ultralytics/nn/modules/block.py
def __init__(self, ed) -> None:
    """
    Create the two parallel grouped convolutions (kernel 7 and kernel 3) and the SiLU activation.

    Args:
        ed (int): Channel count; used as input channels, output channels and group count of both branches.
    """
    super().__init__()
    channels = ed
    self.conv = Conv(channels, channels, 7, 1, 3, g=channels, act=False)
    self.conv1 = Conv(channels, channels, 3, 1, 1, g=channels, act=False)
    self.dim = channels
    self.act = nn.SiLU()

forward(x)

Performs a forward pass of the RepVGGDW block.

Parameters:

Name Type Description Default
x Tensor

Input tensor.

required

Returns:

Type Description
Tensor

Output tensor after applying the depth wise separable convolution.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """
    Performs a forward pass of the unfused RepVGGDW block.

    Args:
        x (torch.Tensor): Input tensor.

    Returns:
        (torch.Tensor): Activation applied to the sum of the `conv` and `conv1` branch outputs.
    """
    large_branch = self.conv(x)
    small_branch = self.conv1(x)
    return self.act(large_branch + small_branch)

forward_fuse(x)

Performs a forward pass of the RepVGGDW block using only `conv`, i.e. the single convolution left after `fuse()`.

Parameters:

Name Type Description Default
x Tensor

Input tensor.

required

Returns:

Type Description
Tensor

Output tensor after applying the depth wise separable convolution.

Source code in ultralytics/nn/modules/block.py
def forward_fuse(self, x):
    """
    Performs a forward pass using only `self.conv`; `self.conv1` is not used.

    Args:
        x (torch.Tensor): Input tensor.

    Returns:
        (torch.Tensor): Activation applied to the output of `self.conv`.
    """
    fused_out = self.conv(x)
    return self.act(fused_out)

fuse()

Fuses the convolutional layers in the RepVGGDW block.

This method fuses the convolutional layers and updates the weights and biases accordingly.

Source code in ultralytics/nn/modules/block.py
@torch.no_grad()
def fuse(self):
    """
    Fuses both branches into a single convolution stored in `self.conv` and removes `self.conv1`.

    Each branch's convolution is first fused with its batch norm via `fuse_conv_and_bn`; the `conv1` weights
    are then zero-padded by 2 on each spatial side and added to the `conv` weights, and the biases are summed.
    Calling this method again after fusion is a no-op.
    """
    if not hasattr(self, "conv1"):
        return  # already fused: self.conv no longer has .conv/.bn and conv1 is gone

    conv = fuse_conv_and_bn(self.conv.conv, self.conv.bn)
    conv1 = fuse_conv_and_bn(self.conv1.conv, self.conv1.bn)

    conv_w = conv.weight
    conv_b = conv.bias
    conv1_w = conv1.weight
    conv1_b = conv1.bias

    # Pad the smaller kernel so both weight tensors have the same spatial size before summing.
    conv1_w = torch.nn.functional.pad(conv1_w, [2, 2, 2, 2])

    final_conv_w = conv_w + conv1_w
    final_conv_b = conv_b + conv1_b

    conv.weight.data.copy_(final_conv_w)
    conv.bias.data.copy_(final_conv_b)

    self.conv = conv
    del self.conv1



ultralytics.nn.modules.block.CIB

Bases: Module

Compact Inverted Block (CIB) module.

Parameters:

Name Type Description Default
c1 int

Number of input channels.

required
c2 int

Number of output channels.

required
shortcut bool

Whether to add a shortcut connection. Defaults to True.

True
e float

Scaling factor for the hidden channels. Defaults to 0.5.

0.5
lk bool

Whether to use RepVGGDW for the third convolutional layer. Defaults to False.

False
Source code in ultralytics/nn/modules/block.py
class CIB(nn.Module):
    """
    Compact Inverted Block (CIB) module.

    A stack of five convolutions (depthwise 3x3, pointwise 1x1, depthwise 3x3 or RepVGGDW, pointwise 1x1,
    depthwise 3x3) with an optional residual connection around the whole stack.

    Args:
        c1 (int): Number of input channels.
        c2 (int): Number of output channels.
        shortcut (bool, optional): Whether to add a shortcut connection. Only applied when c1 == c2.
            Defaults to True.
        e (float, optional): Scaling factor for the hidden channels; the inner layers use 2 * int(c2 * e)
            channels. Defaults to 0.5.
        lk (bool, optional): Whether to use RepVGGDW for the third convolutional layer. Defaults to False.
    """

    def __init__(self, c1, c2, shortcut=True, e=0.5, lk=False):
        """Initializes the convolution stack and decides whether the residual connection is active."""
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        width = 2 * c_  # channel count of the inner layers

        # Layers are created in stack order so parameter initialisation order matches the layer order.
        layers = [Conv(c1, c1, 3, g=c1), Conv(c1, width, 1)]
        layers.append(RepVGGDW(width) if lk else Conv(width, width, 3, g=width))
        layers.append(Conv(width, c2, 1))
        layers.append(Conv(c2, c2, 3, g=c2))
        self.cv1 = nn.Sequential(*layers)

        self.add = shortcut and c1 == c2

    def forward(self, x):
        """
        Forward pass of the CIB module.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output of the convolution stack, with the input added when the shortcut is active.
        """
        y = self.cv1(x)
        return x + y if self.add else y

__init__(c1, c2, shortcut=True, e=0.5, lk=False)

Initializes the custom model with optional shortcut, scaling factor, and RepVGGDW layer.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, shortcut=True, e=0.5, lk=False):
    """Builds the five-layer convolution stack (optionally with RepVGGDW in the middle) and the shortcut flag."""
    super().__init__()
    c_ = int(c2 * e)  # hidden channels
    width = 2 * c_  # channel count of the inner layers

    # Layers are created in stack order so parameter initialisation order matches the layer order.
    layers = [Conv(c1, c1, 3, g=c1), Conv(c1, width, 1)]
    layers.append(RepVGGDW(width) if lk else Conv(width, width, 3, g=width))
    layers.append(Conv(width, c2, 1))
    layers.append(Conv(c2, c2, 3, g=c2))
    self.cv1 = nn.Sequential(*layers)

    self.add = shortcut and c1 == c2

forward(x)

Forward pass of the CIB module.

Parameters:

Name Type Description Default
x Tensor

Input tensor.

required

Returns:

Type Description
Tensor

Output tensor.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """
    Forward pass of the CIB module.

    Args:
        x (torch.Tensor): Input tensor.

    Returns:
        (torch.Tensor): Output of `self.cv1`, with the input added to it when `self.add` is truthy.
    """
    y = self.cv1(x)
    if self.add:
        return x + y
    return y



ultralytics.nn.modules.block.C2fCIB

Bases: C2f

C2fCIB class represents a convolutional block with C2f and CIB modules.

Parameters:

Name Type Description Default
c1 int

Number of input channels.

required
c2 int

Number of output channels.

required
n int

Number of CIB modules to stack. Defaults to 1.

1
shortcut bool

Whether to use shortcut connection. Defaults to False.

False
lk bool

Whether to use the large-kernel RepVGGDW layer inside each CIB module. Defaults to False.

False
g int

Number of groups for grouped convolution. Defaults to 1.

1
e float

Expansion ratio passed to the C2f base class; the stacked CIB modules always use e=1.0. Defaults to 0.5.

0.5
Source code in ultralytics/nn/modules/block.py
class C2fCIB(C2f):
    """
    C2fCIB class represents a convolutional block with C2f and CIB modules.

    Args:
        c1 (int): Number of input channels.
        c2 (int): Number of output channels.
        n (int, optional): Number of CIB modules to stack. Defaults to 1.
        shortcut (bool, optional): Whether to use shortcut connection; forwarded to C2f and to every CIB module.
            Defaults to False.
        lk (bool, optional): Whether each CIB module uses RepVGGDW for its third convolutional layer.
            Defaults to False.
        g (int, optional): Number of groups, forwarded to the C2f base class only. Defaults to 1.
        e (float, optional): Expansion ratio forwarded to the C2f base class; the CIB modules themselves are
            always built with e=1.0. Defaults to 0.5.
    """

    def __init__(self, c1, c2, n=1, shortcut=False, lk=False, g=1, e=0.5):
        """Initializes the C2f base, then sets `self.m` to a list of `n` CIB modules of width `self.c`."""
        super().__init__(c1, c2, n, shortcut, g, e)
        # self.c is read after the base initializer runs (presumably set by C2f.__init__ — defined outside this view).
        self.m = nn.ModuleList(CIB(self.c, self.c, shortcut, e=1.0, lk=lk) for _ in range(n))

__init__(c1, c2, n=1, shortcut=False, lk=False, g=1, e=0.5)

Initializes the module with specified parameters for channels, shortcut, large-kernel (RepVGGDW) option, groups, and expansion.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, n=1, shortcut=False, lk=False, g=1, e=0.5):
    """Initializes the C2f base, then sets `self.m` to `n` CIB modules (RepVGGDW variant when `lk` is True)."""
    super().__init__(c1, c2, n, shortcut, g, e)
    # `g` and `e` only reach the base class; every CIB module is built with e=1.0.
    self.m = nn.ModuleList(CIB(self.c, self.c, shortcut, e=1.0, lk=lk) for _ in range(n))



ultralytics.nn.modules.block.Attention

Bases: Module

Attention module that performs self-attention on the input tensor.

Parameters:

Name Type Description Default
dim int

The input tensor dimension.

required
num_heads int

The number of attention heads.

8
attn_ratio float

The ratio of the attention key dimension to the head dimension.

0.5

Attributes:

Name Type Description
num_heads int

The number of attention heads.

head_dim int

The dimension of each attention head.

key_dim int

The dimension of the attention key.

scale float

The scaling factor for the attention scores.

qkv Conv

Convolutional layer for computing the query, key, and value.

proj Conv

Convolutional layer for projecting the attended values.

pe Conv

Convolutional layer for positional encoding.

Source code in ultralytics/nn/modules/block.py
class Attention(nn.Module):
    """
    Attention module that performs self-attention on the input tensor.

    Args:
        dim (int): The input tensor dimension.
        num_heads (int): The number of attention heads. Defaults to 8.
        attn_ratio (float): The ratio of the attention key dimension to the head dimension. Defaults to 0.5.

    Attributes:
        num_heads (int): The number of attention heads.
        head_dim (int): The dimension of each attention head, dim // num_heads.
        key_dim (int): The dimension of the attention key, int(head_dim * attn_ratio).
        scale (float): The scaling factor for the attention scores, key_dim ** -0.5.
        qkv (Conv): Convolutional layer for computing the query, key, and value.
        proj (Conv): Convolutional layer for projecting the attended values.
        pe (Conv): Convolutional layer for positional encoding.
    """

    def __init__(self, dim, num_heads=8, attn_ratio=0.5):
        """Sets up head/key dimensions and the qkv, projection, and positional-encoding convolutions."""
        super().__init__()
        self.num_heads = num_heads
        self.head_dim = dim // num_heads
        self.key_dim = int(self.head_dim * attn_ratio)
        self.scale = self.key_dim**-0.5
        # Per head: key_dim query channels + key_dim key channels + head_dim value channels.
        qkv_channels = dim + 2 * (self.key_dim * num_heads)
        self.qkv = Conv(dim, qkv_channels, 1, act=False)
        self.proj = Conv(dim, dim, 1, act=False)
        self.pe = Conv(dim, dim, 3, 1, g=dim, act=False)

    def forward(self, x):
        """
        Forward pass of the Attention module.

        Args:
            x (torch.Tensor): The input tensor, unpacked as (B, C, H, W).

        Returns:
            (torch.Tensor): The output tensor after self-attention.
        """
        batch, channels, height, width = x.shape
        tokens = height * width
        per_head = 2 * self.key_dim + self.head_dim

        # (B, num_heads, per_head, N) -> q, k: (B, num_heads, key_dim, N); v: (B, num_heads, head_dim, N)
        stacked = self.qkv(x).view(batch, self.num_heads, per_head, tokens)
        q, k, v = stacked.split([self.key_dim, self.key_dim, self.head_dim], dim=2)

        # (B, num_heads, N, N) scores, normalised over the last dimension.
        weights = ((q.transpose(-2, -1) @ k) * self.scale).softmax(dim=-1)

        attended = (v @ weights.transpose(-2, -1)).view(batch, channels, height, width)
        positional = self.pe(v.reshape(batch, channels, height, width))
        return self.proj(attended + positional)

__init__(dim, num_heads=8, attn_ratio=0.5)

Initializes multi-head attention module with query, key, and value convolutions and positional encoding.

Source code in ultralytics/nn/modules/block.py
def __init__(self, dim, num_heads=8, attn_ratio=0.5):
    """Sets up head/key dimensions and the qkv, projection, and positional-encoding convolutions."""
    super().__init__()
    self.num_heads = num_heads
    self.head_dim = dim // num_heads
    self.key_dim = int(self.head_dim * attn_ratio)
    self.scale = self.key_dim**-0.5
    # Per head: key_dim query channels + key_dim key channels + head_dim value channels.
    qkv_channels = dim + 2 * (self.key_dim * num_heads)
    self.qkv = Conv(dim, qkv_channels, 1, act=False)
    self.proj = Conv(dim, dim, 1, act=False)
    self.pe = Conv(dim, dim, 3, 1, g=dim, act=False)

forward(x)

Forward pass of the Attention module.

Parameters:

Name Type Description Default
x Tensor

The input tensor.

required

Returns:

Type Description
Tensor

The output tensor after self-attention.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """
    Forward pass of the Attention module.

    Args:
        x (torch.Tensor): The input tensor, unpacked as (B, C, H, W).

    Returns:
        (torch.Tensor): The output tensor after self-attention.
    """
    batch, channels, height, width = x.shape
    tokens = height * width
    per_head = 2 * self.key_dim + self.head_dim

    # (B, num_heads, per_head, N) -> q, k: (B, num_heads, key_dim, N); v: (B, num_heads, head_dim, N)
    stacked = self.qkv(x).view(batch, self.num_heads, per_head, tokens)
    q, k, v = stacked.split([self.key_dim, self.key_dim, self.head_dim], dim=2)

    # (B, num_heads, N, N) scores, normalised over the last dimension.
    weights = ((q.transpose(-2, -1) @ k) * self.scale).softmax(dim=-1)

    attended = (v @ weights.transpose(-2, -1)).view(batch, channels, height, width)
    positional = self.pe(v.reshape(batch, channels, height, width))
    return self.proj(attended + positional)



ultralytics.nn.modules.block.PSA

Bases: Module

Position-wise Spatial Attention module.

Parameters:

Name Type Description Default
c1 int

Number of input channels.

required
c2 int

Number of output channels.

required
e float

Expansion factor for the intermediate channels. Default is 0.5.

0.5

Attributes:

Name Type Description
c int

Number of intermediate channels.

cv1 Conv

1x1 convolution layer mapping the c1 input channels to 2*c.

cv2 Conv

1x1 convolution layer mapping the concatenated 2*c channels back to c1.

attn Attention

Attention module for spatial attention.

ffn Sequential

Feed-forward network module.

Source code in ultralytics/nn/modules/block.py
class PSA(nn.Module):
    """
    Position-wise Spatial Attention module.

    The input is projected to 2*c channels and split into two halves of c channels. The first half is passed
    through unchanged; the second goes through residual attention and feed-forward stages. The halves are then
    concatenated and projected back to c1 channels.

    Args:
        c1 (int): Number of input channels.
        c2 (int): Number of output channels. Must equal c1.
        e (float): Expansion factor for the intermediate channels. Default is 0.5.

    Attributes:
        c (int): Number of intermediate channels, int(c1 * e).
        cv1 (Conv): 1x1 convolution layer mapping the c1 input channels to 2*c.
        cv2 (Conv): 1x1 convolution layer mapping the concatenated 2*c channels back to c1.
        attn (Attention): Attention module for spatial attention, with max(1, c // 64) heads.
        ffn (nn.Sequential): Feed-forward network module.
    """

    def __init__(self, c1, c2, e=0.5):
        """Initializes convolution layers, attention module, and feed-forward network with channel reduction."""
        super().__init__()
        assert c1 == c2, f"PSA requires c1 == c2, got c1={c1} and c2={c2}"
        self.c = int(c1 * e)
        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
        self.cv2 = Conv(2 * self.c, c1, 1)

        # One head per 64 channels, but never zero: with c < 64 a head count of 0 would make Attention
        # compute `dim // 0`.
        self.attn = Attention(self.c, attn_ratio=0.5, num_heads=max(1, self.c // 64))
        self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))

    def forward(self, x):
        """
        Forward pass of the PSA module.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor.
        """
        a, b = self.cv1(x).split((self.c, self.c), dim=1)
        b = b + self.attn(b)  # residual attention
        b = b + self.ffn(b)  # residual feed-forward
        return self.cv2(torch.cat((a, b), 1))

__init__(c1, c2, e=0.5)

Initializes convolution layers, attention module, and feed-forward network with channel reduction.

Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, e=0.5):
    """
    Initializes convolution layers, attention module, and feed-forward network with channel reduction.

    Args:
        c1 (int): Number of input channels.
        c2 (int): Number of output channels. Must equal c1.
        e (float): Expansion factor for the intermediate channels. Default is 0.5.
    """
    super().__init__()
    assert c1 == c2, f"PSA requires c1 == c2, got c1={c1} and c2={c2}"
    self.c = int(c1 * e)
    self.cv1 = Conv(c1, 2 * self.c, 1, 1)
    self.cv2 = Conv(2 * self.c, c1, 1)

    # One head per 64 channels, but never zero: a head count of 0 (c < 64) would make the attention
    # module divide by zero when it derives its per-head dimension.
    self.attn = Attention(self.c, attn_ratio=0.5, num_heads=max(1, self.c // 64))
    self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))

forward(x)

Forward pass of the PSA module.

Parameters:

Name Type Description Default
x Tensor

Input tensor.

required

Returns:

Type Description
Tensor

Output tensor.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """
    Forward pass of the PSA module.

    Args:
        x (torch.Tensor): Input tensor.

    Returns:
        (torch.Tensor): Output of `cv2` applied to the untouched half concatenated with the refined half.
    """
    untouched, refined = self.cv1(x).split((self.c, self.c), dim=1)
    # Residual attention first, then residual feed-forward, both on the second half only.
    for stage in (self.attn, self.ffn):
        refined = refined + stage(refined)
    return self.cv2(torch.cat((untouched, refined), 1))



ultralytics.nn.modules.block.SCDown

Bases: Module

Spatial Channel Downsample (SCDown) module for reducing spatial and channel dimensions.

Source code in ultralytics/nn/modules/block.py
class SCDown(nn.Module):
    """Spatial Channel Downsample (SCDown) module: a 1x1 channel projection followed by a depthwise strided conv."""

    def __init__(self, c1, c2, k, s):
        """
        Builds the two convolutions of the SCDown module.

        Args:
            c1 (int): Number of input channels.
            c2 (int): Number of output channels.
            k (int): Kernel size for the convolutional layer.
            s (int): Stride for the convolutional layer.
        """
        super().__init__()
        # cv1 maps c1 -> c2 channels; cv2 then applies the k/s convolution with one group per channel.
        self.cv1 = Conv(c1, c2, 1, 1)
        self.cv2 = Conv(c2, c2, k=k, s=s, g=c2, act=False)

    def forward(self, x):
        """
        Applies `cv1` and then `cv2` to the input.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor after applying the SCDown module.
        """
        projected = self.cv1(x)
        return self.cv2(projected)

__init__(c1, c2, k, s)

Spatial Channel Downsample (SCDown) module.

Parameters:

Name Type Description Default
c1 int

Number of input channels.

required
c2 int

Number of output channels.

required
k int

Kernel size for the convolutional layer.

required
s int

Stride for the convolutional layer.

required
Source code in ultralytics/nn/modules/block.py
def __init__(self, c1, c2, k, s):
    """
    Builds the two convolutions of the SCDown module.

    Args:
        c1 (int): Number of input channels.
        c2 (int): Number of output channels.
        k (int): Kernel size for the convolutional layer.
        s (int): Stride for the convolutional layer.
    """
    super().__init__()
    # cv1 maps c1 -> c2 channels; cv2 then applies the k/s convolution with one group per channel.
    self.cv1 = Conv(c1, c2, 1, 1)
    depthwise_kwargs = dict(k=k, s=s, g=c2, act=False)
    self.cv2 = Conv(c2, c2, **depthwise_kwargs)

forward(x)

Forward pass of the SCDown module.

Parameters:

Name Type Description Default
x Tensor

Input tensor.

required

Returns:

Type Description
Tensor

Output tensor after applying the SCDown module.

Source code in ultralytics/nn/modules/block.py
def forward(self, x):
    """
    Applies `cv1` and then `cv2` to the input.

    Args:
        x (torch.Tensor): Input tensor.

    Returns:
        (torch.Tensor): Output tensor after applying the SCDown module.
    """
    projected = self.cv1(x)
    return self.cv2(projected)





Created 2023-11-12, Updated 2024-06-20
Authors: Burhan-Q (2), Laughing-q (3), glenn-jocher (7)