Reference for `ultralytics/models/sam/sam3/necks.py`

Improvements

This page is sourced from https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/sam3/necks.py. Have an improvement or example to add? Open a Pull Request — thank you! 🙏

Summary

Classes and Methods

Sam3DualViTDetNeck

Sam3DualViTDetNeck.forward
Sam3DualViTDetNeck.sam_forward_feature_levels
Sam3DualViTDetNeck.set_imgsz

class `ultralytics.models.sam.sam3.necks.Sam3DualViTDetNeck`

def __init__(
    self,
    trunk: nn.Module,
    position_encoding: nn.Module,
    d_model: int,
    scale_factors=(4.0, 2.0, 1.0, 0.5),
    add_sam2_neck: bool = False,
)

Bases: nn.Module

A neck that implements a simple FPN as in ViTDet, with support for dual necks (for SAM3 and SAM2).

(From detectron2, very lightly adapted) It supports a "dual neck" setting, where we have two identical necks (for SAM3 and SAM2), with different weights.

Parameters:

- `trunk`: the backbone
- `position_encoding`: the positional encoding to use
- `d_model`: the dimension of the model

Args

Name	Type	Default
`trunk`	`nn.Module`	required
`position_encoding`	`nn.Module`	required
`d_model`	`int`	required
`scale_factors`		`(4.0, 2.0, 1.0, 0.5)`
`add_sam2_neck`	`bool`	`False`

Methods

Name	Description
`forward`	Get feature maps and positional encodings from the neck.
`sam_forward_feature_levels`	Run neck convolutions and compute positional encodings for each feature level.
`set_imgsz`	Set the image size for the trunk backbone.

Source code in ultralytics/models/sam/sam3/necks.py

View on GitHub

class Sam3DualViTDetNeck(nn.Module):
    """A neck that implements a simple FPN as in ViTDet, with support for dual necks (for SAM3 and SAM2).

    (From detectron2, very lightly adapted.) One convolutional branch is built per entry of `scale_factors`. Every
    branch takes `trunk.channel_list[-1]` input channels and ends with a 1x1 and a 3x3 convolution that output
    `d_model` channels. In the "dual neck" setting a second set of branches with the same structure but separate
    weights is kept for SAM2.

    Attributes:
        trunk (nn.Module): The backbone.
        position_encoding (nn.Module): The positional encoding module.
        convs (nn.ModuleList): One `nn.Sequential` branch per scale factor.
        scale_factors (tuple): The scale factors the branches were built for.
        sam2_convs (nn.ModuleList | None): Deep copy of `convs` when `add_sam2_neck` is True, otherwise None.
    """

    def __init__(
        self,
        trunk: nn.Module,
        position_encoding: nn.Module,
        d_model: int,
        scale_factors=(4.0, 2.0, 1.0, 0.5),
        add_sam2_neck: bool = False,
    ):
        """Build the SimpleFPN branches (a la ViTDet) and, optionally, a cloned set of branches for SAM2.

        Args:
            trunk (nn.Module): The backbone. Must expose `channel_list`; its last entry is used as the input
                channel count of every branch.
            position_encoding (nn.Module): The positional encoding to use.
            d_model (int): The dimension of the model, i.e. the output channel count of every branch.
            scale_factors (tuple): One branch is created per entry. Supported values are 4.0, 2.0, 1.0 and 0.5.
            add_sam2_neck (bool): If True, keep a deep copy of the branches as `sam2_convs`.

        Raises:
            NotImplementedError: If `scale_factors` contains an unsupported value.
        """
        super().__init__()
        self.trunk = trunk
        self.position_encoding = position_encoding
        self.convs = nn.ModuleList()

        self.scale_factors = scale_factors
        use_bias = True
        dim: int = self.trunk.channel_list[-1]

        # Sub-module names below become state_dict keys, so they must stay exactly as they are.
        for scale in scale_factors:
            current = nn.Sequential()

            if scale == 4.0:
                # Two stride-2 transposed convs: 4x spatial upsampling, channels reduced to dim // 4.
                current.add_module("dconv_2x2_0", nn.ConvTranspose2d(dim, dim // 2, kernel_size=2, stride=2))
                current.add_module("gelu", nn.GELU())
                current.add_module("dconv_2x2_1", nn.ConvTranspose2d(dim // 2, dim // 4, kernel_size=2, stride=2))
                out_dim = dim // 4
            elif scale == 2.0:
                # One stride-2 transposed conv: 2x spatial upsampling, channels reduced to dim // 2.
                current.add_module("dconv_2x2", nn.ConvTranspose2d(dim, dim // 2, kernel_size=2, stride=2))
                out_dim = dim // 2
            elif scale == 1.0:
                # No resampling layer; only the projection convs below are applied.
                out_dim = dim
            elif scale == 0.5:
                # 2x spatial downsampling via max pooling; channel count is unchanged.
                current.add_module("maxpool_2x2", nn.MaxPool2d(kernel_size=2, stride=2))
                out_dim = dim
            else:
                raise NotImplementedError(f"scale_factor={scale} is not supported yet.")

            # Shared tail of every branch: project to d_model channels, then a 3x3 conv that keeps the spatial size.
            current.add_module(
                "conv_1x1",
                nn.Conv2d(in_channels=out_dim, out_channels=d_model, kernel_size=1, bias=use_bias),
            )
            current.add_module(
                "conv_3x3",
                nn.Conv2d(in_channels=d_model, out_channels=d_model, kernel_size=3, padding=1, bias=use_bias),
            )
            self.convs.append(current)

        self.sam2_convs = None
        if add_sam2_neck:
            # Assumes sam2 neck is just a clone of the original neck
            self.sam2_convs = deepcopy(self.convs)

method `ultralytics.models.sam.sam3.necks.Sam3DualViTDetNeck.forward`

def forward(
    self, tensor_list: list[torch.Tensor]
) -> tuple[list[torch.Tensor], list[torch.Tensor], list[torch.Tensor] | None, list[torch.Tensor] | None]

Get feature maps and positional encodings from the neck.

Args

Name	Type	Description	Default
`tensor_list`	`list[torch.Tensor]`		required

Source code in ultralytics/models/sam/sam3/necks.py

View on GitHub

def forward(
    self, tensor_list: list[torch.Tensor]
) -> tuple[list[torch.Tensor], list[torch.Tensor], list[torch.Tensor] | None, list[torch.Tensor] | None]:
    """Get feature maps and positional encodings from the neck."""
    xs = self.trunk(tensor_list)
    x = xs[-1]  # simpleFPN
    sam3_out, sam3_pos = self.sam_forward_feature_levels(x, self.convs)
    if self.sam2_convs is None:
        return sam3_out, sam3_pos, None, None
    sam2_out, sam2_pos = self.sam_forward_feature_levels(x, self.sam2_convs)
    return sam3_out, sam3_pos, sam2_out, sam2_pos

method `ultralytics.models.sam.sam3.necks.Sam3DualViTDetNeck.sam_forward_feature_levels`

def sam_forward_feature_levels(
    self, x: torch.Tensor, convs: nn.ModuleList
) -> tuple[list[torch.Tensor], list[torch.Tensor]]

Run neck convolutions and compute positional encodings for each feature level.

Args

Name	Type	Description	Default
`x`	`torch.Tensor`		required
`convs`	`nn.ModuleList`		required

Source code in ultralytics/models/sam/sam3/necks.py

View on GitHub

def sam_forward_feature_levels(
    self, x: torch.Tensor, convs: nn.ModuleList
) -> tuple[list[torch.Tensor], list[torch.Tensor]]:
    """Apply every neck branch to `x` and compute a positional encoding for each resulting feature map.

    Args:
        x (torch.Tensor): Feature map fed to every branch.
        convs (nn.ModuleList): The branches to apply, one per feature level.

    Returns:
        (tuple): `(features, encodings)`, two lists with one entry per branch, in branch order. Each encoding is
            cast to the dtype of its feature map.
    """
    levels = []
    for branch in convs:
        level_feat = branch(x)
        # Cast the encoding so it matches the feature map's dtype.
        level_pos = self.position_encoding(level_feat).to(level_feat.dtype)
        levels.append((level_feat, level_pos))
    return [feat for feat, _ in levels], [pos for _, pos in levels]

method `ultralytics.models.sam.sam3.necks.Sam3DualViTDetNeck.set_imgsz`

def set_imgsz(self, imgsz: list[int] = [1008, 1008])

Set the image size for the trunk backbone.

Args

Name	Type	Description	Default
`imgsz`	`list[int]`		`[1008, 1008]`

Source code in ultralytics/models/sam/sam3/necks.py

View on GitHub

def set_imgsz(self, imgsz: list[int] = [1008, 1008]):
    """Set the image size for the trunk backbone."""
    self.trunk.set_imgsz(imgsz)

📅 Created 25 days ago ✏️ Updated 25 days ago

Reference for ultralytics/models/sam/sam3/necks.py

class ultralytics.models.sam.sam3.necks.Sam3DualViTDetNeck

method ultralytics.models.sam.sam3.necks.Sam3DualViTDetNeck.forward

method ultralytics.models.sam.sam3.necks.Sam3DualViTDetNeck.sam_forward_feature_levels

method ultralytics.models.sam.sam3.necks.Sam3DualViTDetNeck.set_imgsz

Reference for `ultralytics/models/sam/sam3/necks.py`

class `ultralytics.models.sam.sam3.necks.Sam3DualViTDetNeck`

method `ultralytics.models.sam.sam3.necks.Sam3DualViTDetNeck.forward`

method `ultralytics.models.sam.sam3.necks.Sam3DualViTDetNeck.sam_forward_feature_levels`

method `ultralytics.models.sam.sam3.necks.Sam3DualViTDetNeck.set_imgsz`