
Reference for ultralytics/utils/loss.py

Improvements

This page is sourced from https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/loss.py. Have an improvement or example to add? Open a Pull Request — thank you! 🙏


class ultralytics.utils.loss.VarifocalLoss

VarifocalLoss(self, gamma: float = 2.0, alpha: float = 0.75)

Bases: nn.Module

Varifocal loss by Zhang et al.

Implements the Varifocal Loss function for addressing class imbalance in object detection by focusing on hard-to-classify examples and balancing positive/negative samples.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `gamma` | `float` | The focusing parameter that controls how much the loss focuses on hard-to-classify examples. | `2.0` |
| `alpha` | `float` | The balancing factor used to address class imbalance. | `0.75` |

Attributes

| Name | Type | Description |
| ---- | ---- | ----------- |
| `gamma` | `float` | The focusing parameter that controls how much the loss focuses on hard-to-classify examples. |
| `alpha` | `float` | The balancing factor used to address class imbalance. |

References

https://arxiv.org/abs/2008.13367

Methods

| Name | Description |
| ---- | ----------- |
| `forward` | Compute varifocal loss between predictions and ground truth. |

Source code in ultralytics/utils/loss.py
class VarifocalLoss(nn.Module):
    """Varifocal loss by Zhang et al.

    Implements the Varifocal Loss function for addressing class imbalance in object detection by focusing on
    hard-to-classify examples and balancing positive/negative samples.

    Attributes:
        gamma (float): The focusing parameter that controls how much the loss focuses on hard-to-classify examples.
        alpha (float): The balancing factor used to address class imbalance.

    References:
        https://arxiv.org/abs/2008.13367
    """

    def __init__(self, gamma: float = 2.0, alpha: float = 0.75):
        """Initialize the VarifocalLoss class with focusing and balancing parameters."""
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha


method ultralytics.utils.loss.VarifocalLoss.forward

def forward(self, pred_score: torch.Tensor, gt_score: torch.Tensor, label: torch.Tensor) -> torch.Tensor

Compute varifocal loss between predictions and ground truth.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `pred_score` | `torch.Tensor` | | required |
| `gt_score` | `torch.Tensor` | | required |
| `label` | `torch.Tensor` | | required |

Source code in ultralytics/utils/loss.py
def forward(self, pred_score: torch.Tensor, gt_score: torch.Tensor, label: torch.Tensor) -> torch.Tensor:
    """Compute varifocal loss between predictions and ground truth."""
    weight = self.alpha * pred_score.sigmoid().pow(self.gamma) * (1 - label) + gt_score * label
    with autocast(enabled=False):
        loss = (
            (F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), reduction="none") * weight)
            .mean(1)
            .sum()
        )
    return loss
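
A minimal usage sketch with random tensors (all shapes are illustrative, not tied to a particular model): `pred_score` holds raw logits, `label` marks positive anchors, and `gt_score` carries the IoU-aware soft targets that weight the positives.

```python
import torch

from ultralytics.utils.loss import VarifocalLoss

vfl = VarifocalLoss(gamma=2.0, alpha=0.75)
pred_score = torch.randn(2, 8400, 80)             # raw logits (batch, anchors, classes)
label = (torch.rand(2, 8400, 80) > 0.99).float()  # binary positive mask
gt_score = torch.rand(2, 8400, 80) * label        # IoU-aware soft targets, zero for negatives
loss = vfl(pred_score, gt_score, label)           # scalar loss
```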





class ultralytics.utils.loss.FocalLoss

FocalLoss(self, gamma: float = 1.5, alpha: float = 0.25)

Bases: nn.Module

Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5).

Implements the Focal Loss function for addressing class imbalance by down-weighting easy examples and focusing on hard negatives during training.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `gamma` | `float` | The focusing parameter that controls how much the loss focuses on hard-to-classify examples. | `1.5` |
| `alpha` | `float` | The balancing factor used to address class imbalance. | `0.25` |

Attributes

| Name | Type | Description |
| ---- | ---- | ----------- |
| `gamma` | `float` | The focusing parameter that controls how much the loss focuses on hard-to-classify examples. |
| `alpha` | `torch.Tensor` | The balancing factor used to address class imbalance. |

Methods

| Name | Description |
| ---- | ----------- |
| `forward` | Calculate focal loss with modulating factors for class imbalance. |

Source code in ultralytics/utils/loss.py
class FocalLoss(nn.Module):
    """Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5).

    Implements the Focal Loss function for addressing class imbalance by down-weighting easy examples and focusing on
    hard negatives during training.

    Attributes:
        gamma (float): The focusing parameter that controls how much the loss focuses on hard-to-classify examples.
        alpha (torch.Tensor): The balancing factor used to address class imbalance.
    """

    def __init__(self, gamma: float = 1.5, alpha: float = 0.25):
        """Initialize FocalLoss class with focusing and balancing parameters."""
        super().__init__()
        self.gamma = gamma
        self.alpha = torch.tensor(alpha)


method ultralytics.utils.loss.FocalLoss.forward

def forward(self, pred: torch.Tensor, label: torch.Tensor) -> torch.Tensor

Calculate focal loss with modulating factors for class imbalance.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `pred` | `torch.Tensor` | | required |
| `label` | `torch.Tensor` | | required |

Source code in ultralytics/utils/loss.py
def forward(self, pred: torch.Tensor, label: torch.Tensor) -> torch.Tensor:
    """Calculate focal loss with modulating factors for class imbalance."""
    loss = F.binary_cross_entropy_with_logits(pred, label, reduction="none")
    # p_t = torch.exp(-loss)
    # loss *= self.alpha * (1.000001 - p_t) ** self.gamma  # non-zero power for gradient stability

    # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
    pred_prob = pred.sigmoid()  # prob from logits
    p_t = label * pred_prob + (1 - label) * (1 - pred_prob)
    modulating_factor = (1.0 - p_t) ** self.gamma
    loss *= modulating_factor
    if (self.alpha > 0).any():
        self.alpha = self.alpha.to(device=pred.device, dtype=pred.dtype)
        alpha_factor = label * self.alpha + (1 - label) * (1 - self.alpha)
        loss *= alpha_factor
    return loss.mean(1).sum()
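
Usage mirrors `VarifocalLoss`; a minimal sketch with random logits and binary targets (shapes illustrative):

```python
import torch

from ultralytics.utils.loss import FocalLoss

fl = FocalLoss(gamma=1.5, alpha=0.25)
pred = torch.randn(2, 8400, 80)                   # raw logits (batch, anchors, classes)
label = (torch.rand(2, 8400, 80) > 0.99).float()  # binary targets
loss = fl(pred, label)                            # scalar: mean over anchors, summed over batch/classes
```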





class ultralytics.utils.loss.DFLoss

DFLoss(self, reg_max: int = 16) -> None

Bases: nn.Module

Criterion class for computing Distribution Focal Loss (DFL).

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `reg_max` | `int` | | `16` |

Methods

| Name | Description |
| ---- | ----------- |
| `__call__` | Return sum of left and right DFL losses from https://ieeexplore.ieee.org/document/9792391. |

Source code in ultralytics/utils/loss.py
class DFLoss(nn.Module):
    """Criterion class for computing Distribution Focal Loss (DFL)."""

    def __init__(self, reg_max: int = 16) -> None:
        """Initialize the DFL module with regularization maximum."""
        super().__init__()
        self.reg_max = reg_max


method ultralytics.utils.loss.DFLoss.__call__

def __call__(self, pred_dist: torch.Tensor, target: torch.Tensor) -> torch.Tensor

Return sum of left and right DFL losses from https://ieeexplore.ieee.org/document/9792391.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `pred_dist` | `torch.Tensor` | | required |
| `target` | `torch.Tensor` | | required |

Source code in ultralytics/utils/loss.py
def __call__(self, pred_dist: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """Return sum of left and right DFL losses from https://ieeexplore.ieee.org/document/9792391."""
    target = target.clamp_(0, self.reg_max - 1 - 0.01)
    tl = target.long()  # target left
    tr = tl + 1  # target right
    wl = tr - target  # weight left
    wr = 1 - wl  # weight right
    return (
        F.cross_entropy(pred_dist, tl.view(-1), reduction="none").view(tl.shape) * wl
        + F.cross_entropy(pred_dist, tr.view(-1), reduction="none").view(tl.shape) * wr
    ).mean(-1, keepdim=True)
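
DFL models each of the four box distances as a discrete distribution over `reg_max` bins and cross-entropies the two bins bracketing the continuous target. A sketch with random inputs (sizes illustrative):

```python
import torch

from ultralytics.utils.loss import DFLoss

dfl = DFLoss(reg_max=16)
n = 8                               # foreground anchors (illustrative)
pred_dist = torch.randn(n * 4, 16)  # one 16-bin distribution per l/t/r/b distance
target = torch.rand(n, 4) * 14.9    # continuous distances in [0, reg_max - 1)
loss = dfl(pred_dist, target)       # (n, 1) per-anchor DFL loss
```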





class ultralytics.utils.loss.BboxLoss

BboxLoss(self, reg_max: int = 16)

Bases: nn.Module

Criterion class for computing training losses for bounding boxes.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `reg_max` | `int` | | `16` |

Methods

| Name | Description |
| ---- | ----------- |
| `forward` | Compute IoU and DFL losses for bounding boxes. |

Source code in ultralytics/utils/loss.py
class BboxLoss(nn.Module):
    """Criterion class for computing training losses for bounding boxes."""

    def __init__(self, reg_max: int = 16):
        """Initialize the BboxLoss module with regularization maximum and DFL settings."""
        super().__init__()
        self.dfl_loss = DFLoss(reg_max) if reg_max > 1 else None


method ultralytics.utils.loss.BboxLoss.forward

def forward(
    self,
    pred_dist: torch.Tensor,
    pred_bboxes: torch.Tensor,
    anchor_points: torch.Tensor,
    target_bboxes: torch.Tensor,
    target_scores: torch.Tensor,
    target_scores_sum: torch.Tensor,
    fg_mask: torch.Tensor,
) -> tuple[torch.Tensor, torch.Tensor]

Compute IoU and DFL losses for bounding boxes.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `pred_dist` | `torch.Tensor` | | required |
| `pred_bboxes` | `torch.Tensor` | | required |
| `anchor_points` | `torch.Tensor` | | required |
| `target_bboxes` | `torch.Tensor` | | required |
| `target_scores` | `torch.Tensor` | | required |
| `target_scores_sum` | `torch.Tensor` | | required |
| `fg_mask` | `torch.Tensor` | | required |

Source code in ultralytics/utils/loss.py
def forward(
    self,
    pred_dist: torch.Tensor,
    pred_bboxes: torch.Tensor,
    anchor_points: torch.Tensor,
    target_bboxes: torch.Tensor,
    target_scores: torch.Tensor,
    target_scores_sum: torch.Tensor,
    fg_mask: torch.Tensor,
) -> tuple[torch.Tensor, torch.Tensor]:
    """Compute IoU and DFL losses for bounding boxes."""
    weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)
    iou = bbox_iou(pred_bboxes[fg_mask], target_bboxes[fg_mask], xywh=False, CIoU=True)
    loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum

    # DFL loss
    if self.dfl_loss:
        target_ltrb = bbox2dist(anchor_points, target_bboxes, self.dfl_loss.reg_max - 1)
        loss_dfl = self.dfl_loss(pred_dist[fg_mask].view(-1, self.dfl_loss.reg_max), target_ltrb[fg_mask]) * weight
        loss_dfl = loss_dfl.sum() / target_scores_sum
    else:
        loss_dfl = torch.tensor(0.0).to(pred_dist.device)

    return loss_iou, loss_dfl
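
A self-contained sketch that fabricates assigner outputs to show the expected shapes; in real training these tensors come from `TaskAlignedAssigner` and the detection head, and every size below is illustrative:

```python
import torch

from ultralytics.utils.loss import BboxLoss

b, a, nc, reg_max = 2, 64, 80, 16
criterion = BboxLoss(reg_max)
anchor_points = torch.rand(a, 2) * 40 + 4   # grid-unit anchor centers
pred_dist = torch.randn(b, a, reg_max * 4)  # raw DFL logits
pred_bboxes = torch.cat([anchor_points - 2, anchor_points + 2], -1).repeat(b, 1, 1)  # xyxy, grid units
target_bboxes = pred_bboxes + torch.rand(b, a, 4)  # jittered targets
fg_mask = torch.rand(b, a) > 0.5                   # positive-anchor mask
target_scores = torch.rand(b, a, nc) * fg_mask.unsqueeze(-1)
target_scores_sum = target_scores.sum().clamp(min=1)
loss_iou, loss_dfl = criterion(
    pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask
)
```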





class ultralytics.utils.loss.RotatedBboxLoss

RotatedBboxLoss(self, reg_max: int)

Bases: BboxLoss

Criterion class for computing training losses for rotated bounding boxes.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `reg_max` | `int` | | required |

Methods

| Name | Description |
| ---- | ----------- |
| `forward` | Compute IoU and DFL losses for rotated bounding boxes. |

Source code in ultralytics/utils/loss.py
class RotatedBboxLoss(BboxLoss):
    """Criterion class for computing training losses for rotated bounding boxes."""

    def __init__(self, reg_max: int):
        """Initialize the RotatedBboxLoss module with regularization maximum and DFL settings."""
        super().__init__(reg_max)


method ultralytics.utils.loss.RotatedBboxLoss.forward

def forward(
    self,
    pred_dist: torch.Tensor,
    pred_bboxes: torch.Tensor,
    anchor_points: torch.Tensor,
    target_bboxes: torch.Tensor,
    target_scores: torch.Tensor,
    target_scores_sum: torch.Tensor,
    fg_mask: torch.Tensor,
) -> tuple[torch.Tensor, torch.Tensor]

Compute IoU and DFL losses for rotated bounding boxes.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `pred_dist` | `torch.Tensor` | | required |
| `pred_bboxes` | `torch.Tensor` | | required |
| `anchor_points` | `torch.Tensor` | | required |
| `target_bboxes` | `torch.Tensor` | | required |
| `target_scores` | `torch.Tensor` | | required |
| `target_scores_sum` | `torch.Tensor` | | required |
| `fg_mask` | `torch.Tensor` | | required |

Source code in ultralytics/utils/loss.py
def forward(
    self,
    pred_dist: torch.Tensor,
    pred_bboxes: torch.Tensor,
    anchor_points: torch.Tensor,
    target_bboxes: torch.Tensor,
    target_scores: torch.Tensor,
    target_scores_sum: torch.Tensor,
    fg_mask: torch.Tensor,
) -> tuple[torch.Tensor, torch.Tensor]:
    """Compute IoU and DFL losses for rotated bounding boxes."""
    weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)
    iou = probiou(pred_bboxes[fg_mask], target_bboxes[fg_mask])
    loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum

    # DFL loss
    if self.dfl_loss:
        target_ltrb = bbox2dist(anchor_points, xywh2xyxy(target_bboxes[..., :4]), self.dfl_loss.reg_max - 1)
        loss_dfl = self.dfl_loss(pred_dist[fg_mask].view(-1, self.dfl_loss.reg_max), target_ltrb[fg_mask]) * weight
        loss_dfl = loss_dfl.sum() / target_scores_sum
    else:
        loss_dfl = torch.tensor(0.0).to(pred_dist.device)

    return loss_iou, loss_dfl





class ultralytics.utils.loss.KeypointLoss

KeypointLoss(self, sigmas: torch.Tensor) -> None

Bases: nn.Module

Criterion class for computing keypoint losses.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `sigmas` | `torch.Tensor` | | required |

Methods

| Name | Description |
| ---- | ----------- |
| `forward` | Calculate keypoint loss factor and Euclidean distance loss for keypoints. |

Source code in ultralytics/utils/loss.py
class KeypointLoss(nn.Module):
    """Criterion class for computing keypoint losses."""

    def __init__(self, sigmas: torch.Tensor) -> None:
        """Initialize the KeypointLoss class with keypoint sigmas."""
        super().__init__()
        self.sigmas = sigmas


method ultralytics.utils.loss.KeypointLoss.forward

def forward(
    self, pred_kpts: torch.Tensor, gt_kpts: torch.Tensor, kpt_mask: torch.Tensor, area: torch.Tensor
) -> torch.Tensor

Calculate keypoint loss factor and Euclidean distance loss for keypoints.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `pred_kpts` | `torch.Tensor` | | required |
| `gt_kpts` | `torch.Tensor` | | required |
| `kpt_mask` | `torch.Tensor` | | required |
| `area` | `torch.Tensor` | | required |

Source code in ultralytics/utils/loss.py
def forward(
    self, pred_kpts: torch.Tensor, gt_kpts: torch.Tensor, kpt_mask: torch.Tensor, area: torch.Tensor
) -> torch.Tensor:
    """Calculate keypoint loss factor and Euclidean distance loss for keypoints."""
    d = (pred_kpts[..., 0] - gt_kpts[..., 0]).pow(2) + (pred_kpts[..., 1] - gt_kpts[..., 1]).pow(2)
    kpt_loss_factor = kpt_mask.shape[1] / (torch.sum(kpt_mask != 0, dim=1) + 1e-9)
    # e = d / (2 * (area * self.sigmas) ** 2 + 1e-9)  # from formula
    e = d / ((2 * self.sigmas).pow(2) * (area + 1e-9) * 2)  # from cocoeval
    return (kpt_loss_factor.view(-1, 1) * ((1 - torch.exp(-e)) * kpt_mask)).mean()
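
A minimal sketch with uniform sigmas (pose models with the 17-keypoint COCO layout use `OKS_SIGMA` instead); shapes are illustrative:

```python
import torch

from ultralytics.utils.loss import KeypointLoss

nkpt = 17
kpt_loss = KeypointLoss(sigmas=torch.ones(nkpt) / nkpt)
pred_kpts = torch.rand(8, nkpt, 3) * 64  # (n_objects, kpts, xy + visibility), grid units
gt_kpts = torch.rand(8, nkpt, 3) * 64
kpt_mask = gt_kpts[..., 2] != 0          # labelled keypoints only
area = torch.rand(8, 1) * 100 + 1        # ground-truth box areas
loss = kpt_loss(pred_kpts, gt_kpts, kpt_mask, area)
```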





class ultralytics.utils.loss.v8DetectionLoss

v8DetectionLoss(self, model, tal_topk: int = 10)

Criterion class for computing training losses for YOLOv8 object detection.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `model` | | | required |
| `tal_topk` | `int` | | `10` |

Methods

| Name | Description |
| ---- | ----------- |
| `__call__` | Calculate the sum of the loss for box, cls and dfl multiplied by batch size. |
| `bbox_decode` | Decode predicted object bounding box coordinates from anchor points and distribution. |
| `preprocess` | Preprocess targets by converting to tensor format and scaling coordinates. |

Source code in ultralytics/utils/loss.py
class v8DetectionLoss:
    """Criterion class for computing training losses for YOLOv8 object detection."""

    def __init__(self, model, tal_topk: int = 10):  # model must be de-paralleled
        """Initialize v8DetectionLoss with model parameters and task-aligned assignment settings."""
        device = next(model.parameters()).device  # get model device
        h = model.args  # hyperparameters

        m = model.model[-1]  # Detect() module
        self.bce = nn.BCEWithLogitsLoss(reduction="none")
        self.hyp = h
        self.stride = m.stride  # model strides
        self.nc = m.nc  # number of classes
        self.no = m.nc + m.reg_max * 4
        self.reg_max = m.reg_max
        self.device = device

        self.use_dfl = m.reg_max > 1

        self.assigner = TaskAlignedAssigner(topk=tal_topk, num_classes=self.nc, alpha=0.5, beta=6.0)
        self.bbox_loss = BboxLoss(m.reg_max).to(device)
        self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)


method ultralytics.utils.loss.v8DetectionLoss.__call__

def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]

Calculate the sum of the loss for box, cls and dfl multiplied by batch size.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `preds` | `Any` | | required |
| `batch` | `dict[str, torch.Tensor]` | | required |

Source code in ultralytics/utils/loss.py
def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]:
    """Calculate the sum of the loss for box, cls and dfl multiplied by batch size."""
    loss = torch.zeros(3, device=self.device)  # box, cls, dfl
    feats = preds[1] if isinstance(preds, tuple) else preds
    pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
        (self.reg_max * 4, self.nc), 1
    )

    pred_scores = pred_scores.permute(0, 2, 1).contiguous()
    pred_distri = pred_distri.permute(0, 2, 1).contiguous()

    dtype = pred_scores.dtype
    batch_size = pred_scores.shape[0]
    imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
    anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

    # Targets
    targets = torch.cat((batch["batch_idx"].view(-1, 1), batch["cls"].view(-1, 1), batch["bboxes"]), 1)
    targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
    gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
    mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0.0)

    # Pboxes
    pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)
    # dfl_conf = pred_distri.view(batch_size, -1, 4, self.reg_max).detach().softmax(-1)
    # dfl_conf = (dfl_conf.amax(-1).mean(-1) + dfl_conf.amax(-1).amin(-1)) / 2

    _, target_bboxes, target_scores, fg_mask, _ = self.assigner(
        # pred_scores.detach().sigmoid() * 0.8 + dfl_conf.unsqueeze(-1) * 0.2,
        pred_scores.detach().sigmoid(),
        (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
        anchor_points * stride_tensor,
        gt_labels,
        gt_bboxes,
        mask_gt,
    )

    target_scores_sum = max(target_scores.sum(), 1)

    # Cls loss
    # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
    loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

    # Bbox loss
    if fg_mask.sum():
        loss[0], loss[2] = self.bbox_loss(
            pred_distri,
            pred_bboxes,
            anchor_points,
            target_bboxes / stride_tensor,
            target_scores,
            target_scores_sum,
            fg_mask,
        )

    loss[0] *= self.hyp.box  # box gain
    loss[1] *= self.hyp.cls  # cls gain
    loss[2] *= self.hyp.dfl  # dfl gain

    return loss * batch_size, loss.detach()  # loss(box, cls, dfl)
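
In practice the trainer builds this criterion from a de-paralleled model; a hedged sketch of manual use, where `batch` is assumed to be a dict with `batch_idx`, `cls`, and normalized xywh `bboxes` as produced by the Ultralytics detection dataloader:

```python
from ultralytics import YOLO
from ultralytics.utils.loss import v8DetectionLoss

model = YOLO("yolo11n.pt").model  # underlying DetectionModel (must be de-paralleled)
criterion = v8DetectionLoss(model)
# preds = model(imgs)                         # raw training-mode outputs
# loss, loss_items = criterion(preds, batch)  # summed loss and detached (box, cls, dfl) items
```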


method ultralytics.utils.loss.v8DetectionLoss.bbox_decode

def bbox_decode(self, anchor_points: torch.Tensor, pred_dist: torch.Tensor) -> torch.Tensor

Decode predicted object bounding box coordinates from anchor points and distribution.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `anchor_points` | `torch.Tensor` | | required |
| `pred_dist` | `torch.Tensor` | | required |

Source code in ultralytics/utils/loss.py
def bbox_decode(self, anchor_points: torch.Tensor, pred_dist: torch.Tensor) -> torch.Tensor:
    """Decode predicted object bounding box coordinates from anchor points and distribution."""
    if self.use_dfl:
        b, a, c = pred_dist.shape  # batch, anchors, channels
        pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
        # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype))
        # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2)
    return dist2bbox(pred_dist, anchor_points, xywh=False)
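
The DFL decode is an expectation: softmax each `reg_max`-bin distribution and take its dot product with the bin indices `proj`. An isolated illustration for a single anchor:

```python
import torch

reg_max = 16
proj = torch.arange(reg_max, dtype=torch.float)
dist_logits = torch.randn(4, reg_max)             # one distribution per l/t/r/b side
distances = dist_logits.softmax(-1).matmul(proj)  # expected distance per side, in [0, 15]
```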


method ultralytics.utils.loss.v8DetectionLoss.preprocess

def preprocess(self, targets: torch.Tensor, batch_size: int, scale_tensor: torch.Tensor) -> torch.Tensor

Preprocess targets by converting to tensor format and scaling coordinates.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `targets` | `torch.Tensor` | | required |
| `batch_size` | `int` | | required |
| `scale_tensor` | `torch.Tensor` | | required |

Source code in ultralytics/utils/loss.py
def preprocess(self, targets: torch.Tensor, batch_size: int, scale_tensor: torch.Tensor) -> torch.Tensor:
    """Preprocess targets by converting to tensor format and scaling coordinates."""
    nl, ne = targets.shape
    if nl == 0:
        out = torch.zeros(batch_size, 0, ne - 1, device=self.device)
    else:
        i = targets[:, 0]  # image index
        _, counts = i.unique(return_counts=True)
        counts = counts.to(dtype=torch.int32)
        out = torch.zeros(batch_size, counts.max(), ne - 1, device=self.device)
        for j in range(batch_size):
            matches = i == j
            if n := matches.sum():
                out[j, :n] = targets[matches, 1:]
        out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor))
    return out
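
A small worked example of the target layout, assuming a `criterion` built as above and the `imgsz` tensor from `__call__`: input rows are `(batch_idx, cls, x, y, w, h)` with normalized xywh boxes, and the output is a zero-padded `(batch_size, max_boxes, 5)` tensor of `(cls, x1, y1, x2, y2)` in pixels (values below are hypothetical):

```python
import torch

# three labels across a batch of two images
targets = torch.tensor([
    [0, 1, 0.50, 0.50, 0.20, 0.20],
    [0, 3, 0.30, 0.40, 0.10, 0.10],
    [1, 0, 0.70, 0.60, 0.40, 0.30],
])
# criterion.preprocess(targets, batch_size=2, scale_tensor=imgsz[[1, 0, 1, 0]])
# -> shape (2, 2, 5): image 0 keeps two boxes, image 1 keeps one plus zero padding
```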





class ultralytics.utils.loss.v8SegmentationLoss

v8SegmentationLoss(self, model)

Bases: v8DetectionLoss

Criterion class for computing training losses for YOLOv8 segmentation.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `model` | | | required |

Methods

| Name | Description |
| ---- | ----------- |
| `__call__` | Calculate and return the combined loss for detection and segmentation. |
| `calculate_segmentation_loss` | Calculate the loss for instance segmentation. |
| `single_mask_loss` | Compute the instance segmentation loss for a single image. |

Source code in ultralytics/utils/loss.py
class v8SegmentationLoss(v8DetectionLoss):
    """Criterion class for computing training losses for YOLOv8 segmentation."""

    def __init__(self, model):  # model must be de-paralleled
        """Initialize the v8SegmentationLoss class with model parameters and mask overlap setting."""
        super().__init__(model)
        self.overlap = model.args.overlap_mask


method ultralytics.utils.loss.v8SegmentationLoss.__call__

def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]

Calculate and return the combined loss for detection and segmentation.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `preds` | `Any` | | required |
| `batch` | `dict[str, torch.Tensor]` | | required |

Source code in ultralytics/utils/loss.py
def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]:
    """Calculate and return the combined loss for detection and segmentation."""
    loss = torch.zeros(4, device=self.device)  # box, seg, cls, dfl
    feats, pred_masks, proto = preds if len(preds) == 3 else preds[1]
    batch_size, _, mask_h, mask_w = proto.shape  # batch size, number of masks, mask height, mask width
    pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
        (self.reg_max * 4, self.nc), 1
    )

    # B, grids, ..
    pred_scores = pred_scores.permute(0, 2, 1).contiguous()
    pred_distri = pred_distri.permute(0, 2, 1).contiguous()
    pred_masks = pred_masks.permute(0, 2, 1).contiguous()

    dtype = pred_scores.dtype
    imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
    anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

    # Targets
    try:
        batch_idx = batch["batch_idx"].view(-1, 1)
        targets = torch.cat((batch_idx, batch["cls"].view(-1, 1), batch["bboxes"]), 1)
        targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0.0)
    except RuntimeError as e:
        raise TypeError(
            "ERROR ❌ segment dataset incorrectly formatted or not a segment dataset.\n"
            "This error can occur when incorrectly training a 'segment' model on a 'detect' dataset, "
            "i.e. 'yolo train model=yolo11n-seg.pt data=coco8.yaml'.\nVerify your dataset is a "
            "correctly formatted 'segment' dataset using 'data=coco8-seg.yaml' "
            "as an example.\nSee https://docs.ultralytics.com/datasets/segment/ for help."
        ) from e

    # Pboxes
    pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)

    _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
        pred_scores.detach().sigmoid(),
        (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
        anchor_points * stride_tensor,
        gt_labels,
        gt_bboxes,
        mask_gt,
    )

    target_scores_sum = max(target_scores.sum(), 1)

    # Cls loss
    # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
    loss[2] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

    if fg_mask.sum():
        # Bbox loss
        loss[0], loss[3] = self.bbox_loss(
            pred_distri,
            pred_bboxes,
            anchor_points,
            target_bboxes / stride_tensor,
            target_scores,
            target_scores_sum,
            fg_mask,
        )
        # Masks loss
        masks = batch["masks"].to(self.device).float()
        if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
            masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0]

        loss[1] = self.calculate_segmentation_loss(
            fg_mask, masks, target_gt_idx, target_bboxes, batch_idx, proto, pred_masks, imgsz, self.overlap
        )

    # WARNING: lines below prevent Multi-GPU DDP 'unused gradient' PyTorch errors, do not remove
    else:
        loss[1] += (proto * 0).sum() + (pred_masks * 0).sum()  # inf sums may lead to nan loss

    loss[0] *= self.hyp.box  # box gain
    loss[1] *= self.hyp.box  # seg gain
    loss[2] *= self.hyp.cls  # cls gain
    loss[3] *= self.hyp.dfl  # dfl gain

    return loss * batch_size, loss.detach()  # loss(box, seg, cls, dfl)


method ultralytics.utils.loss.v8SegmentationLoss.calculate_segmentation_loss

def calculate_segmentation_loss(
    self,
    fg_mask: torch.Tensor,
    masks: torch.Tensor,
    target_gt_idx: torch.Tensor,
    target_bboxes: torch.Tensor,
    batch_idx: torch.Tensor,
    proto: torch.Tensor,
    pred_masks: torch.Tensor,
    imgsz: torch.Tensor,
    overlap: bool,
) -> torch.Tensor

Calculate the loss for instance segmentation.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `fg_mask` | `torch.Tensor` | A binary tensor of shape (BS, N_anchors) indicating which anchors are positive. | required |
| `masks` | `torch.Tensor` | Ground truth masks of shape (BS, H, W) if `overlap` is False, otherwise (BS, ?, H, W). | required |
| `target_gt_idx` | `torch.Tensor` | Indexes of ground truth objects for each anchor of shape (BS, N_anchors). | required |
| `target_bboxes` | `torch.Tensor` | Ground truth bounding boxes for each anchor of shape (BS, N_anchors, 4). | required |
| `batch_idx` | `torch.Tensor` | Batch indices of shape (N_labels_in_batch, 1). | required |
| `proto` | `torch.Tensor` | Prototype masks of shape (BS, 32, H, W). | required |
| `pred_masks` | `torch.Tensor` | Predicted masks for each anchor of shape (BS, N_anchors, 32). | required |
| `imgsz` | `torch.Tensor` | Size of the input image as a tensor of shape (2), i.e., (H, W). | required |
| `overlap` | `bool` | Whether the masks in `masks` tensor overlap. | required |

Returns

| Type | Description |
| ---- | ----------- |
| `torch.Tensor` | The calculated loss for instance segmentation. |

Notes

The batch loss can be computed for improved speed at higher memory usage. For example, pred_mask can be computed as follows: `pred_mask = torch.einsum('in,nhw->ihw', pred, proto)  # (i, 32) @ (32, 160, 160) -> (i, 160, 160)`

Source code in ultralytics/utils/loss.py
def calculate_segmentation_loss(
    self,
    fg_mask: torch.Tensor,
    masks: torch.Tensor,
    target_gt_idx: torch.Tensor,
    target_bboxes: torch.Tensor,
    batch_idx: torch.Tensor,
    proto: torch.Tensor,
    pred_masks: torch.Tensor,
    imgsz: torch.Tensor,
    overlap: bool,
) -> torch.Tensor:
    """Calculate the loss for instance segmentation.

    Args:
        fg_mask (torch.Tensor): A binary tensor of shape (BS, N_anchors) indicating which anchors are positive.
        masks (torch.Tensor): Ground truth masks of shape (BS, H, W) if `overlap` is False, otherwise (BS, ?, H, W).
        target_gt_idx (torch.Tensor): Indexes of ground truth objects for each anchor of shape (BS, N_anchors).
        target_bboxes (torch.Tensor): Ground truth bounding boxes for each anchor of shape (BS, N_anchors, 4).
        batch_idx (torch.Tensor): Batch indices of shape (N_labels_in_batch, 1).
        proto (torch.Tensor): Prototype masks of shape (BS, 32, H, W).
        pred_masks (torch.Tensor): Predicted masks for each anchor of shape (BS, N_anchors, 32).
        imgsz (torch.Tensor): Size of the input image as a tensor of shape (2), i.e., (H, W).
        overlap (bool): Whether the masks in `masks` tensor overlap.

    Returns:
        (torch.Tensor): The calculated loss for instance segmentation.

    Notes:
        The batch loss can be computed for improved speed at higher memory usage.
        For example, pred_mask can be computed as follows:
            pred_mask = torch.einsum('in,nhw->ihw', pred, proto)  # (i, 32) @ (32, 160, 160) -> (i, 160, 160)
    """
    _, _, mask_h, mask_w = proto.shape
    loss = 0

    # Normalize to 0-1
    target_bboxes_normalized = target_bboxes / imgsz[[1, 0, 1, 0]]

    # Areas of target bboxes
    marea = xyxy2xywh(target_bboxes_normalized)[..., 2:].prod(2)

    # Normalize to mask size
    mxyxy = target_bboxes_normalized * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=proto.device)

    for i, single_i in enumerate(zip(fg_mask, target_gt_idx, pred_masks, proto, mxyxy, marea, masks)):
        fg_mask_i, target_gt_idx_i, pred_masks_i, proto_i, mxyxy_i, marea_i, masks_i = single_i
        if fg_mask_i.any():
            mask_idx = target_gt_idx_i[fg_mask_i]
            if overlap:
                gt_mask = masks_i == (mask_idx + 1).view(-1, 1, 1)
                gt_mask = gt_mask.float()
            else:
                gt_mask = masks[batch_idx.view(-1) == i][mask_idx]

            loss += self.single_mask_loss(
                gt_mask, pred_masks_i[fg_mask_i], proto_i, mxyxy_i[fg_mask_i], marea_i[fg_mask_i]
            )

        # WARNING: lines below prevents Multi-GPU DDP 'unused gradient' PyTorch errors, do not remove
        else:
            loss += (proto * 0).sum() + (pred_masks * 0).sum()  # inf sums may lead to nan loss

    return loss / fg_mask.sum()


method ultralytics.utils.loss.v8SegmentationLoss.single_mask_loss

def single_mask_loss(
    gt_mask: torch.Tensor, pred: torch.Tensor, proto: torch.Tensor, xyxy: torch.Tensor, area: torch.Tensor
) -> torch.Tensor

Compute the instance segmentation loss for a single image.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `gt_mask` | `torch.Tensor` | Ground truth mask of shape (N, H, W), where N is the number of objects. | required |
| `pred` | `torch.Tensor` | Predicted mask coefficients of shape (N, 32). | required |
| `proto` | `torch.Tensor` | Prototype masks of shape (32, H, W). | required |
| `xyxy` | `torch.Tensor` | Ground truth bounding boxes in xyxy format, normalized to [0, 1], of shape (N, 4). | required |
| `area` | `torch.Tensor` | Area of each ground truth bounding box of shape (N,). | required |

Returns

| Type | Description |
| ---- | ----------- |
| `torch.Tensor` | The calculated mask loss for a single image. |

Notes

The function uses the equation `pred_mask = torch.einsum('in,nhw->ihw', pred, proto)` to produce the predicted masks from the prototype masks and predicted mask coefficients.

Source code in ultralytics/utils/loss.py
@staticmethod
def single_mask_loss(
    gt_mask: torch.Tensor, pred: torch.Tensor, proto: torch.Tensor, xyxy: torch.Tensor, area: torch.Tensor
) -> torch.Tensor:
    """Compute the instance segmentation loss for a single image.

    Args:
        gt_mask (torch.Tensor): Ground truth mask of shape (N, H, W), where N is the number of objects.
        pred (torch.Tensor): Predicted mask coefficients of shape (N, 32).
        proto (torch.Tensor): Prototype masks of shape (32, H, W).
        xyxy (torch.Tensor): Ground truth bounding boxes in xyxy format, normalized to [0, 1], of shape (N, 4).
        area (torch.Tensor): Area of each ground truth bounding box of shape (N,).

    Returns:
        (torch.Tensor): The calculated mask loss for a single image.

    Notes:
        The function uses the equation pred_mask = torch.einsum('in,nhw->ihw', pred, proto) to produce the
        predicted masks from the prototype masks and predicted mask coefficients.
    """
    pred_mask = torch.einsum("in,nhw->ihw", pred, proto)  # (n, 32) @ (32, 80, 80) -> (n, 80, 80)
    loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")
    return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).sum()
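
The einsum on the first line is the whole mask head at loss time: each object's 32 coefficients linearly combine the 32 prototype masks into per-object mask logits. An isolated illustration:

```python
import torch

pred = torch.randn(5, 32)          # mask coefficients for 5 objects
proto = torch.randn(32, 160, 160)  # prototype masks
pred_mask = torch.einsum("in,nhw->ihw", pred, proto)  # (5, 160, 160) mask logits
```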





class ultralytics.utils.loss.v8PoseLoss

v8PoseLoss(self, model)

Bases: v8DetectionLoss

Criterion class for computing training losses for YOLOv8 pose estimation.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `model` | | | required |

Methods

| Name | Description |
| ---- | ----------- |
| `__call__` | Calculate the total loss and detach it for pose estimation. |
| `calculate_keypoints_loss` | Calculate the keypoints loss for the model. |
| `kpts_decode` | Decode predicted keypoints to image coordinates. |

Source code in ultralytics/utils/loss.py
class v8PoseLoss(v8DetectionLoss):
    """Criterion class for computing training losses for YOLOv8 pose estimation."""

    def __init__(self, model):  # model must be de-paralleled
        """Initialize v8PoseLoss with model parameters and keypoint-specific loss functions."""
        super().__init__(model)
        self.kpt_shape = model.model[-1].kpt_shape
        self.bce_pose = nn.BCEWithLogitsLoss()
        is_pose = self.kpt_shape == [17, 3]
        nkpt = self.kpt_shape[0]  # number of keypoints
        sigmas = torch.from_numpy(OKS_SIGMA).to(self.device) if is_pose else torch.ones(nkpt, device=self.device) / nkpt
        self.keypoint_loss = KeypointLoss(sigmas=sigmas)


method ultralytics.utils.loss.v8PoseLoss.__call__

def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]

Calculate the total loss and detach it for pose estimation.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `preds` | `Any` | | required |
| `batch` | `dict[str, torch.Tensor]` | | required |

Source code in ultralytics/utils/loss.py
def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]:
    """Calculate the total loss and detach it for pose estimation."""
    loss = torch.zeros(5, device=self.device)  # box, cls, dfl, kpt_location, kpt_visibility
    feats, pred_kpts = preds if isinstance(preds[0], list) else preds[1]
    pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
        (self.reg_max * 4, self.nc), 1
    )

    # B, grids, ..
    pred_scores = pred_scores.permute(0, 2, 1).contiguous()
    pred_distri = pred_distri.permute(0, 2, 1).contiguous()
    pred_kpts = pred_kpts.permute(0, 2, 1).contiguous()

    dtype = pred_scores.dtype
    imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
    anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

    # Targets
    batch_size = pred_scores.shape[0]
    batch_idx = batch["batch_idx"].view(-1, 1)
    targets = torch.cat((batch_idx, batch["cls"].view(-1, 1), batch["bboxes"]), 1)
    targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
    gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
    mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0.0)

    # Pboxes
    pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)
    pred_kpts = self.kpts_decode(anchor_points, pred_kpts.view(batch_size, -1, *self.kpt_shape))  # (b, h*w, 17, 3)

    _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
        pred_scores.detach().sigmoid(),
        (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
        anchor_points * stride_tensor,
        gt_labels,
        gt_bboxes,
        mask_gt,
    )

    target_scores_sum = max(target_scores.sum(), 1)

    # Cls loss
    # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
    loss[3] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

    # Bbox loss
    if fg_mask.sum():
        target_bboxes /= stride_tensor
        loss[0], loss[4] = self.bbox_loss(
            pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask
        )
        keypoints = batch["keypoints"].to(self.device).float().clone()
        keypoints[..., 0] *= imgsz[1]
        keypoints[..., 1] *= imgsz[0]

        loss[1], loss[2] = self.calculate_keypoints_loss(
            fg_mask, target_gt_idx, keypoints, batch_idx, stride_tensor, target_bboxes, pred_kpts
        )

    loss[0] *= self.hyp.box  # box gain
    loss[1] *= self.hyp.pose  # pose gain
    loss[2] *= self.hyp.kobj  # kobj gain
    loss[3] *= self.hyp.cls  # cls gain
    loss[4] *= self.hyp.dfl  # dfl gain

    return loss * batch_size, loss.detach()  # loss(box, cls, dfl)


method ultralytics.utils.loss.v8PoseLoss.calculate_keypoints_loss

def calculate_keypoints_loss(
    self,
    masks: torch.Tensor,
    target_gt_idx: torch.Tensor,
    keypoints: torch.Tensor,
    batch_idx: torch.Tensor,
    stride_tensor: torch.Tensor,
    target_bboxes: torch.Tensor,
    pred_kpts: torch.Tensor,
) -> tuple[torch.Tensor, torch.Tensor]

Calculate the keypoints loss for the model.

This function calculates the keypoints loss and keypoints object loss for a given batch. The keypoints loss is based on the difference between the predicted keypoints and ground truth keypoints. The keypoints object loss is a binary classification loss that classifies whether a keypoint is present or not.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `masks` | `torch.Tensor` | Binary mask tensor indicating object presence, shape (BS, N_anchors). | required |
| `target_gt_idx` | `torch.Tensor` | Index tensor mapping anchors to ground truth objects, shape (BS, N_anchors). | required |
| `keypoints` | `torch.Tensor` | Ground truth keypoints, shape (N_kpts_in_batch, N_kpts_per_object, kpts_dim). | required |
| `batch_idx` | `torch.Tensor` | Batch index tensor for keypoints, shape (N_kpts_in_batch, 1). | required |
| `stride_tensor` | `torch.Tensor` | Stride tensor for anchors, shape (N_anchors, 1). | required |
| `target_bboxes` | `torch.Tensor` | Ground truth boxes in (x1, y1, x2, y2) format, shape (BS, N_anchors, 4). | required |
| `pred_kpts` | `torch.Tensor` | Predicted keypoints, shape (BS, N_anchors, N_kpts_per_object, kpts_dim). | required |

Returns

| Type | Description |
| ---- | ----------- |
| `kpts_loss` (`torch.Tensor`) | The keypoints loss. |
| `kpts_obj_loss` (`torch.Tensor`) | The keypoints object loss. |

Source code in ultralytics/utils/loss.py
def calculate_keypoints_loss(
    self,
    masks: torch.Tensor,
    target_gt_idx: torch.Tensor,
    keypoints: torch.Tensor,
    batch_idx: torch.Tensor,
    stride_tensor: torch.Tensor,
    target_bboxes: torch.Tensor,
    pred_kpts: torch.Tensor,
) -> tuple[torch.Tensor, torch.Tensor]:
    """Calculate the keypoints loss for the model.

    This function calculates the keypoints loss and keypoints object loss for a given batch. The keypoints loss is
    based on the difference between the predicted keypoints and ground truth keypoints. The keypoints object loss is
    a binary classification loss that classifies whether a keypoint is present or not.

    Args:
        masks (torch.Tensor): Binary mask tensor indicating object presence, shape (BS, N_anchors).
        target_gt_idx (torch.Tensor): Index tensor mapping anchors to ground truth objects, shape (BS, N_anchors).
        keypoints (torch.Tensor): Ground truth keypoints, shape (N_kpts_in_batch, N_kpts_per_object, kpts_dim).
        batch_idx (torch.Tensor): Batch index tensor for keypoints, shape (N_kpts_in_batch, 1).
        stride_tensor (torch.Tensor): Stride tensor for anchors, shape (N_anchors, 1).
        target_bboxes (torch.Tensor): Ground truth boxes in (x1, y1, x2, y2) format, shape (BS, N_anchors, 4).
        pred_kpts (torch.Tensor): Predicted keypoints, shape (BS, N_anchors, N_kpts_per_object, kpts_dim).

    Returns:
        kpts_loss (torch.Tensor): The keypoints loss.
        kpts_obj_loss (torch.Tensor): The keypoints object loss.
    """
    batch_idx = batch_idx.flatten()
    batch_size = len(masks)

    # Find the maximum number of keypoints in a single image
    max_kpts = torch.unique(batch_idx, return_counts=True)[1].max()

    # Create a tensor to hold batched keypoints
    batched_keypoints = torch.zeros(
        (batch_size, max_kpts, keypoints.shape[1], keypoints.shape[2]), device=keypoints.device
    )

    # TODO: any idea how to vectorize this?
    # Fill batched_keypoints with keypoints based on batch_idx
    for i in range(batch_size):
        keypoints_i = keypoints[batch_idx == i]
        batched_keypoints[i, : keypoints_i.shape[0]] = keypoints_i

    # Expand dimensions of target_gt_idx to match the shape of batched_keypoints
    target_gt_idx_expanded = target_gt_idx.unsqueeze(-1).unsqueeze(-1)

    # Use target_gt_idx_expanded to select keypoints from batched_keypoints
    selected_keypoints = batched_keypoints.gather(
        1, target_gt_idx_expanded.expand(-1, -1, keypoints.shape[1], keypoints.shape[2])
    )

    # Divide coordinates by stride
    selected_keypoints[..., :2] /= stride_tensor.view(1, -1, 1, 1)

    kpts_loss = 0
    kpts_obj_loss = 0

    if masks.any():
        gt_kpt = selected_keypoints[masks]
        area = xyxy2xywh(target_bboxes[masks])[:, 2:].prod(1, keepdim=True)
        pred_kpt = pred_kpts[masks]
        kpt_mask = gt_kpt[..., 2] != 0 if gt_kpt.shape[-1] == 3 else torch.full_like(gt_kpt[..., 0], True)
        kpts_loss = self.keypoint_loss(pred_kpt, gt_kpt, kpt_mask, area)  # pose loss

        if pred_kpt.shape[-1] == 3:
            kpts_obj_loss = self.bce_pose(pred_kpt[..., 2], kpt_mask.float())  # keypoint obj loss

    return kpts_loss, kpts_obj_loss
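
The core of the batching above is the `gather` that pulls each anchor's assigned ground-truth keypoints; an isolated illustration with toy sizes (all names and numbers below are illustrative):

```python
import torch

bs, n_anchors, n_gt, nkpt, kdim = 2, 6, 3, 17, 3
batched_keypoints = torch.rand(bs, n_gt, nkpt, kdim)     # per-image padded GT keypoints
target_gt_idx = torch.randint(0, n_gt, (bs, n_anchors))  # GT index assigned to each anchor
idx = target_gt_idx.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, nkpt, kdim)
selected = batched_keypoints.gather(1, idx)              # (bs, n_anchors, nkpt, kdim)
```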


method ultralytics.utils.loss.v8PoseLoss.kpts_decode

def kpts_decode(anchor_points: torch.Tensor, pred_kpts: torch.Tensor) -> torch.Tensor

Decode predicted keypoints to image coordinates.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `anchor_points` | `torch.Tensor` | | required |
| `pred_kpts` | `torch.Tensor` | | required |

Source code in ultralytics/utils/loss.py
@staticmethod
def kpts_decode(anchor_points: torch.Tensor, pred_kpts: torch.Tensor) -> torch.Tensor:
    """Decode predicted keypoints to image coordinates."""
    y = pred_kpts.clone()
    y[..., :2] *= 2.0
    y[..., 0] += anchor_points[:, [0]] - 0.5
    y[..., 1] += anchor_points[:, [1]] - 0.5
    return y
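
`kpts_decode` is a static method, so it can be exercised standalone: predicted offsets are doubled and re-centered on each anchor (shapes illustrative):

```python
import torch

from ultralytics.utils.loss import v8PoseLoss

anchor_points = torch.rand(8400, 2) * 80  # grid-unit anchor centers
pred_kpts = torch.randn(2, 8400, 17, 3)   # raw (x, y, visibility) offsets per keypoint
decoded = v8PoseLoss.kpts_decode(anchor_points, pred_kpts)  # keypoints in grid units
```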





class ultralytics.utils.loss.v8ClassificationLoss

v8ClassificationLoss()

Criterion class for computing training losses for classification.

Methods

| Name | Description |
| ---- | ----------- |
| `__call__` | Compute the classification loss between predictions and true labels. |

Source code in ultralytics/utils/loss.py

class v8ClassificationLoss:
    """Criterion class for computing training losses for classification."""


method ultralytics.utils.loss.v8ClassificationLoss.__call__

def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]

Compute the classification loss between predictions and true labels.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `preds` | `Any` | | required |
| `batch` | `dict[str, torch.Tensor]` | | required |

Source code in ultralytics/utils/loss.py
def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]:
    """Compute the classification loss between predictions and true labels."""
    preds = preds[1] if isinstance(preds, (list, tuple)) else preds
    loss = F.cross_entropy(preds, batch["cls"], reduction="mean")
    return loss, loss.detach()
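
A minimal sketch: `preds` are raw class logits and `batch["cls"]` holds integer class indices (sizes illustrative):

```python
import torch

from ultralytics.utils.loss import v8ClassificationLoss

criterion = v8ClassificationLoss()
preds = torch.randn(4, 1000)                   # logits for 4 images, 1000 classes
batch = {"cls": torch.randint(0, 1000, (4,))}  # integer targets
loss, loss_items = criterion(preds, batch)
```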





class ultralytics.utils.loss.v8OBBLoss

v8OBBLoss(self, model)

Bases: v8DetectionLoss

Calculates losses for object detection, classification, and box distribution in rotated YOLO models.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `model` | | | required |

Methods

| Name | Description |
| ---- | ----------- |
| `__call__` | Calculate and return the loss for oriented bounding box detection. |
| `bbox_decode` | Decode predicted object bounding box coordinates from anchor points and distribution. |
| `preprocess` | Preprocess targets for oriented bounding box detection. |

Source code in ultralytics/utils/loss.py
class v8OBBLoss(v8DetectionLoss):
    """Calculates losses for object detection, classification, and box distribution in rotated YOLO models."""

    def __init__(self, model):
        """Initialize v8OBBLoss with model, assigner, and rotated bbox loss; model must be de-paralleled."""
        super().__init__(model)
        self.assigner = RotatedTaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0)
        self.bbox_loss = RotatedBboxLoss(self.reg_max).to(self.device)


method ultralytics.utils.loss.v8OBBLoss.__call__

def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]

Calculate and return the loss for oriented bounding box detection.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `preds` | `Any` | | required |
| `batch` | `dict[str, torch.Tensor]` | | required |

Source code in ultralytics/utils/loss.py
def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]:
    """Calculate and return the loss for oriented bounding box detection."""
    loss = torch.zeros(3, device=self.device)  # box, cls, dfl
    feats, pred_angle = preds if isinstance(preds[0], list) else preds[1]
    batch_size = pred_angle.shape[0]  # batch size, number of masks, mask height, mask width
    pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
        (self.reg_max * 4, self.nc), 1
    )

    # b, grids, ..
    pred_scores = pred_scores.permute(0, 2, 1).contiguous()
    pred_distri = pred_distri.permute(0, 2, 1).contiguous()
    pred_angle = pred_angle.permute(0, 2, 1).contiguous()

    dtype = pred_scores.dtype
    imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
    anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

    # targets
    try:
        batch_idx = batch["batch_idx"].view(-1, 1)
        targets = torch.cat((batch_idx, batch["cls"].view(-1, 1), batch["bboxes"].view(-1, 5)), 1)
        rw, rh = targets[:, 4] * imgsz[0].item(), targets[:, 5] * imgsz[1].item()
        targets = targets[(rw >= 2) & (rh >= 2)]  # filter rboxes of tiny size to stabilize training
        targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
        gt_labels, gt_bboxes = targets.split((1, 5), 2)  # cls, xywhr
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0.0)
    except RuntimeError as e:
        raise TypeError(
            "ERROR ❌ OBB dataset incorrectly formatted or not a OBB dataset.\n"
            "This error can occur when incorrectly training a 'OBB' model on a 'detect' dataset, "
            "i.e. 'yolo train model=yolo11n-obb.pt data=coco8.yaml'.\nVerify your dataset is a "
            "correctly formatted 'OBB' dataset using 'data=dota8.yaml' "
            "as an example.\nSee https://docs.ultralytics.com/datasets/obb/ for help."
        ) from e

    # Pboxes
    pred_bboxes = self.bbox_decode(anchor_points, pred_distri, pred_angle)  # xyxy, (b, h*w, 4)

    bboxes_for_assigner = pred_bboxes.clone().detach()
    # Only the first four elements need to be scaled
    bboxes_for_assigner[..., :4] *= stride_tensor
    _, target_bboxes, target_scores, fg_mask, _ = self.assigner(
        pred_scores.detach().sigmoid(),
        bboxes_for_assigner.type(gt_bboxes.dtype),
        anchor_points * stride_tensor,
        gt_labels,
        gt_bboxes,
        mask_gt,
    )

    target_scores_sum = max(target_scores.sum(), 1)

    # Cls loss
    # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
    loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

    # Bbox loss
    if fg_mask.sum():
        target_bboxes[..., :4] /= stride_tensor
        loss[0], loss[2] = self.bbox_loss(
            pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask
        )
    else:
        loss[0] += (pred_angle * 0).sum()

    loss[0] *= self.hyp.box  # box gain
    loss[1] *= self.hyp.cls  # cls gain
    loss[2] *= self.hyp.dfl  # dfl gain

    return loss * batch_size, loss.detach()  # loss(box, cls, dfl)


method ultralytics.utils.loss.v8OBBLoss.bbox_decode

def bbox_decode(self, anchor_points: torch.Tensor, pred_dist: torch.Tensor, pred_angle: torch.Tensor) -> torch.Tensor

Decode predicted object bounding box coordinates from anchor points and distribution.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `anchor_points` | `torch.Tensor` | Anchor points, (h*w, 2). | required |
| `pred_dist` | `torch.Tensor` | Predicted rotated distance, (bs, h*w, 4). | required |
| `pred_angle` | `torch.Tensor` | Predicted angle, (bs, h*w, 1). | required |

Returns

| Type | Description |
| ---- | ----------- |
| `torch.Tensor` | Predicted rotated bounding boxes with angles, (bs, h*w, 5). |

Source code in ultralytics/utils/loss.py
def bbox_decode(
    self, anchor_points: torch.Tensor, pred_dist: torch.Tensor, pred_angle: torch.Tensor
) -> torch.Tensor:
    """Decode predicted object bounding box coordinates from anchor points and distribution.

    Args:
        anchor_points (torch.Tensor): Anchor points, (h*w, 2).
        pred_dist (torch.Tensor): Predicted rotated distance, (bs, h*w, 4).
        pred_angle (torch.Tensor): Predicted angle, (bs, h*w, 1).

    Returns:
        (torch.Tensor): Predicted rotated bounding boxes with angles, (bs, h*w, 5).
    """
    if self.use_dfl:
        b, a, c = pred_dist.shape  # batch, anchors, channels
        pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
    return torch.cat((dist2rbox(pred_dist, pred_angle, anchor_points), pred_angle), dim=-1)


method ultralytics.utils.loss.v8OBBLoss.preprocess

def preprocess(self, targets: torch.Tensor, batch_size: int, scale_tensor: torch.Tensor) -> torch.Tensor

Preprocess targets for oriented bounding box detection.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `targets` | `torch.Tensor` | | required |
| `batch_size` | `int` | | required |
| `scale_tensor` | `torch.Tensor` | | required |

Source code in ultralytics/utils/loss.py
def preprocess(self, targets: torch.Tensor, batch_size: int, scale_tensor: torch.Tensor) -> torch.Tensor:
    """Preprocess targets for oriented bounding box detection."""
    if targets.shape[0] == 0:
        out = torch.zeros(batch_size, 0, 6, device=self.device)
    else:
        i = targets[:, 0]  # image index
        _, counts = i.unique(return_counts=True)
        counts = counts.to(dtype=torch.int32)
        out = torch.zeros(batch_size, counts.max(), 6, device=self.device)
        for j in range(batch_size):
            matches = i == j
            if n := matches.sum():
                bboxes = targets[matches, 2:]
                bboxes[..., :4].mul_(scale_tensor)
                out[j, :n] = torch.cat([targets[matches, 1:2], bboxes], dim=-1)
    return out





class ultralytics.utils.loss.E2EDetectLoss

E2EDetectLoss(self, model)

Criterion class for computing training losses for end-to-end detection.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `model` | | | required |

Methods

| Name | Description |
| ---- | ----------- |
| `__call__` | Calculate the sum of the loss for box, cls and dfl multiplied by batch size. |

Source code in ultralytics/utils/loss.py
class E2EDetectLoss:
    """Criterion class for computing training losses for end-to-end detection."""

    def __init__(self, model):
        """Initialize E2EDetectLoss with one-to-many and one-to-one detection losses using the provided model."""
        self.one2many = v8DetectionLoss(model, tal_topk=10)
        self.one2one = v8DetectionLoss(model, tal_topk=1)


method ultralytics.utils.loss.E2EDetectLoss.__call__

def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]

Calculate the sum of the loss for box, cls and dfl multiplied by batch size.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `preds` | `Any` | | required |
| `batch` | `dict[str, torch.Tensor]` | | required |

Source code in ultralytics/utils/loss.py
def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]:
    """Calculate the sum of the loss for box, cls and dfl multiplied by batch size."""
    preds = preds[1] if isinstance(preds, tuple) else preds
    one2many = preds["one2many"]
    loss_one2many = self.one2many(one2many, batch)
    one2one = preds["one2one"]
    loss_one2one = self.one2one(one2one, batch)
    return loss_one2many[0] + loss_one2one[0], loss_one2many[1] + loss_one2one[1]
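
`preds` must expose `"one2many"` and `"one2one"` branches; a hedged sketch that assumes an end-to-end model such as YOLOv10 (the weight name below is an assumption):

```python
from ultralytics import YOLO
from ultralytics.utils.loss import E2EDetectLoss

model = YOLO("yolov10n.pt").model  # assumes an end-to-end model with one2many/one2one heads
criterion = E2EDetectLoss(model)
# preds = model(imgs)                         # training-mode dict: {"one2many": ..., "one2one": ...}
# loss, loss_items = criterion(preds, batch)
```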





class ultralytics.utils.loss.TVPDetectLoss

TVPDetectLoss(self, model)

Criterion class for computing training losses for text-visual prompt detection.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `model` | | | required |

Methods

| Name | Description |
| ---- | ----------- |
| `__call__` | Calculate the loss for text-visual prompt detection. |
| `_get_vp_features` | Extract visual-prompt features from the model output. |

Source code in ultralytics/utils/loss.py
class TVPDetectLoss:
    """Criterion class for computing training losses for text-visual prompt detection."""

    def __init__(self, model):
        """Initialize TVPDetectLoss with task-prompt and visual-prompt criteria using the provided model."""
        self.vp_criterion = v8DetectionLoss(model)
        # NOTE: store following info as it's changeable in __call__
        self.ori_nc = self.vp_criterion.nc
        self.ori_no = self.vp_criterion.no
        self.ori_reg_max = self.vp_criterion.reg_max


method ultralytics.utils.loss.TVPDetectLoss.__call__

def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]

Calculate the loss for text-visual prompt detection.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `preds` | `Any` | | required |
| `batch` | `dict[str, torch.Tensor]` | | required |

Source code in ultralytics/utils/loss.py
def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]:
    """Calculate the loss for text-visual prompt detection."""
    feats = preds[1] if isinstance(preds, tuple) else preds
    assert self.ori_reg_max == self.vp_criterion.reg_max  # TODO: remove it

    if self.ori_reg_max * 4 + self.ori_nc == feats[0].shape[1]:
        loss = torch.zeros(3, device=self.vp_criterion.device, requires_grad=True)
        return loss, loss.detach()

    vp_feats = self._get_vp_features(feats)
    vp_loss = self.vp_criterion(vp_feats, batch)
    box_loss = vp_loss[0][1]
    return box_loss, vp_loss[1]


method ultralytics.utils.loss.TVPDetectLoss._get_vp_features

def _get_vp_features(self, feats: list[torch.Tensor]) -> list[torch.Tensor]

Extract visual-prompt features from the model output.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `feats` | `list[torch.Tensor]` | | required |

Source code in ultralytics/utils/loss.py
def _get_vp_features(self, feats: list[torch.Tensor]) -> list[torch.Tensor]:
    """Extract visual-prompt features from the model output."""
    vnc = feats[0].shape[1] - self.ori_reg_max * 4 - self.ori_nc

    self.vp_criterion.nc = vnc
    self.vp_criterion.no = vnc + self.vp_criterion.reg_max * 4
    self.vp_criterion.assigner.num_classes = vnc

    return [
        torch.cat((box, cls_vp), dim=1)
        for box, _, cls_vp in [xi.split((self.ori_reg_max * 4, self.ori_nc, vnc), dim=1) for xi in feats]
    ]





class ultralytics.utils.loss.TVPSegmentLoss

TVPSegmentLoss(self, model)

Bases: TVPDetectLoss

Criterion class for computing training losses for text-visual prompt segmentation.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `model` | | | required |

Methods

| Name | Description |
| ---- | ----------- |
| `__call__` | Calculate the loss for text-visual prompt segmentation. |

Source code in ultralytics/utils/loss.py
class TVPSegmentLoss(TVPDetectLoss):
    """Criterion class for computing training losses for text-visual prompt segmentation."""

    def __init__(self, model):
        """Initialize TVPSegmentLoss with task-prompt and visual-prompt criteria using the provided model."""
        super().__init__(model)
        self.vp_criterion = v8SegmentationLoss(model)


method ultralytics.utils.loss.TVPSegmentLoss.__call__

def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]

Calculate the loss for text-visual prompt segmentation.

Args

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| `preds` | `Any` | | required |
| `batch` | `dict[str, torch.Tensor]` | | required |

Source code in ultralytics/utils/loss.py
def __call__(self, preds: Any, batch: dict[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]:
    """Calculate the loss for text-visual prompt segmentation."""
    feats, pred_masks, proto = preds if len(preds) == 3 else preds[1]
    assert self.ori_reg_max == self.vp_criterion.reg_max  # TODO: remove it

    if self.ori_reg_max * 4 + self.ori_nc == feats[0].shape[1]:
        loss = torch.zeros(4, device=self.vp_criterion.device, requires_grad=True)
        return loss, loss.detach()

    vp_feats = self._get_vp_features(feats)
    vp_loss = self.vp_criterion((vp_feats, pred_masks, proto), batch)
    cls_loss = vp_loss[0][2]
    return cls_loss, vp_loss[1]




