Bỏ để qua phần nội dung

Tài liệu tham khảo cho ultralytics/utils/loss.py

Ghi

Tệp này có sẵn tại https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/loss.py. Nếu bạn phát hiện ra một vấn đề, vui lòng giúp khắc phục nó bằng cách đóng góp Yêu cầu 🛠️ kéo. Cảm ơn bạn 🙏 !



ultralytics.utils.loss.VarifocalLoss

Căn cứ: Module

Mất varifocal của Zhang et al.

https://arxiv.org/abs/2008.13367.

Mã nguồn trong ultralytics/utils/loss.py
class VarifocalLoss(nn.Module):
    """
    Varifocal loss by Zhang et al.

    https://arxiv.org/abs/2008.13367.
    """

    def __init__(self):
        """Initialize the VarifocalLoss class."""
        super().__init__()

    @staticmethod
    def forward(pred_score, gt_score, label, alpha=0.75, gamma=2.0):
        """Computes varfocal loss."""
        weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label
        with torch.cuda.amp.autocast(enabled=False):
            loss = (
                (F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), reduction="none") * weight)
                .mean(1)
                .sum()
            )
        return loss

__init__()

Khởi tạo lớp VarifocalLoss.

Mã nguồn trong ultralytics/utils/loss.py
def __init__(self):
    """Initialize the VarifocalLoss class."""
    super().__init__()

forward(pred_score, gt_score, label, alpha=0.75, gamma=2.0) staticmethod

Tính toán tổn thất varfocal.

Mã nguồn trong ultralytics/utils/loss.py
@staticmethod
def forward(pred_score, gt_score, label, alpha=0.75, gamma=2.0):
    """Computes varfocal loss."""
    weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label
    with torch.cuda.amp.autocast(enabled=False):
        loss = (
            (F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), reduction="none") * weight)
            .mean(1)
            .sum()
        )
    return loss



ultralytics.utils.loss.FocalLoss

Căn cứ: Module

Bao bọc tổn thất khu trú xung quanh loss_fcn () hiện có, tức là tiêu chí = FocalLoss (nn. BCEWithLogitsLoss(), gamma = 1,5).

Mã nguồn trong ultralytics/utils/loss.py
class FocalLoss(nn.Module):
    """Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""

    def __init__(self):
        """Initializer for FocalLoss class with no parameters."""
        super().__init__()

    @staticmethod
    def forward(pred, label, gamma=1.5, alpha=0.25):
        """Calculates and updates confusion matrix for object detection/classification tasks."""
        loss = F.binary_cross_entropy_with_logits(pred, label, reduction="none")
        # p_t = torch.exp(-loss)
        # loss *= self.alpha * (1.000001 - p_t) ** self.gamma  # non-zero power for gradient stability

        # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
        pred_prob = pred.sigmoid()  # prob from logits
        p_t = label * pred_prob + (1 - label) * (1 - pred_prob)
        modulating_factor = (1.0 - p_t) ** gamma
        loss *= modulating_factor
        if alpha > 0:
            alpha_factor = label * alpha + (1 - label) * (1 - alpha)
            loss *= alpha_factor
        return loss.mean(1).sum()

__init__()

Initializer cho lớp FocalLoss không có tham số.

Mã nguồn trong ultralytics/utils/loss.py
def __init__(self):
    """Initializer for FocalLoss class with no parameters."""
    super().__init__()

forward(pred, label, gamma=1.5, alpha=0.25) staticmethod

Tính toán và cập nhật ma trận nhầm lẫn cho các tác vụ phát hiện / phân loại đối tượng.

Mã nguồn trong ultralytics/utils/loss.py
@staticmethod
def forward(pred, label, gamma=1.5, alpha=0.25):
    """Calculates and updates confusion matrix for object detection/classification tasks."""
    loss = F.binary_cross_entropy_with_logits(pred, label, reduction="none")
    # p_t = torch.exp(-loss)
    # loss *= self.alpha * (1.000001 - p_t) ** self.gamma  # non-zero power for gradient stability

    # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
    pred_prob = pred.sigmoid()  # prob from logits
    p_t = label * pred_prob + (1 - label) * (1 - pred_prob)
    modulating_factor = (1.0 - p_t) ** gamma
    loss *= modulating_factor
    if alpha > 0:
        alpha_factor = label * alpha + (1 - label) * (1 - alpha)
        loss *= alpha_factor
    return loss.mean(1).sum()



ultralytics.utils.loss.BboxLoss

Căn cứ: Module

Lớp tiêu chí tính toán tổn thất đào tạo trong quá trình đào tạo.

Mã nguồn trong ultralytics/utils/loss.py
class BboxLoss(nn.Module):
    """Criterion class for computing training losses during training."""

    def __init__(self, reg_max, use_dfl=False):
        """Initialize the BboxLoss module with regularization maximum and DFL settings."""
        super().__init__()
        self.reg_max = reg_max
        self.use_dfl = use_dfl

    def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
        """IoU loss."""
        weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)
        iou = bbox_iou(pred_bboxes[fg_mask], target_bboxes[fg_mask], xywh=False, CIoU=True)
        loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum

        # DFL loss
        if self.use_dfl:
            target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max)
            loss_dfl = self._df_loss(pred_dist[fg_mask].view(-1, self.reg_max + 1), target_ltrb[fg_mask]) * weight
            loss_dfl = loss_dfl.sum() / target_scores_sum
        else:
            loss_dfl = torch.tensor(0.0).to(pred_dist.device)

        return loss_iou, loss_dfl

    @staticmethod
    def _df_loss(pred_dist, target):
        """
        Return sum of left and right DFL losses.

        Distribution Focal Loss (DFL) proposed in Generalized Focal Loss
        https://ieeexplore.ieee.org/document/9792391
        """
        tl = target.long()  # target left
        tr = tl + 1  # target right
        wl = tr - target  # weight left
        wr = 1 - wl  # weight right
        return (
            F.cross_entropy(pred_dist, tl.view(-1), reduction="none").view(tl.shape) * wl
            + F.cross_entropy(pred_dist, tr.view(-1), reduction="none").view(tl.shape) * wr
        ).mean(-1, keepdim=True)

__init__(reg_max, use_dfl=False)

Khởi tạo mô-đun BboxLoss với cài đặt tối đa và DFL chính quy.

Mã nguồn trong ultralytics/utils/loss.py
def __init__(self, reg_max, use_dfl=False):
    """Initialize the BboxLoss module with regularization maximum and DFL settings."""
    super().__init__()
    self.reg_max = reg_max
    self.use_dfl = use_dfl

forward(pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask)

Mất IoU.

Mã nguồn trong ultralytics/utils/loss.py
def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
    """IoU loss."""
    weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)
    iou = bbox_iou(pred_bboxes[fg_mask], target_bboxes[fg_mask], xywh=False, CIoU=True)
    loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum

    # DFL loss
    if self.use_dfl:
        target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max)
        loss_dfl = self._df_loss(pred_dist[fg_mask].view(-1, self.reg_max + 1), target_ltrb[fg_mask]) * weight
        loss_dfl = loss_dfl.sum() / target_scores_sum
    else:
        loss_dfl = torch.tensor(0.0).to(pred_dist.device)

    return loss_iou, loss_dfl



ultralytics.utils.loss.RotatedBboxLoss

Căn cứ: BboxLoss

Lớp tiêu chí tính toán tổn thất đào tạo trong quá trình đào tạo.

Mã nguồn trong ultralytics/utils/loss.py
class RotatedBboxLoss(BboxLoss):
    """Criterion class for computing training losses during training."""

    def __init__(self, reg_max, use_dfl=False):
        """Initialize the BboxLoss module with regularization maximum and DFL settings."""
        super().__init__(reg_max, use_dfl)

    def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
        """IoU loss."""
        weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)
        iou = probiou(pred_bboxes[fg_mask], target_bboxes[fg_mask])
        loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum

        # DFL loss
        if self.use_dfl:
            target_ltrb = bbox2dist(anchor_points, xywh2xyxy(target_bboxes[..., :4]), self.reg_max)
            loss_dfl = self._df_loss(pred_dist[fg_mask].view(-1, self.reg_max + 1), target_ltrb[fg_mask]) * weight
            loss_dfl = loss_dfl.sum() / target_scores_sum
        else:
            loss_dfl = torch.tensor(0.0).to(pred_dist.device)

        return loss_iou, loss_dfl

__init__(reg_max, use_dfl=False)

Khởi tạo mô-đun BboxLoss với cài đặt tối đa và DFL chính quy.

Mã nguồn trong ultralytics/utils/loss.py
def __init__(self, reg_max, use_dfl=False):
    """Initialize the BboxLoss module with regularization maximum and DFL settings."""
    super().__init__(reg_max, use_dfl)

forward(pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask)

Mất IoU.

Mã nguồn trong ultralytics/utils/loss.py
def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
    """IoU loss."""
    weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)
    iou = probiou(pred_bboxes[fg_mask], target_bboxes[fg_mask])
    loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum

    # DFL loss
    if self.use_dfl:
        target_ltrb = bbox2dist(anchor_points, xywh2xyxy(target_bboxes[..., :4]), self.reg_max)
        loss_dfl = self._df_loss(pred_dist[fg_mask].view(-1, self.reg_max + 1), target_ltrb[fg_mask]) * weight
        loss_dfl = loss_dfl.sum() / target_scores_sum
    else:
        loss_dfl = torch.tensor(0.0).to(pred_dist.device)

    return loss_iou, loss_dfl



ultralytics.utils.loss.KeypointLoss

Căn cứ: Module

Lớp tiêu chí tính toán tổn thất đào tạo.

Mã nguồn trong ultralytics/utils/loss.py
class KeypointLoss(nn.Module):
    """Criterion class for computing training losses."""

    def __init__(self, sigmas) -> None:
        """Initialize the KeypointLoss class."""
        super().__init__()
        self.sigmas = sigmas

    def forward(self, pred_kpts, gt_kpts, kpt_mask, area):
        """Calculates keypoint loss factor and Euclidean distance loss for predicted and actual keypoints."""
        d = (pred_kpts[..., 0] - gt_kpts[..., 0]).pow(2) + (pred_kpts[..., 1] - gt_kpts[..., 1]).pow(2)
        kpt_loss_factor = kpt_mask.shape[1] / (torch.sum(kpt_mask != 0, dim=1) + 1e-9)
        # e = d / (2 * (area * self.sigmas) ** 2 + 1e-9)  # from formula
        e = d / ((2 * self.sigmas).pow(2) * (area + 1e-9) * 2)  # from cocoeval
        return (kpt_loss_factor.view(-1, 1) * ((1 - torch.exp(-e)) * kpt_mask)).mean()

__init__(sigmas)

Khởi tạo lớp KeypointLoss.

Mã nguồn trong ultralytics/utils/loss.py
def __init__(self, sigmas) -> None:
    """Initialize the KeypointLoss class."""
    super().__init__()
    self.sigmas = sigmas

forward(pred_kpts, gt_kpts, kpt_mask, area)

Tính toán hệ số tổn thất điểm chính và tổn thất khoảng cách Euclid cho các điểm chính dự đoán và thực tế.

Mã nguồn trong ultralytics/utils/loss.py
def forward(self, pred_kpts, gt_kpts, kpt_mask, area):
    """Calculates keypoint loss factor and Euclidean distance loss for predicted and actual keypoints."""
    d = (pred_kpts[..., 0] - gt_kpts[..., 0]).pow(2) + (pred_kpts[..., 1] - gt_kpts[..., 1]).pow(2)
    kpt_loss_factor = kpt_mask.shape[1] / (torch.sum(kpt_mask != 0, dim=1) + 1e-9)
    # e = d / (2 * (area * self.sigmas) ** 2 + 1e-9)  # from formula
    e = d / ((2 * self.sigmas).pow(2) * (area + 1e-9) * 2)  # from cocoeval
    return (kpt_loss_factor.view(-1, 1) * ((1 - torch.exp(-e)) * kpt_mask)).mean()



ultralytics.utils.loss.v8DetectionLoss

Lớp tiêu chí tính toán tổn thất đào tạo.

Mã nguồn trong ultralytics/utils/loss.py
class v8DetectionLoss:
    """Criterion class for computing training losses."""

    def __init__(self, model, tal_topk=10):  # model must be de-paralleled
        """Initializes v8DetectionLoss with the model, defining model-related properties and BCE loss function."""
        device = next(model.parameters()).device  # get model device
        h = model.args  # hyperparameters

        m = model.model[-1]  # Detect() module
        self.bce = nn.BCEWithLogitsLoss(reduction="none")
        self.hyp = h
        self.stride = m.stride  # model strides
        self.nc = m.nc  # number of classes
        self.no = m.nc + m.reg_max * 4
        self.reg_max = m.reg_max
        self.device = device

        self.use_dfl = m.reg_max > 1

        self.assigner = TaskAlignedAssigner(topk=tal_topk, num_classes=self.nc, alpha=0.5, beta=6.0)
        self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=self.use_dfl).to(device)
        self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)

    def preprocess(self, targets, batch_size, scale_tensor):
        """Preprocesses the target counts and matches with the input batch size to output a tensor."""
        if targets.shape[0] == 0:
            out = torch.zeros(batch_size, 0, 5, device=self.device)
        else:
            i = targets[:, 0]  # image index
            _, counts = i.unique(return_counts=True)
            counts = counts.to(dtype=torch.int32)
            out = torch.zeros(batch_size, counts.max(), 5, device=self.device)
            for j in range(batch_size):
                matches = i == j
                n = matches.sum()
                if n:
                    out[j, :n] = targets[matches, 1:]
            out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor))
        return out

    def bbox_decode(self, anchor_points, pred_dist):
        """Decode predicted object bounding box coordinates from anchor points and distribution."""
        if self.use_dfl:
            b, a, c = pred_dist.shape  # batch, anchors, channels
            pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
            # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype))
            # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2)
        return dist2bbox(pred_dist, anchor_points, xywh=False)

    def __call__(self, preds, batch):
        """Calculate the sum of the loss for box, cls and dfl multiplied by batch size."""
        loss = torch.zeros(3, device=self.device)  # box, cls, dfl
        feats = preds[1] if isinstance(preds, tuple) else preds
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1
        )

        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()

        dtype = pred_scores.dtype
        batch_size = pred_scores.shape[0]
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # Targets
        targets = torch.cat((batch["batch_idx"].view(-1, 1), batch["cls"].view(-1, 1), batch["bboxes"]), 1)
        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)

        # Pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)

        _, target_bboxes, target_scores, fg_mask, _ = self.assigner(
            pred_scores.detach().sigmoid(),
            (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor,
            gt_labels,
            gt_bboxes,
            mask_gt,
        )

        target_scores_sum = max(target_scores.sum(), 1)

        # Cls loss
        # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
        loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

        # Bbox loss
        if fg_mask.sum():
            target_bboxes /= stride_tensor
            loss[0], loss[2] = self.bbox_loss(
                pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask
            )

        loss[0] *= self.hyp.box  # box gain
        loss[1] *= self.hyp.cls  # cls gain
        loss[2] *= self.hyp.dfl  # dfl gain

        return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)

__call__(preds, batch)

Tính tổng tổn thất cho hộp, cls và dfl nhân với kích thước lô.

Mã nguồn trong ultralytics/utils/loss.py
def __call__(self, preds, batch):
    """Calculate the sum of the loss for box, cls and dfl multiplied by batch size."""
    loss = torch.zeros(3, device=self.device)  # box, cls, dfl
    feats = preds[1] if isinstance(preds, tuple) else preds
    pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
        (self.reg_max * 4, self.nc), 1
    )

    pred_scores = pred_scores.permute(0, 2, 1).contiguous()
    pred_distri = pred_distri.permute(0, 2, 1).contiguous()

    dtype = pred_scores.dtype
    batch_size = pred_scores.shape[0]
    imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
    anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

    # Targets
    targets = torch.cat((batch["batch_idx"].view(-1, 1), batch["cls"].view(-1, 1), batch["bboxes"]), 1)
    targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
    gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
    mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)

    # Pboxes
    pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)

    _, target_bboxes, target_scores, fg_mask, _ = self.assigner(
        pred_scores.detach().sigmoid(),
        (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
        anchor_points * stride_tensor,
        gt_labels,
        gt_bboxes,
        mask_gt,
    )

    target_scores_sum = max(target_scores.sum(), 1)

    # Cls loss
    # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
    loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

    # Bbox loss
    if fg_mask.sum():
        target_bboxes /= stride_tensor
        loss[0], loss[2] = self.bbox_loss(
            pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask
        )

    loss[0] *= self.hyp.box  # box gain
    loss[1] *= self.hyp.cls  # cls gain
    loss[2] *= self.hyp.dfl  # dfl gain

    return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)

__init__(model, tal_topk=10)

Khởi tạo v8DetectionLoss với model, xác định các thuộc tính liên quan đến model và hàm mất BCE.

Mã nguồn trong ultralytics/utils/loss.py
def __init__(self, model, tal_topk=10):  # model must be de-paralleled
    """Initializes v8DetectionLoss with the model, defining model-related properties and BCE loss function."""
    device = next(model.parameters()).device  # get model device
    h = model.args  # hyperparameters

    m = model.model[-1]  # Detect() module
    self.bce = nn.BCEWithLogitsLoss(reduction="none")
    self.hyp = h
    self.stride = m.stride  # model strides
    self.nc = m.nc  # number of classes
    self.no = m.nc + m.reg_max * 4
    self.reg_max = m.reg_max
    self.device = device

    self.use_dfl = m.reg_max > 1

    self.assigner = TaskAlignedAssigner(topk=tal_topk, num_classes=self.nc, alpha=0.5, beta=6.0)
    self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=self.use_dfl).to(device)
    self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)

bbox_decode(anchor_points, pred_dist)

Giải mã tọa độ hộp giới hạn đối tượng dự đoán từ các điểm neo và phân bố.

Mã nguồn trong ultralytics/utils/loss.py
def bbox_decode(self, anchor_points, pred_dist):
    """Decode predicted object bounding box coordinates from anchor points and distribution."""
    if self.use_dfl:
        b, a, c = pred_dist.shape  # batch, anchors, channels
        pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
        # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype))
        # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2)
    return dist2bbox(pred_dist, anchor_points, xywh=False)

preprocess(targets, batch_size, scale_tensor)

Xử lý trước số lượng mục tiêu và khớp với kích thước lô đầu vào để xuất ra tensor.

Mã nguồn trong ultralytics/utils/loss.py
def preprocess(self, targets, batch_size, scale_tensor):
    """Preprocesses the target counts and matches with the input batch size to output a tensor."""
    if targets.shape[0] == 0:
        out = torch.zeros(batch_size, 0, 5, device=self.device)
    else:
        i = targets[:, 0]  # image index
        _, counts = i.unique(return_counts=True)
        counts = counts.to(dtype=torch.int32)
        out = torch.zeros(batch_size, counts.max(), 5, device=self.device)
        for j in range(batch_size):
            matches = i == j
            n = matches.sum()
            if n:
                out[j, :n] = targets[matches, 1:]
        out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor))
    return out



ultralytics.utils.loss.v8SegmentationLoss

Căn cứ: v8DetectionLoss

Lớp tiêu chí tính toán tổn thất đào tạo.

Mã nguồn trong ultralytics/utils/loss.py
class v8SegmentationLoss(v8DetectionLoss):
    """Criterion class for computing training losses."""

    def __init__(self, model):  # model must be de-paralleled
        """Initializes the v8SegmentationLoss class, taking a de-paralleled model as argument."""
        super().__init__(model)
        self.overlap = model.args.overlap_mask

    def __call__(self, preds, batch):
        """Calculate and return the loss for the YOLO model."""
        loss = torch.zeros(4, device=self.device)  # box, cls, dfl
        feats, pred_masks, proto = preds if len(preds) == 3 else preds[1]
        batch_size, _, mask_h, mask_w = proto.shape  # batch size, number of masks, mask height, mask width
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1
        )

        # B, grids, ..
        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()
        pred_masks = pred_masks.permute(0, 2, 1).contiguous()

        dtype = pred_scores.dtype
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # Targets
        try:
            batch_idx = batch["batch_idx"].view(-1, 1)
            targets = torch.cat((batch_idx, batch["cls"].view(-1, 1), batch["bboxes"]), 1)
            targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
            gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
            mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
        except RuntimeError as e:
            raise TypeError(
                "ERROR ❌ segment dataset incorrectly formatted or not a segment dataset.\n"
                "This error can occur when incorrectly training a 'segment' model on a 'detect' dataset, "
                "i.e. 'yolo train model=yolov8n-seg.pt data=coco8.yaml'.\nVerify your dataset is a "
                "correctly formatted 'segment' dataset using 'data=coco8-seg.yaml' "
                "as an example.\nSee https://docs.ultralytics.com/datasets/segment/ for help."
            ) from e

        # Pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)

        _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
            pred_scores.detach().sigmoid(),
            (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor,
            gt_labels,
            gt_bboxes,
            mask_gt,
        )

        target_scores_sum = max(target_scores.sum(), 1)

        # Cls loss
        # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
        loss[2] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

        if fg_mask.sum():
            # Bbox loss
            loss[0], loss[3] = self.bbox_loss(
                pred_distri,
                pred_bboxes,
                anchor_points,
                target_bboxes / stride_tensor,
                target_scores,
                target_scores_sum,
                fg_mask,
            )
            # Masks loss
            masks = batch["masks"].to(self.device).float()
            if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
                masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0]

            loss[1] = self.calculate_segmentation_loss(
                fg_mask, masks, target_gt_idx, target_bboxes, batch_idx, proto, pred_masks, imgsz, self.overlap
            )

        # WARNING: lines below prevent Multi-GPU DDP 'unused gradient' PyTorch errors, do not remove
        else:
            loss[1] += (proto * 0).sum() + (pred_masks * 0).sum()  # inf sums may lead to nan loss

        loss[0] *= self.hyp.box  # box gain
        loss[1] *= self.hyp.box  # seg gain
        loss[2] *= self.hyp.cls  # cls gain
        loss[3] *= self.hyp.dfl  # dfl gain

        return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)

    @staticmethod
    def single_mask_loss(
        gt_mask: torch.Tensor, pred: torch.Tensor, proto: torch.Tensor, xyxy: torch.Tensor, area: torch.Tensor
    ) -> torch.Tensor:
        """
        Compute the instance segmentation loss for a single image.

        Args:
            gt_mask (torch.Tensor): Ground truth mask of shape (n, H, W), where n is the number of objects.
            pred (torch.Tensor): Predicted mask coefficients of shape (n, 32).
            proto (torch.Tensor): Prototype masks of shape (32, H, W).
            xyxy (torch.Tensor): Ground truth bounding boxes in xyxy format, normalized to [0, 1], of shape (n, 4).
            area (torch.Tensor): Area of each ground truth bounding box of shape (n,).

        Returns:
            (torch.Tensor): The calculated mask loss for a single image.

        Notes:
            The function uses the equation pred_mask = torch.einsum('in,nhw->ihw', pred, proto) to produce the
            predicted masks from the prototype masks and predicted mask coefficients.
        """
        pred_mask = torch.einsum("in,nhw->ihw", pred, proto)  # (n, 32) @ (32, 80, 80) -> (n, 80, 80)
        loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")
        return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).sum()

    def calculate_segmentation_loss(
        self,
        fg_mask: torch.Tensor,
        masks: torch.Tensor,
        target_gt_idx: torch.Tensor,
        target_bboxes: torch.Tensor,
        batch_idx: torch.Tensor,
        proto: torch.Tensor,
        pred_masks: torch.Tensor,
        imgsz: torch.Tensor,
        overlap: bool,
    ) -> torch.Tensor:
        """
        Calculate the loss for instance segmentation.

        Args:
            fg_mask (torch.Tensor): A binary tensor of shape (BS, N_anchors) indicating which anchors are positive.
            masks (torch.Tensor): Ground truth masks of shape (BS, H, W) if `overlap` is False, otherwise (BS, ?, H, W).
            target_gt_idx (torch.Tensor): Indexes of ground truth objects for each anchor of shape (BS, N_anchors).
            target_bboxes (torch.Tensor): Ground truth bounding boxes for each anchor of shape (BS, N_anchors, 4).
            batch_idx (torch.Tensor): Batch indices of shape (N_labels_in_batch, 1).
            proto (torch.Tensor): Prototype masks of shape (BS, 32, H, W).
            pred_masks (torch.Tensor): Predicted masks for each anchor of shape (BS, N_anchors, 32).
            imgsz (torch.Tensor): Size of the input image as a tensor of shape (2), i.e., (H, W).
            overlap (bool): Whether the masks in `masks` tensor overlap.

        Returns:
            (torch.Tensor): The calculated loss for instance segmentation.

        Notes:
            The batch loss can be computed for improved speed at higher memory usage.
            For example, pred_mask can be computed as follows:
                pred_mask = torch.einsum('in,nhw->ihw', pred, proto)  # (i, 32) @ (32, 160, 160) -> (i, 160, 160)
        """
        _, _, mask_h, mask_w = proto.shape
        loss = 0

        # Normalize to 0-1
        target_bboxes_normalized = target_bboxes / imgsz[[1, 0, 1, 0]]

        # Areas of target bboxes
        marea = xyxy2xywh(target_bboxes_normalized)[..., 2:].prod(2)

        # Normalize to mask size
        mxyxy = target_bboxes_normalized * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=proto.device)

        for i, single_i in enumerate(zip(fg_mask, target_gt_idx, pred_masks, proto, mxyxy, marea, masks)):
            fg_mask_i, target_gt_idx_i, pred_masks_i, proto_i, mxyxy_i, marea_i, masks_i = single_i
            if fg_mask_i.any():
                mask_idx = target_gt_idx_i[fg_mask_i]
                if overlap:
                    gt_mask = masks_i == (mask_idx + 1).view(-1, 1, 1)
                    gt_mask = gt_mask.float()
                else:
                    gt_mask = masks[batch_idx.view(-1) == i][mask_idx]

                loss += self.single_mask_loss(
                    gt_mask, pred_masks_i[fg_mask_i], proto_i, mxyxy_i[fg_mask_i], marea_i[fg_mask_i]
                )

            # WARNING: lines below prevents Multi-GPU DDP 'unused gradient' PyTorch errors, do not remove
            else:
                loss += (proto * 0).sum() + (pred_masks * 0).sum()  # inf sums may lead to nan loss

        return loss / fg_mask.sum()

__call__(preds, batch)

Tính toán và trả lại tổn thất cho YOLO mẫu.

Mã nguồn trong ultralytics/utils/loss.py
def __call__(self, preds, batch):
    """Calculate and return the loss for the YOLO model."""
    loss = torch.zeros(4, device=self.device)  # box, cls, dfl
    feats, pred_masks, proto = preds if len(preds) == 3 else preds[1]
    batch_size, _, mask_h, mask_w = proto.shape  # batch size, number of masks, mask height, mask width
    pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
        (self.reg_max * 4, self.nc), 1
    )

    # B, grids, ..
    pred_scores = pred_scores.permute(0, 2, 1).contiguous()
    pred_distri = pred_distri.permute(0, 2, 1).contiguous()
    pred_masks = pred_masks.permute(0, 2, 1).contiguous()

    dtype = pred_scores.dtype
    imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
    anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

    # Targets
    try:
        batch_idx = batch["batch_idx"].view(-1, 1)
        targets = torch.cat((batch_idx, batch["cls"].view(-1, 1), batch["bboxes"]), 1)
        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
    except RuntimeError as e:
        raise TypeError(
            "ERROR ❌ segment dataset incorrectly formatted or not a segment dataset.\n"
            "This error can occur when incorrectly training a 'segment' model on a 'detect' dataset, "
            "i.e. 'yolo train model=yolov8n-seg.pt data=coco8.yaml'.\nVerify your dataset is a "
            "correctly formatted 'segment' dataset using 'data=coco8-seg.yaml' "
            "as an example.\nSee https://docs.ultralytics.com/datasets/segment/ for help."
        ) from e

    # Pboxes
    pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)

    _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
        pred_scores.detach().sigmoid(),
        (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
        anchor_points * stride_tensor,
        gt_labels,
        gt_bboxes,
        mask_gt,
    )

    target_scores_sum = max(target_scores.sum(), 1)

    # Cls loss
    # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
    loss[2] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

    if fg_mask.sum():
        # Bbox loss
        loss[0], loss[3] = self.bbox_loss(
            pred_distri,
            pred_bboxes,
            anchor_points,
            target_bboxes / stride_tensor,
            target_scores,
            target_scores_sum,
            fg_mask,
        )
        # Masks loss
        masks = batch["masks"].to(self.device).float()
        if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
            masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0]

        loss[1] = self.calculate_segmentation_loss(
            fg_mask, masks, target_gt_idx, target_bboxes, batch_idx, proto, pred_masks, imgsz, self.overlap
        )

    # WARNING: lines below prevent Multi-GPU DDP 'unused gradient' PyTorch errors, do not remove
    else:
        loss[1] += (proto * 0).sum() + (pred_masks * 0).sum()  # inf sums may lead to nan loss

    loss[0] *= self.hyp.box  # box gain
    loss[1] *= self.hyp.box  # seg gain
    loss[2] *= self.hyp.cls  # cls gain
    loss[3] *= self.hyp.dfl  # dfl gain

    return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)

__init__(model)

Khởi tạo lớp v8SegmentationLoss, lấy một mô hình de-paralleled làm đối số.

Mã nguồn trong ultralytics/utils/loss.py
def __init__(self, model):  # model must be de-paralleled
    """Initializes the v8SegmentationLoss class, taking a de-paralleled model as argument."""
    super().__init__(model)
    self.overlap = model.args.overlap_mask

calculate_segmentation_loss(fg_mask, masks, target_gt_idx, target_bboxes, batch_idx, proto, pred_masks, imgsz, overlap)

Tính toán tổn thất cho phân đoạn phiên bản.

Thông số:

Tên Kiểu Sự miêu tả Mặc định
fg_mask Tensor

Một nhị phân tensor hình dạng (BS, N_anchors) cho biết neo nào là dương.

bắt buộc
masks Tensor

Mặt nạ sự thật mặt đất của hình dạng (BS, H, W) nếu overlap là False, nếu không (BS, ?, H, W).

bắt buộc
target_gt_idx Tensor

Chỉ số của các đối tượng chân lý mặt đất cho mỗi neo hình dạng (BS, N_anchors).

bắt buộc
target_bboxes Tensor

Các hộp giới hạn sự thật nối đất cho mỗi mỏ neo có hình dạng (BS, N_anchors, 4).

bắt buộc
batch_idx Tensor

Chỉ số hàng loạt về hình dạng (N_labels_in_batch, 1).

bắt buộc
proto Tensor

Mặt nạ nguyên mẫu có hình dạng (BS, 32, H, W).

bắt buộc
pred_masks Tensor

Mặt nạ dự đoán cho mỗi mỏ neo hình dạng (BS, N_anchors, 32).

bắt buộc
imgsz Tensor

Kích thước của hình ảnh đầu vào dưới dạng tensor có hình dạng (2), tức là (H, W).

bắt buộc
overlap bool

Cho dù mặt nạ trong masks tensor chồng chéo.

bắt buộc

Trở lại:

Kiểu Sự miêu tả
Tensor

Tổn thất được tính toán cho phân đoạn phiên bản.

Ghi chú

Tổn thất hàng loạt có thể được tính toán để cải thiện tốc độ ở mức sử dụng bộ nhớ cao hơn. Ví dụ: pred_mask có thể được tính như sau: pred_mask = torch.einsum('in,nhw->ihw', pred, proto) # (i, 32) @ (32, 160, 160) -> (i, 160, 160)

Mã nguồn trong ultralytics/utils/loss.py
def calculate_segmentation_loss(
    self,
    fg_mask: torch.Tensor,
    masks: torch.Tensor,
    target_gt_idx: torch.Tensor,
    target_bboxes: torch.Tensor,
    batch_idx: torch.Tensor,
    proto: torch.Tensor,
    pred_masks: torch.Tensor,
    imgsz: torch.Tensor,
    overlap: bool,
) -> torch.Tensor:
    """
    Calculate the loss for instance segmentation.

    Args:
        fg_mask (torch.Tensor): A binary tensor of shape (BS, N_anchors) indicating which anchors are positive.
        masks (torch.Tensor): Ground truth masks of shape (BS, H, W) if `overlap` is False, otherwise (BS, ?, H, W).
        target_gt_idx (torch.Tensor): Indexes of ground truth objects for each anchor of shape (BS, N_anchors).
        target_bboxes (torch.Tensor): Ground truth bounding boxes for each anchor of shape (BS, N_anchors, 4).
        batch_idx (torch.Tensor): Batch indices of shape (N_labels_in_batch, 1).
        proto (torch.Tensor): Prototype masks of shape (BS, 32, H, W).
        pred_masks (torch.Tensor): Predicted masks for each anchor of shape (BS, N_anchors, 32).
        imgsz (torch.Tensor): Size of the input image as a tensor of shape (2), i.e., (H, W).
        overlap (bool): Whether the masks in `masks` tensor overlap.

    Returns:
        (torch.Tensor): The calculated loss for instance segmentation.

    Notes:
        The batch loss can be computed for improved speed at higher memory usage.
        For example, pred_mask can be computed as follows:
            pred_mask = torch.einsum('in,nhw->ihw', pred, proto)  # (i, 32) @ (32, 160, 160) -> (i, 160, 160)
    """
    _, _, mask_h, mask_w = proto.shape
    loss = 0

    # Normalize to 0-1
    target_bboxes_normalized = target_bboxes / imgsz[[1, 0, 1, 0]]

    # Areas of target bboxes
    marea = xyxy2xywh(target_bboxes_normalized)[..., 2:].prod(2)

    # Normalize to mask size
    mxyxy = target_bboxes_normalized * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=proto.device)

    for i, single_i in enumerate(zip(fg_mask, target_gt_idx, pred_masks, proto, mxyxy, marea, masks)):
        fg_mask_i, target_gt_idx_i, pred_masks_i, proto_i, mxyxy_i, marea_i, masks_i = single_i
        if fg_mask_i.any():
            mask_idx = target_gt_idx_i[fg_mask_i]
            if overlap:
                gt_mask = masks_i == (mask_idx + 1).view(-1, 1, 1)
                gt_mask = gt_mask.float()
            else:
                gt_mask = masks[batch_idx.view(-1) == i][mask_idx]

            loss += self.single_mask_loss(
                gt_mask, pred_masks_i[fg_mask_i], proto_i, mxyxy_i[fg_mask_i], marea_i[fg_mask_i]
            )

        # WARNING: lines below prevents Multi-GPU DDP 'unused gradient' PyTorch errors, do not remove
        else:
            loss += (proto * 0).sum() + (pred_masks * 0).sum()  # inf sums may lead to nan loss

    return loss / fg_mask.sum()

single_mask_loss(gt_mask, pred, proto, xyxy, area) staticmethod

Tính toán tổn thất phân đoạn phiên bản cho một hình ảnh.

Thông số:

Tên Kiểu Sự miêu tả Mặc định
gt_mask Tensor

Mặt nạ chân lý mặt đất của hình dạng (n, H, W), trong đó n là số lượng đối tượng.

bắt buộc
pred Tensor

Hệ số hình dạng mặt nạ dự đoán (n, 32).

bắt buộc
proto Tensor

Mặt nạ nguyên mẫu có hình dạng (32, H, W).

bắt buộc
xyxy Tensor

Các hộp giới hạn chân lý mặt đất ở định dạng xyxy, được chuẩn hóa thành [0, 1], có hình dạng (n, 4).

bắt buộc
area Tensor

Diện tích của mỗi hộp giới hạn chân lý mặt đất có hình dạng (n,).

bắt buộc

Trở lại:

Kiểu Sự miêu tả
Tensor

Mất mặt nạ được tính toán cho một hình ảnh.

Ghi chú

Hàm sử dụng phương trình pred_mask = torch.einsum('in,nhw->ihw', pred, proto) để tạo ra mặt nạ dự đoán từ mặt nạ nguyên mẫu và hệ số mặt nạ dự đoán.

Mã nguồn trong ultralytics/utils/loss.py
@staticmethod
def single_mask_loss(
    gt_mask: torch.Tensor, pred: torch.Tensor, proto: torch.Tensor, xyxy: torch.Tensor, area: torch.Tensor
) -> torch.Tensor:
    """
    Compute the instance segmentation loss for a single image.

    Args:
        gt_mask (torch.Tensor): Ground truth mask of shape (n, H, W), where n is the number of objects.
        pred (torch.Tensor): Predicted mask coefficients of shape (n, 32).
        proto (torch.Tensor): Prototype masks of shape (32, H, W).
        xyxy (torch.Tensor): Ground truth bounding boxes in xyxy format, normalized to [0, 1], of shape (n, 4).
        area (torch.Tensor): Area of each ground truth bounding box of shape (n,).

    Returns:
        (torch.Tensor): The calculated mask loss for a single image.

    Notes:
        The function uses the equation pred_mask = torch.einsum('in,nhw->ihw', pred, proto) to produce the
        predicted masks from the prototype masks and predicted mask coefficients.
    """
    pred_mask = torch.einsum("in,nhw->ihw", pred, proto)  # (n, 32) @ (32, 80, 80) -> (n, 80, 80)
    loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")
    return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).sum()



ultralytics.utils.loss.v8PoseLoss

Căn cứ: v8DetectionLoss

Lớp tiêu chí tính toán tổn thất đào tạo.

Mã nguồn trong ultralytics/utils/loss.py
class v8PoseLoss(v8DetectionLoss):
    """Criterion class for computing training losses."""

    def __init__(self, model):  # model must be de-paralleled
        """Initializes v8PoseLoss with model, sets keypoint variables and declares a keypoint loss instance."""
        super().__init__(model)
        self.kpt_shape = model.model[-1].kpt_shape
        self.bce_pose = nn.BCEWithLogitsLoss()
        is_pose = self.kpt_shape == [17, 3]
        nkpt = self.kpt_shape[0]  # number of keypoints
        sigmas = torch.from_numpy(OKS_SIGMA).to(self.device) if is_pose else torch.ones(nkpt, device=self.device) / nkpt
        self.keypoint_loss = KeypointLoss(sigmas=sigmas)

    def __call__(self, preds, batch):
        """Calculate the total loss and detach it."""
        loss = torch.zeros(5, device=self.device)  # box, cls, dfl, kpt_location, kpt_visibility
        feats, pred_kpts = preds if isinstance(preds[0], list) else preds[1]
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1
        )

        # B, grids, ..
        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()
        pred_kpts = pred_kpts.permute(0, 2, 1).contiguous()

        dtype = pred_scores.dtype
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # Targets
        batch_size = pred_scores.shape[0]
        batch_idx = batch["batch_idx"].view(-1, 1)
        targets = torch.cat((batch_idx, batch["cls"].view(-1, 1), batch["bboxes"]), 1)
        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)

        # Pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)
        pred_kpts = self.kpts_decode(anchor_points, pred_kpts.view(batch_size, -1, *self.kpt_shape))  # (b, h*w, 17, 3)

        _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
            pred_scores.detach().sigmoid(),
            (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor,
            gt_labels,
            gt_bboxes,
            mask_gt,
        )

        target_scores_sum = max(target_scores.sum(), 1)

        # Cls loss
        # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
        loss[3] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

        # Bbox loss
        if fg_mask.sum():
            target_bboxes /= stride_tensor
            loss[0], loss[4] = self.bbox_loss(
                pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask
            )
            keypoints = batch["keypoints"].to(self.device).float().clone()
            keypoints[..., 0] *= imgsz[1]
            keypoints[..., 1] *= imgsz[0]

            loss[1], loss[2] = self.calculate_keypoints_loss(
                fg_mask, target_gt_idx, keypoints, batch_idx, stride_tensor, target_bboxes, pred_kpts
            )

        loss[0] *= self.hyp.box  # box gain
        loss[1] *= self.hyp.pose  # pose gain
        loss[2] *= self.hyp.kobj  # kobj gain
        loss[3] *= self.hyp.cls  # cls gain
        loss[4] *= self.hyp.dfl  # dfl gain

        return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)

    @staticmethod
    def kpts_decode(anchor_points, pred_kpts):
        """Decodes predicted keypoints to image coordinates."""
        y = pred_kpts.clone()
        y[..., :2] *= 2.0
        y[..., 0] += anchor_points[:, [0]] - 0.5
        y[..., 1] += anchor_points[:, [1]] - 0.5
        return y

    def calculate_keypoints_loss(
        self, masks, target_gt_idx, keypoints, batch_idx, stride_tensor, target_bboxes, pred_kpts
    ):
        """
        Calculate the keypoints loss for the model.

        This function calculates the keypoints loss and keypoints object loss for a given batch. The keypoints loss is
        based on the difference between the predicted keypoints and ground truth keypoints. The keypoints object loss is
        a binary classification loss that classifies whether a keypoint is present or not.

        Args:
            masks (torch.Tensor): Binary mask tensor indicating object presence, shape (BS, N_anchors).
            target_gt_idx (torch.Tensor): Index tensor mapping anchors to ground truth objects, shape (BS, N_anchors).
            keypoints (torch.Tensor): Ground truth keypoints, shape (N_kpts_in_batch, N_kpts_per_object, kpts_dim).
            batch_idx (torch.Tensor): Batch index tensor for keypoints, shape (N_kpts_in_batch, 1).
            stride_tensor (torch.Tensor): Stride tensor for anchors, shape (N_anchors, 1).
            target_bboxes (torch.Tensor): Ground truth boxes in (x1, y1, x2, y2) format, shape (BS, N_anchors, 4).
            pred_kpts (torch.Tensor): Predicted keypoints, shape (BS, N_anchors, N_kpts_per_object, kpts_dim).

        Returns:
            (tuple): Returns a tuple containing:
                - kpts_loss (torch.Tensor): The keypoints loss.
                - kpts_obj_loss (torch.Tensor): The keypoints object loss.
        """
        batch_idx = batch_idx.flatten()
        batch_size = len(masks)

        # Find the maximum number of keypoints in a single image
        max_kpts = torch.unique(batch_idx, return_counts=True)[1].max()

        # Create a tensor to hold batched keypoints
        batched_keypoints = torch.zeros(
            (batch_size, max_kpts, keypoints.shape[1], keypoints.shape[2]), device=keypoints.device
        )

        # TODO: any idea how to vectorize this?
        # Fill batched_keypoints with keypoints based on batch_idx
        for i in range(batch_size):
            keypoints_i = keypoints[batch_idx == i]
            batched_keypoints[i, : keypoints_i.shape[0]] = keypoints_i

        # Expand dimensions of target_gt_idx to match the shape of batched_keypoints
        target_gt_idx_expanded = target_gt_idx.unsqueeze(-1).unsqueeze(-1)

        # Use target_gt_idx_expanded to select keypoints from batched_keypoints
        selected_keypoints = batched_keypoints.gather(
            1, target_gt_idx_expanded.expand(-1, -1, keypoints.shape[1], keypoints.shape[2])
        )

        # Divide coordinates by stride
        selected_keypoints /= stride_tensor.view(1, -1, 1, 1)

        kpts_loss = 0
        kpts_obj_loss = 0

        if masks.any():
            gt_kpt = selected_keypoints[masks]
            area = xyxy2xywh(target_bboxes[masks])[:, 2:].prod(1, keepdim=True)
            pred_kpt = pred_kpts[masks]
            kpt_mask = gt_kpt[..., 2] != 0 if gt_kpt.shape[-1] == 3 else torch.full_like(gt_kpt[..., 0], True)
            kpts_loss = self.keypoint_loss(pred_kpt, gt_kpt, kpt_mask, area)  # pose loss

            if pred_kpt.shape[-1] == 3:
                kpts_obj_loss = self.bce_pose(pred_kpt[..., 2], kpt_mask.float())  # keypoint obj loss

        return kpts_loss, kpts_obj_loss

__call__(preds, batch)

Tính tổng tổn thất và tách nó ra.

Mã nguồn trong ultralytics/utils/loss.py
def __call__(self, preds, batch):
    """Calculate the total loss and detach it."""
    loss = torch.zeros(5, device=self.device)  # box, cls, dfl, kpt_location, kpt_visibility
    feats, pred_kpts = preds if isinstance(preds[0], list) else preds[1]
    pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
        (self.reg_max * 4, self.nc), 1
    )

    # B, grids, ..
    pred_scores = pred_scores.permute(0, 2, 1).contiguous()
    pred_distri = pred_distri.permute(0, 2, 1).contiguous()
    pred_kpts = pred_kpts.permute(0, 2, 1).contiguous()

    dtype = pred_scores.dtype
    imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
    anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

    # Targets
    batch_size = pred_scores.shape[0]
    batch_idx = batch["batch_idx"].view(-1, 1)
    targets = torch.cat((batch_idx, batch["cls"].view(-1, 1), batch["bboxes"]), 1)
    targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
    gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
    mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)

    # Pboxes
    pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)
    pred_kpts = self.kpts_decode(anchor_points, pred_kpts.view(batch_size, -1, *self.kpt_shape))  # (b, h*w, 17, 3)

    _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
        pred_scores.detach().sigmoid(),
        (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
        anchor_points * stride_tensor,
        gt_labels,
        gt_bboxes,
        mask_gt,
    )

    target_scores_sum = max(target_scores.sum(), 1)

    # Cls loss
    # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
    loss[3] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

    # Bbox loss
    if fg_mask.sum():
        target_bboxes /= stride_tensor
        loss[0], loss[4] = self.bbox_loss(
            pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask
        )
        keypoints = batch["keypoints"].to(self.device).float().clone()
        keypoints[..., 0] *= imgsz[1]
        keypoints[..., 1] *= imgsz[0]

        loss[1], loss[2] = self.calculate_keypoints_loss(
            fg_mask, target_gt_idx, keypoints, batch_idx, stride_tensor, target_bboxes, pred_kpts
        )

    loss[0] *= self.hyp.box  # box gain
    loss[1] *= self.hyp.pose  # pose gain
    loss[2] *= self.hyp.kobj  # kobj gain
    loss[3] *= self.hyp.cls  # cls gain
    loss[4] *= self.hyp.dfl  # dfl gain

    return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)

__init__(model)

Khởi tạo v8PoseLoss với model, đặt các biến keypoint và khai báo một instance mất keypoint.

Mã nguồn trong ultralytics/utils/loss.py
def __init__(self, model):  # model must be de-paralleled
    """Initializes v8PoseLoss with model, sets keypoint variables and declares a keypoint loss instance."""
    super().__init__(model)
    self.kpt_shape = model.model[-1].kpt_shape
    self.bce_pose = nn.BCEWithLogitsLoss()
    is_pose = self.kpt_shape == [17, 3]
    nkpt = self.kpt_shape[0]  # number of keypoints
    sigmas = torch.from_numpy(OKS_SIGMA).to(self.device) if is_pose else torch.ones(nkpt, device=self.device) / nkpt
    self.keypoint_loss = KeypointLoss(sigmas=sigmas)

calculate_keypoints_loss(masks, target_gt_idx, keypoints, batch_idx, stride_tensor, target_bboxes, pred_kpts)

Tính toán tổn thất điểm chính cho mô hình.

Hàm này tính toán tổn thất điểm chính và mất đối tượng điểm chính cho một lô nhất định. Mất điểm chính là dựa trên sự khác biệt giữa các điểm chính được dự đoán và các điểm chính sự thật cơ bản. Mất đối tượng điểm chính là Một mất mát phân loại nhị phân phân loại xem một điểm chính có mặt hay không.

Thông số:

Tên Kiểu Sự miêu tả Mặc định
masks Tensor

Mặt nạ nhị phân tensor chỉ ra sự hiện diện của đối tượng, hình dạng (BS, N_anchors).

bắt buộc
target_gt_idx Tensor

Chỉ số tensor ánh xạ neo đến mặt đất các đối tượng, hình dạng sự thật (BS, N_anchors).

bắt buộc
keypoints Tensor

Điểm mấu chốt, hình dạng (N_kpts_in_batch, N_kpts_per_object, kpts_dim).

bắt buộc
batch_idx Tensor

Chỉ số hàng loạt tensor Đối với các điểm chính, hình dạng (N_kpts_in_batch, 1).

bắt buộc
stride_tensor Tensor

Stride tensor Đối với neo, hình dạng (N_anchors, 1).

bắt buộc
target_bboxes Tensor

Hộp sự thật nối đất ở định dạng (x1, y1, x2, y2), hình dạng (BS, N_anchors, 4).

bắt buộc
pred_kpts Tensor

Các điểm chính, hình dạng dự đoán (BS, N_anchors, N_kpts_per_object, kpts_dim).

bắt buộc

Trở lại:

Kiểu Sự miêu tả
tuple

Trả về một bộ dữ liệu chứa: - kpts_loss (torch.Tensor): Mất điểm chính. - kpts_obj_loss (torch.Tensor): Mất đối tượng điểm chính.

Mã nguồn trong ultralytics/utils/loss.py
def calculate_keypoints_loss(
    self, masks, target_gt_idx, keypoints, batch_idx, stride_tensor, target_bboxes, pred_kpts
):
    """
    Calculate the keypoints loss for the model.

    This function calculates the keypoints loss and keypoints object loss for a given batch. The keypoints loss is
    based on the difference between the predicted keypoints and ground truth keypoints. The keypoints object loss is
    a binary classification loss that classifies whether a keypoint is present or not.

    Args:
        masks (torch.Tensor): Binary mask tensor indicating object presence, shape (BS, N_anchors).
        target_gt_idx (torch.Tensor): Index tensor mapping anchors to ground truth objects, shape (BS, N_anchors).
        keypoints (torch.Tensor): Ground truth keypoints, shape (N_kpts_in_batch, N_kpts_per_object, kpts_dim).
        batch_idx (torch.Tensor): Batch index tensor for keypoints, shape (N_kpts_in_batch, 1).
        stride_tensor (torch.Tensor): Stride tensor for anchors, shape (N_anchors, 1).
        target_bboxes (torch.Tensor): Ground truth boxes in (x1, y1, x2, y2) format, shape (BS, N_anchors, 4).
        pred_kpts (torch.Tensor): Predicted keypoints, shape (BS, N_anchors, N_kpts_per_object, kpts_dim).

    Returns:
        (tuple): Returns a tuple containing:
            - kpts_loss (torch.Tensor): The keypoints loss.
            - kpts_obj_loss (torch.Tensor): The keypoints object loss.
    """
    batch_idx = batch_idx.flatten()
    batch_size = len(masks)

    # Find the maximum number of keypoints in a single image
    max_kpts = torch.unique(batch_idx, return_counts=True)[1].max()

    # Create a tensor to hold batched keypoints
    batched_keypoints = torch.zeros(
        (batch_size, max_kpts, keypoints.shape[1], keypoints.shape[2]), device=keypoints.device
    )

    # TODO: any idea how to vectorize this?
    # Fill batched_keypoints with keypoints based on batch_idx
    for i in range(batch_size):
        keypoints_i = keypoints[batch_idx == i]
        batched_keypoints[i, : keypoints_i.shape[0]] = keypoints_i

    # Expand dimensions of target_gt_idx to match the shape of batched_keypoints
    target_gt_idx_expanded = target_gt_idx.unsqueeze(-1).unsqueeze(-1)

    # Use target_gt_idx_expanded to select keypoints from batched_keypoints
    selected_keypoints = batched_keypoints.gather(
        1, target_gt_idx_expanded.expand(-1, -1, keypoints.shape[1], keypoints.shape[2])
    )

    # Divide coordinates by stride
    selected_keypoints /= stride_tensor.view(1, -1, 1, 1)

    kpts_loss = 0
    kpts_obj_loss = 0

    if masks.any():
        gt_kpt = selected_keypoints[masks]
        area = xyxy2xywh(target_bboxes[masks])[:, 2:].prod(1, keepdim=True)
        pred_kpt = pred_kpts[masks]
        kpt_mask = gt_kpt[..., 2] != 0 if gt_kpt.shape[-1] == 3 else torch.full_like(gt_kpt[..., 0], True)
        kpts_loss = self.keypoint_loss(pred_kpt, gt_kpt, kpt_mask, area)  # pose loss

        if pred_kpt.shape[-1] == 3:
            kpts_obj_loss = self.bce_pose(pred_kpt[..., 2], kpt_mask.float())  # keypoint obj loss

    return kpts_loss, kpts_obj_loss

kpts_decode(anchor_points, pred_kpts) staticmethod

Giải mã các điểm chính dự đoán thành tọa độ hình ảnh.

Mã nguồn trong ultralytics/utils/loss.py
@staticmethod
def kpts_decode(anchor_points, pred_kpts):
    """Decodes predicted keypoints to image coordinates."""
    y = pred_kpts.clone()
    y[..., :2] *= 2.0
    y[..., 0] += anchor_points[:, [0]] - 0.5
    y[..., 1] += anchor_points[:, [1]] - 0.5
    return y



ultralytics.utils.loss.v8ClassificationLoss

Lớp tiêu chí tính toán tổn thất đào tạo.

Mã nguồn trong ultralytics/utils/loss.py
class v8ClassificationLoss:
    """Criterion class for computing training losses."""

    def __call__(self, preds, batch):
        """Compute the classification loss between predictions and true labels."""
        loss = torch.nn.functional.cross_entropy(preds, batch["cls"], reduction="mean")
        loss_items = loss.detach()
        return loss, loss_items

__call__(preds, batch)

Tính toán tổn thất phân loại giữa dự đoán và nhãn thật.

Mã nguồn trong ultralytics/utils/loss.py
def __call__(self, preds, batch):
    """Compute the classification loss between predictions and true labels."""
    loss = torch.nn.functional.cross_entropy(preds, batch["cls"], reduction="mean")
    loss_items = loss.detach()
    return loss, loss_items



ultralytics.utils.loss.v8OBBLoss

Căn cứ: v8DetectionLoss

Mã nguồn trong ultralytics/utils/loss.py
class v8OBBLoss(v8DetectionLoss):
    def __init__(self, model):
        """
        Initializes v8OBBLoss with model, assigner, and rotated bbox loss.

        Note model must be de-paralleled.
        """
        super().__init__(model)
        self.assigner = RotatedTaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0)
        self.bbox_loss = RotatedBboxLoss(self.reg_max - 1, use_dfl=self.use_dfl).to(self.device)

    def preprocess(self, targets, batch_size, scale_tensor):
        """Preprocesses the target counts and matches with the input batch size to output a tensor."""
        if targets.shape[0] == 0:
            out = torch.zeros(batch_size, 0, 6, device=self.device)
        else:
            i = targets[:, 0]  # image index
            _, counts = i.unique(return_counts=True)
            counts = counts.to(dtype=torch.int32)
            out = torch.zeros(batch_size, counts.max(), 6, device=self.device)
            for j in range(batch_size):
                matches = i == j
                n = matches.sum()
                if n:
                    bboxes = targets[matches, 2:]
                    bboxes[..., :4].mul_(scale_tensor)
                    out[j, :n] = torch.cat([targets[matches, 1:2], bboxes], dim=-1)
        return out

    def __call__(self, preds, batch):
        """Calculate and return the loss for the YOLO model."""
        loss = torch.zeros(3, device=self.device)  # box, cls, dfl
        feats, pred_angle = preds if isinstance(preds[0], list) else preds[1]
        batch_size = pred_angle.shape[0]  # batch size, number of masks, mask height, mask width
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1
        )

        # b, grids, ..
        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()
        pred_angle = pred_angle.permute(0, 2, 1).contiguous()

        dtype = pred_scores.dtype
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # targets
        try:
            batch_idx = batch["batch_idx"].view(-1, 1)
            targets = torch.cat((batch_idx, batch["cls"].view(-1, 1), batch["bboxes"].view(-1, 5)), 1)
            rw, rh = targets[:, 4] * imgsz[0].item(), targets[:, 5] * imgsz[1].item()
            targets = targets[(rw >= 2) & (rh >= 2)]  # filter rboxes of tiny size to stabilize training
            targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
            gt_labels, gt_bboxes = targets.split((1, 5), 2)  # cls, xywhr
            mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
        except RuntimeError as e:
            raise TypeError(
                "ERROR ❌ OBB dataset incorrectly formatted or not a OBB dataset.\n"
                "This error can occur when incorrectly training a 'OBB' model on a 'detect' dataset, "
                "i.e. 'yolo train model=yolov8n-obb.pt data=dota8.yaml'.\nVerify your dataset is a "
                "correctly formatted 'OBB' dataset using 'data=dota8.yaml' "
                "as an example.\nSee https://docs.ultralytics.com/datasets/obb/ for help."
            ) from e

        # Pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri, pred_angle)  # xyxy, (b, h*w, 4)

        bboxes_for_assigner = pred_bboxes.clone().detach()
        # Only the first four elements need to be scaled
        bboxes_for_assigner[..., :4] *= stride_tensor
        _, target_bboxes, target_scores, fg_mask, _ = self.assigner(
            pred_scores.detach().sigmoid(),
            bboxes_for_assigner.type(gt_bboxes.dtype),
            anchor_points * stride_tensor,
            gt_labels,
            gt_bboxes,
            mask_gt,
        )

        target_scores_sum = max(target_scores.sum(), 1)

        # Cls loss
        # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
        loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

        # Bbox loss
        if fg_mask.sum():
            target_bboxes[..., :4] /= stride_tensor
            loss[0], loss[2] = self.bbox_loss(
                pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask
            )
        else:
            loss[0] += (pred_angle * 0).sum()

        loss[0] *= self.hyp.box  # box gain
        loss[1] *= self.hyp.cls  # cls gain
        loss[2] *= self.hyp.dfl  # dfl gain

        return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)

    def bbox_decode(self, anchor_points, pred_dist, pred_angle):
        """
        Decode predicted object bounding box coordinates from anchor points and distribution.

        Args:
            anchor_points (torch.Tensor): Anchor points, (h*w, 2).
            pred_dist (torch.Tensor): Predicted rotated distance, (bs, h*w, 4).
            pred_angle (torch.Tensor): Predicted angle, (bs, h*w, 1).

        Returns:
            (torch.Tensor): Predicted rotated bounding boxes with angles, (bs, h*w, 5).
        """
        if self.use_dfl:
            b, a, c = pred_dist.shape  # batch, anchors, channels
            pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
        return torch.cat((dist2rbox(pred_dist, pred_angle, anchor_points), pred_angle), dim=-1)

__call__(preds, batch)

Tính toán và trả lại tổn thất cho YOLO mẫu.

Mã nguồn trong ultralytics/utils/loss.py
def __call__(self, preds, batch):
    """Calculate and return the loss for the YOLO model."""
    loss = torch.zeros(3, device=self.device)  # box, cls, dfl
    feats, pred_angle = preds if isinstance(preds[0], list) else preds[1]
    batch_size = pred_angle.shape[0]  # batch size, number of masks, mask height, mask width
    pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
        (self.reg_max * 4, self.nc), 1
    )

    # b, grids, ..
    pred_scores = pred_scores.permute(0, 2, 1).contiguous()
    pred_distri = pred_distri.permute(0, 2, 1).contiguous()
    pred_angle = pred_angle.permute(0, 2, 1).contiguous()

    dtype = pred_scores.dtype
    imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
    anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

    # targets
    try:
        batch_idx = batch["batch_idx"].view(-1, 1)
        targets = torch.cat((batch_idx, batch["cls"].view(-1, 1), batch["bboxes"].view(-1, 5)), 1)
        rw, rh = targets[:, 4] * imgsz[0].item(), targets[:, 5] * imgsz[1].item()
        targets = targets[(rw >= 2) & (rh >= 2)]  # filter rboxes of tiny size to stabilize training
        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
        gt_labels, gt_bboxes = targets.split((1, 5), 2)  # cls, xywhr
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
    except RuntimeError as e:
        raise TypeError(
            "ERROR ❌ OBB dataset incorrectly formatted or not a OBB dataset.\n"
            "This error can occur when incorrectly training a 'OBB' model on a 'detect' dataset, "
            "i.e. 'yolo train model=yolov8n-obb.pt data=dota8.yaml'.\nVerify your dataset is a "
            "correctly formatted 'OBB' dataset using 'data=dota8.yaml' "
            "as an example.\nSee https://docs.ultralytics.com/datasets/obb/ for help."
        ) from e

    # Pboxes
    pred_bboxes = self.bbox_decode(anchor_points, pred_distri, pred_angle)  # xyxy, (b, h*w, 4)

    bboxes_for_assigner = pred_bboxes.clone().detach()
    # Only the first four elements need to be scaled
    bboxes_for_assigner[..., :4] *= stride_tensor
    _, target_bboxes, target_scores, fg_mask, _ = self.assigner(
        pred_scores.detach().sigmoid(),
        bboxes_for_assigner.type(gt_bboxes.dtype),
        anchor_points * stride_tensor,
        gt_labels,
        gt_bboxes,
        mask_gt,
    )

    target_scores_sum = max(target_scores.sum(), 1)

    # Cls loss
    # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
    loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

    # Bbox loss
    if fg_mask.sum():
        target_bboxes[..., :4] /= stride_tensor
        loss[0], loss[2] = self.bbox_loss(
            pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask
        )
    else:
        loss[0] += (pred_angle * 0).sum()

    loss[0] *= self.hyp.box  # box gain
    loss[1] *= self.hyp.cls  # cls gain
    loss[2] *= self.hyp.dfl  # dfl gain

    return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)

__init__(model)

Khởi tạo v8OBBLoss với model, assigner và mất bbox xoay.

Lưu ý mô hình phải được de-paralleled.

Mã nguồn trong ultralytics/utils/loss.py
def __init__(self, model):
    """
    Initializes v8OBBLoss with model, assigner, and rotated bbox loss.

    Note model must be de-paralleled.
    """
    super().__init__(model)
    self.assigner = RotatedTaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0)
    self.bbox_loss = RotatedBboxLoss(self.reg_max - 1, use_dfl=self.use_dfl).to(self.device)

bbox_decode(anchor_points, pred_dist, pred_angle)

Giải mã tọa độ hộp giới hạn đối tượng dự đoán từ các điểm neo và phân bố.

Thông số:

Tên Kiểu Sự miêu tả Mặc định
anchor_points Tensor

Điểm neo, (h * w, 2).

bắt buộc
pred_dist Tensor

Khoảng cách quay dự đoán, (bs, h * w, 4).

bắt buộc
pred_angle Tensor

Góc dự đoán, (bs, h * w, 1).

bắt buộc

Trở lại:

Kiểu Sự miêu tả
Tensor

Các hộp giới hạn xoay dự đoán với các góc, (bs, h * w, 5).

Mã nguồn trong ultralytics/utils/loss.py
def bbox_decode(self, anchor_points, pred_dist, pred_angle):
    """
    Decode predicted object bounding box coordinates from anchor points and distribution.

    Args:
        anchor_points (torch.Tensor): Anchor points, (h*w, 2).
        pred_dist (torch.Tensor): Predicted rotated distance, (bs, h*w, 4).
        pred_angle (torch.Tensor): Predicted angle, (bs, h*w, 1).

    Returns:
        (torch.Tensor): Predicted rotated bounding boxes with angles, (bs, h*w, 5).
    """
    if self.use_dfl:
        b, a, c = pred_dist.shape  # batch, anchors, channels
        pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
    return torch.cat((dist2rbox(pred_dist, pred_angle, anchor_points), pred_angle), dim=-1)

preprocess(targets, batch_size, scale_tensor)

Xử lý trước số lượng mục tiêu và khớp với kích thước lô đầu vào để xuất ra tensor.

Mã nguồn trong ultralytics/utils/loss.py
def preprocess(self, targets, batch_size, scale_tensor):
    """Preprocesses the target counts and matches with the input batch size to output a tensor."""
    if targets.shape[0] == 0:
        out = torch.zeros(batch_size, 0, 6, device=self.device)
    else:
        i = targets[:, 0]  # image index
        _, counts = i.unique(return_counts=True)
        counts = counts.to(dtype=torch.int32)
        out = torch.zeros(batch_size, counts.max(), 6, device=self.device)
        for j in range(batch_size):
            matches = i == j
            n = matches.sum()
            if n:
                bboxes = targets[matches, 2:]
                bboxes[..., :4].mul_(scale_tensor)
                out[j, :n] = torch.cat([targets[matches, 1:2], bboxes], dim=-1)
    return out



ultralytics.utils.loss.E2EDetectLoss

Lớp tiêu chí tính toán tổn thất đào tạo.

Mã nguồn trong ultralytics/utils/loss.py
class E2EDetectLoss:
    """Criterion class for computing training losses."""

    def __init__(self, model):
        """Initialize E2EDetectLoss with one-to-many and one-to-one detection losses using the provided model."""
        self.one2many = v8DetectionLoss(model, tal_topk=10)
        self.one2one = v8DetectionLoss(model, tal_topk=1)

    def __call__(self, preds, batch):
        """Calculate the sum of the loss for box, cls and dfl multiplied by batch size."""
        preds = preds[1] if isinstance(preds, tuple) else preds
        one2many = preds["one2many"]
        loss_one2many = self.one2many(one2many, batch)
        one2one = preds["one2one"]
        loss_one2one = self.one2one(one2one, batch)
        return loss_one2many[0] + loss_one2one[0], loss_one2many[1] + loss_one2one[1]

__call__(preds, batch)

Tính tổng tổn thất cho hộp, cls và dfl nhân với kích thước lô.

Mã nguồn trong ultralytics/utils/loss.py
def __call__(self, preds, batch):
    """Calculate the sum of the loss for box, cls and dfl multiplied by batch size."""
    preds = preds[1] if isinstance(preds, tuple) else preds
    one2many = preds["one2many"]
    loss_one2many = self.one2many(one2many, batch)
    one2one = preds["one2one"]
    loss_one2one = self.one2one(one2one, batch)
    return loss_one2many[0] + loss_one2one[0], loss_one2many[1] + loss_one2one[1]

__init__(model)

Initialize E2EDetectLoss with one-to-many and one-to-one detection losses using the provided model.

Mã nguồn trong ultralytics/utils/loss.py
def __init__(self, model):
    """Initialize E2EDetectLoss with one-to-many and one-to-one detection losses using the provided model."""
    self.one2many = v8DetectionLoss(model, tal_topk=10)
    self.one2one = v8DetectionLoss(model, tal_topk=1)





Created 2023-11-12, Updated 2024-06-20
Authors: Burhan-Q (2), glenn-jocher (6), Laughing-q (1)