Skip to content

Reference for ultralytics/utils/ops.py

Note

This file is available at https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py. If you spot a problem please help fix it by contributing a Pull Request 🛠️. Thank you 🙏!


ultralytics.utils.ops.Profile

Profile(t=0.0, device: device = None)

Bases: ContextDecorator

YOLOv8 Profile class. Use as a decorator with @Profile() or as a context manager with 'with Profile():'.

Attributes:

Name Type Description
t float

Accumulated time.

device device

Device used for model inference.

cuda bool

Whether CUDA is being used.

Examples:

>>> from ultralytics.utils.ops import Profile
>>> with Profile(device=device) as dt:
...     pass  # slow operation here
>>> print(dt)  # prints "Elapsed time is 9.5367431640625e-07 s"

Parameters:

Name Type Description Default
t float

Initial time.

0.0
device device

Device used for model inference.

None
Source code in ultralytics/utils/ops.py
33
34
35
36
37
38
39
40
41
42
43
def __init__(self, t=0.0, device: torch.device = None):
    """
    Set up the profiler state.

    Args:
        t (float): Starting value of the accumulated time.
        device (torch.device): Device used for model inference; may be None.
    """
    self.t, self.device = t, device
    # CUDA counts as active only for a truthy device whose string form begins with "cuda"
    device_is_cuda = bool(device) and str(device).startswith("cuda")
    self.cuda = device_is_cuda

__enter__

__enter__()

Start timing.

Source code in ultralytics/utils/ops.py
45
46
47
48
def __enter__(self):
    """Record the start timestamp and return the profiler itself."""
    started_at = self.time()
    self.start = started_at
    return self

__exit__

__exit__(type, value, traceback)

Stop timing.

Source code in ultralytics/utils/ops.py
50
51
52
53
def __exit__(self, type, value, traceback):  # noqa
    """Record the elapsed time of this run in `dt` and add it to the running total `t`."""
    elapsed = self.time() - self.start
    self.dt = elapsed  # duration of the block that just finished
    self.t += elapsed  # running total across all uses

__str__

__str__()

Returns a human-readable string representing the accumulated elapsed time in the profiler.

Source code in ultralytics/utils/ops.py
55
56
57
def __str__(self):
    """Format the accumulated elapsed time of the profiler as a human-readable string."""
    total = self.t
    return f"Elapsed time is {total} s"

time

time()

Get current time.

Source code in ultralytics/utils/ops.py
59
60
61
62
63
def time(self):
    """Return the current high-resolution timestamp, synchronizing the CUDA device first when one is in use."""
    if not self.cuda:
        return time.perf_counter()
    # Wait for work queued on the device so the timestamp reflects finished GPU work
    torch.cuda.synchronize(self.device)
    return time.perf_counter()





ultralytics.utils.ops.segment2box

segment2box(segment, width=640, height=640)

Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy).

Parameters:

Name Type Description Default
segment Tensor

The segment label.

required
width int

The width of the image.

640
height int

The height of the image.

640

Returns:

Type Description
ndarray

The minimum and maximum x and y values of the segment.

Source code in ultralytics/utils/ops.py
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def segment2box(segment, width=640, height=640):
    """
    Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy).

    Args:
        segment (np.ndarray): The segment label as an (N, 2) array of xy points.
        width (int): The width of the image.
        height (int): The height of the image.

    Returns:
        (np.ndarray): The minimum and maximum x and y values of the points lying inside the image, as
            (x1, y1, x2, y2) with the dtype of `segment`. Four zeros are returned when no point lies inside
            the image or the segment is empty.
    """
    x, y = segment.T  # segment xy
    if x.size == 0:  # empty segment: min()/max() below would raise on a zero-size array
        return np.zeros(4, dtype=segment.dtype)
    # any 3 out of 4 sides are outside the image, clip coordinates first, https://github.com/ultralytics/ultralytics/pull/18294
    if np.array([x.min() < 0, y.min() < 0, x.max() > width, y.max() > height]).sum() >= 3:
        x = x.clip(0, width)
        y = y.clip(0, height)
    inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
    x = x[inside]
    y = y[inside]
    # Test emptiness by size: a truthiness test such as any(x) reports "empty" when every remaining x equals 0,
    # which discards valid segments lying on the left image edge.
    if x.size == 0:
        return np.zeros(4, dtype=segment.dtype)
    return np.array([x.min(), y.min(), x.max(), y.max()], dtype=segment.dtype)  # xyxy





ultralytics.utils.ops.scale_boxes

scale_boxes(
    img1_shape, boxes, img0_shape, ratio_pad=None, padding=True, xywh=False
)

Rescale bounding boxes from img1_shape to img0_shape.

Parameters:

Name Type Description Default
img1_shape tuple

The shape of the image that the bounding boxes are for, in the format of (height, width).

required
boxes Tensor

The bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2).

required
img0_shape tuple

The shape of the target image, in the format of (height, width).

required
ratio_pad tuple

A tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be calculated based on the size difference between the two images.

None
padding bool

If True, assume the boxes were predicted on a YOLO-style padded (letterboxed) image and subtract the padding offset before rescaling. If False, do regular rescaling.

True
xywh bool

The box format is xywh or not.

False

Returns:

Type Description
Tensor

The scaled bounding boxes, in the format of (x1, y1, x2, y2).

Source code in ultralytics/utils/ops.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True, xywh=False):
    """
    Rescale bounding boxes from img1_shape to img0_shape.

    The boxes are modified in place and the same object is returned.

    Args:
        img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
        boxes (torch.Tensor): The bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2), or
            (x, y, w, h) when `xywh` is True.
        img0_shape (tuple): The shape of the target image, in the format of (height, width).
        ratio_pad (tuple): A tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
            calculated based on the size difference between the two images.
        padding (bool): If True, the x/y padding offset is subtracted from the boxes before rescaling. If False,
            only the division by the gain is applied.
        xywh (bool): The box format is xywh or not.

    Returns:
        (torch.Tensor): The scaled bounding boxes. Boxes in (x1, y1, x2, y2) format are clipped to img0_shape;
            boxes in xywh format are returned unclipped.
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
        pad_x = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1)
        pad_y = round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1)
    else:
        gain = ratio_pad[0][0]
        pad_x, pad_y = ratio_pad[1][0], ratio_pad[1][1]

    if padding:
        boxes[..., 0] -= pad_x  # x padding
        boxes[..., 1] -= pad_y  # y padding
        if not xywh:  # in xywh format columns 2 and 3 are sizes, which carry no padding offset
            boxes[..., 2] -= pad_x  # x padding
            boxes[..., 3] -= pad_y  # y padding
    boxes[..., :4] /= gain
    # clip_boxes interprets columns 2 and 3 as x2/y2, so applying it to xywh boxes would clamp width/height
    return boxes if xywh else clip_boxes(boxes, img0_shape)





ultralytics.utils.ops.make_divisible

make_divisible(x, divisor)

Returns the smallest multiple of the given divisor that is greater than or equal to x (i.e. x rounded up to a multiple of the divisor).

Parameters:

Name Type Description Default
x int

The number to make divisible.

required
divisor int | Tensor

The divisor.

required

Returns:

Type Description
int

The smallest multiple of the divisor that is greater than or equal to x.

Source code in ultralytics/utils/ops.py
130
131
132
133
134
135
136
137
138
139
140
141
142
143
def make_divisible(x, divisor):
    """
    Round x up to a multiple of the given divisor.

    Args:
        x (int): The number to make divisible.
        divisor (int | torch.Tensor): The divisor. For a tensor, its maximum element (converted to int) is used.

    Returns:
        (int): The smallest multiple of the divisor that is greater than or equal to x.
    """
    step = int(divisor.max()) if isinstance(divisor, torch.Tensor) else divisor
    multiples = math.ceil(x / step)  # number of whole steps needed to reach or exceed x
    return multiples * step





ultralytics.utils.ops.nms_rotated

nms_rotated(boxes, scores, threshold=0.45, use_triu=True)

NMS for oriented bounding boxes using probiou and fast-nms.

Parameters:

Name Type Description Default
boxes Tensor

Rotated bounding boxes, shape (N, 5), format xywhr.

required
scores Tensor

Confidence scores, shape (N,).

required
threshold float

IoU threshold.

0.45
use_triu bool

Whether to use the torch.triu operator. Disabling it is useful when exporting OBB models to formats that do not support torch.triu.

True

Returns:

Type Description
Tensor

Indices of boxes to keep after NMS.

Source code in ultralytics/utils/ops.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
def nms_rotated(boxes, scores, threshold=0.45, use_triu=True):
    """
    NMS for oriented bounding boxes using probiou and fast-nms.

    Boxes are ranked by descending score. A box is kept only when no higher-ranked box (whether or not that box is
    itself kept) overlaps it with an IoU at or above `threshold`.

    Args:
        boxes (torch.Tensor): Rotated bounding boxes, shape (N, 5), format xywhr.
        scores (torch.Tensor): Confidence scores, shape (N,). Modified in place when `use_triu` is False.
        threshold (float): IoU threshold.
        use_triu (bool): Whether to use the `torch.triu` operator. Disabling it is useful when exporting OBB
            models to formats that do not support `torch.triu`.

    Returns:
        (torch.Tensor): Indices of boxes to keep after NMS. With `use_triu=False` the result always has N entries
            (the indices ordered by the zeroed scores) so that its length is fixed.
    """
    sorted_idx = torch.argsort(scores, descending=True)
    boxes = boxes[sorted_idx]  # rank boxes from highest to lowest score
    ious = batch_probiou(boxes, boxes)  # presumably an (N, N) pairwise IoU matrix; helper is defined outside this view
    if use_triu:
        # Keep only the strictly upper triangle so column j holds overlaps with higher-ranked boxes i < j
        ious = ious.triu_(diagonal=1)
        # pick = torch.nonzero(ious.max(dim=0)[0] < threshold).squeeze_(-1)
        # NOTE: counting with sum() instead of max() avoids a special case for empty input, which keeps the
        # function exportable without an if-else condition
        pick = torch.nonzero((ious >= threshold).sum(0) <= 0).squeeze_(-1)
    else:
        # Build the same strictly-upper-triangular mask from index grids instead of torch.triu
        n = boxes.shape[0]
        row_idx = torch.arange(n, device=boxes.device).view(-1, 1).expand(-1, n)
        col_idx = torch.arange(n, device=boxes.device).view(1, -1).expand(n, -1)
        upper_mask = row_idx < col_idx
        ious = ious * upper_mask
        # Zeroing these scores ensures the additional indices would not affect the final results
        # NOTE(review): the mask is in score-sorted order while `scores` is still in input order, and `pick` is
        # then used to index `sorted_idx`; this looks consistent only if `scores` arrives already sorted in
        # descending order - confirm against the export caller.
        scores[~((ious >= threshold).sum(0) <= 0)] = 0
        # NOTE: return indices with fixed length to avoid TFLite reshape error
        pick = torch.topk(scores, scores.shape[0]).indices
    return sorted_idx[pick]





ultralytics.utils.ops.non_max_suppression

non_max_suppression(
    prediction,
    conf_thres=0.25,
    iou_thres=0.45,
    classes=None,
    agnostic=False,
    multi_label=False,
    labels=(),
    max_det=300,
    nc=0,
    max_time_img=0.05,
    max_nms=30000,
    max_wh=7680,
    in_place=True,
    rotated=False,
    end2end=False,
    return_idxs=False,
)

Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.

Parameters:

Name Type Description Default
prediction Tensor

A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes) containing the predicted boxes, classes, and masks. The tensor should be in the format output by a model, such as YOLO.

required
conf_thres float

The confidence threshold below which boxes will be filtered out. Valid values are between 0.0 and 1.0.

0.25
iou_thres float

The IoU threshold used during NMS: a box is suppressed when its IoU with a higher-scoring box exceeds this value. Valid values are between 0.0 and 1.0.

0.45
classes List[int]

A list of class indices to consider. If None, all classes will be considered.

None
agnostic bool

If True, the model is agnostic to the number of classes, and all classes will be considered as one.

False
multi_label bool

If True, each box may have multiple labels.

False
labels List[List[Union[int, float, Tensor]]]

A list of lists, where each inner list contains the apriori labels for a given image. The list should be in the format output by a dataloader, with each label being a tuple of (class_index, x1, y1, x2, y2).

()
max_det int

The maximum number of boxes to keep after NMS.

300
nc int

The number of classes output by the model. Any indices after this will be considered masks.

0
max_time_img float

The maximum time (seconds) for processing one image.

0.05
max_nms int

The maximum number of boxes into torchvision.ops.nms().

30000
max_wh int

The maximum box width and height in pixels.

7680
in_place bool

If True, the input prediction tensor will be modified in place.

True
rotated bool

If Oriented Bounding Boxes (OBB) are being passed for NMS.

False
end2end bool

If the model doesn't require NMS.

False
return_idxs bool

Return the indices of the detections that were kept.

False

Returns:

Type Description
List[Tensor]

A list of length batch_size, where each element is a tensor of shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).

Source code in ultralytics/utils/ops.py
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
def non_max_suppression(
    prediction,
    conf_thres=0.25,
    iou_thres=0.45,
    classes=None,
    agnostic=False,
    multi_label=False,
    labels=(),
    max_det=300,
    nc=0,  # number of classes (optional)
    max_time_img=0.05,
    max_nms=30000,
    max_wh=7680,
    in_place=True,
    rotated=False,
    end2end=False,
    return_idxs=False,
):
    """
    Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.

    Args:
        prediction (torch.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
            containing the predicted boxes, classes, and masks. The tensor should be in the format
            output by a model, such as YOLO. If a list or tuple is passed, only its first element is used.
        conf_thres (float): The confidence threshold below which boxes will be filtered out.
            Valid values are between 0.0 and 1.0.
        iou_thres (float): The IoU threshold passed to NMS; a box overlapping a higher-scoring box by more than
            this value is suppressed. Valid values are between 0.0 and 1.0.
        classes (List[int]): A list of class indices to consider. If None, all classes will be considered.
        agnostic (bool): If True, NMS is class-agnostic: boxes of all classes compete with each other
            (no per-class coordinate offset is applied).
        multi_label (bool): If True, each box may have multiple labels. Only effective when there is more than
            one class.
        labels (List[List[Union[int, float, torch.Tensor]]]): A list of lists, where each inner
            list contains the apriori labels for a given image. The list should be in the format
            output by a dataloader, with each label being a tuple of (class_index, x1, y1, x2, y2).
            Ignored when `rotated` is True.
        max_det (int): The maximum number of boxes to keep after NMS.
        nc (int): The number of classes output by the model. Any indices after this will be considered masks.
            If 0, it is inferred as `prediction.shape[1] - 4`.
        max_time_img (float): The maximum time (seconds) for processing one image. The overall limit is
            `2.0 + max_time_img * batch_size` seconds; once exceeded, a warning is logged and the remaining images
            are left with empty results.
        max_nms (int): The maximum number of boxes into torchvision.ops.nms().
        max_wh (int): The maximum box width and height in pixels. Used as the per-class coordinate offset that
            keeps boxes of different classes from overlapping during batched NMS.
        in_place (bool): If True, the input prediction tensor will be modified in place.
        rotated (bool): If Oriented Bounding Boxes (OBB) are being passed for NMS.
        end2end (bool): If the model doesn't require NMS.
        return_idxs (bool): Return the indices of the detections that were kept. Not honoured on the end-to-end
            path, which always returns only the list of detections.

    Returns:
        (List[torch.Tensor]): A list of length batch_size, where each element is a tensor of
            shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns
            (x1, y1, x2, y2, confidence, class, mask1, mask2, ...). When `return_idxs` is True (and the
            end-to-end path is not taken) a tuple `(output, keepi)` is returned instead, where `keepi` holds, per
            image, the indices of the kept predictions along the num_boxes axis of `prediction`.
    """
    import torchvision  # scope for faster 'import ultralytics'

    # Checks
    assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
    assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
    if isinstance(prediction, (list, tuple)):  # YOLOv8 model in validation model, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output
    if classes is not None:
        classes = torch.tensor(classes, device=prediction.device)

    # End-to-end outputs are already final detections: only confidence/max_det/class filtering is applied
    if prediction.shape[-1] == 6 or end2end:  # end-to-end model (BNC, i.e. 1,300,6)
        output = [pred[pred[:, 4] > conf_thres][:max_det] for pred in prediction]
        if classes is not None:
            output = [pred[(pred[:, 5:6] == classes).any(1)] for pred in output]
        return output

    bs = prediction.shape[0]  # batch size (BCN, i.e. 1,84,6300)
    nc = nc or (prediction.shape[1] - 4)  # number of classes
    nm = prediction.shape[1] - nc - 4  # number of masks
    mi = 4 + nc  # mask start index
    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates
    xinds = torch.stack([torch.arange(len(i), device=prediction.device) for i in xc])[..., None]  # to track idxs

    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    time_limit = 2.0 + max_time_img * bs  # seconds to quit after
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)

    prediction = prediction.transpose(-1, -2)  # shape(1,84,6300) to shape(1,6300,84)
    if not rotated:
        if in_place:
            prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy
        else:
            prediction = torch.cat((xywh2xyxy(prediction[..., :4]), prediction[..., 4:]), dim=-1)  # xywh to xyxy

    t = time.time()
    # Placeholders for every image; entries are replaced below only for images that yield detections
    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
    keepi = [torch.zeros((0, 1), device=prediction.device)] * bs  # to store the kept idxs
    for xi, (x, xk) in enumerate(zip(prediction, xinds)):  # image index, (preds, preds indices)
        # Apply constraints
        # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0  # width-height
        filt = xc[xi]  # confidence
        x, xk = x[filt], xk[filt]

        # Cat apriori labels if autolabelling
        # NOTE(review): rows are appended to x but not to xk, so xk no longer lines up with x when labels are
        # given - confirm whether labels and return_idxs are ever combined
        if labels and len(labels[xi]) and not rotated:
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
            v[:, :4] = xywh2xyxy(lb[:, 1:5])  # box
            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Detections matrix nx6 (xyxy, conf, cls)
        box, cls, mask = x.split((4, nc, nm), 1)

        if multi_label:
            # One output row per (box, class) pair whose class score exceeds the threshold
            i, j = torch.where(cls > conf_thres)
            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
            xk = xk[i]
        else:  # best class only
            conf, j = cls.max(1, keepdim=True)
            filt = conf.view(-1) > conf_thres
            x = torch.cat((box, conf, j.float(), mask), 1)[filt]
            xk = xk[filt]

        # Filter by class
        if classes is not None:
            filt = (x[:, 5:6] == classes).any(1)
            x, xk = x[filt], xk[filt]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        if n > max_nms:  # excess boxes
            filt = x[:, 4].argsort(descending=True)[:max_nms]  # sort by confidence and remove excess boxes
            x, xk = x[filt], xk[filt]

        # Batched NMS
        # Shifting each box by class_index * max_wh separates classes spatially, so one NMS call handles all classes
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        scores = x[:, 4]  # scores
        if rotated:
            boxes = torch.cat((x[:, :2] + c, x[:, 2:4], x[:, -1:]), dim=-1)  # xywhr
            i = nms_rotated(boxes, scores, iou_thres)
        else:
            boxes = x[:, :4] + c  # boxes (offset by class)
            i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        i = i[:max_det]  # limit detections

        # # Experimental
        # merge = False  # use merge-NMS
        # if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
        #     # Update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
        #     from .metrics import box_iou
        #     iou = box_iou(boxes[i], boxes) > iou_thres  # IoU matrix
        #     weights = iou * scores[None]  # box weights
        #     x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
        #     redundant = True  # require redundant detections
        #     if redundant:
        #         i = i[iou.sum(1) > 1]  # require redundancy

        output[xi], keepi[xi] = x[i], xk[i].reshape(-1)
        if (time.time() - t) > time_limit:
            LOGGER.warning(f"NMS time limit {time_limit:.3f}s exceeded")
            break  # time limit exceeded

    return (output, keepi) if return_idxs else output





ultralytics.utils.ops.clip_boxes

clip_boxes(boxes, shape)

Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.

Parameters:

Name Type Description Default
boxes Tensor | ndarray

The bounding boxes to clip.

required
shape tuple

The shape of the image.

required

Returns:

Type Description
Tensor | ndarray

The clipped boxes.

Source code in ultralytics/utils/ops.py
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
def clip_boxes(boxes, shape):
    """
    Clip bounding boxes in place so that they lie within an image of the given shape.

    Args:
        boxes (torch.Tensor | numpy.ndarray): The bounding boxes to clip; the last axis holds (x1, y1, x2, y2).
        shape (tuple): The shape of the image, as (height, width).

    Returns:
        (torch.Tensor | numpy.ndarray): The same `boxes` object, with x values limited to [0, width] and y values
            limited to [0, height].
    """
    height, width = shape[0], shape[1]
    if not isinstance(boxes, torch.Tensor):  # np.array (faster grouped)
        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, width)  # x1, x2
        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, height)  # y1, y2
        return boxes
    # Tensors: faster column by column (WARNING: inplace .clamp_() Apple MPS bug, so assign the clamped copy)
    for column, upper in enumerate((width, height, width, height)):  # x1, y1, x2, y2
        boxes[..., column] = boxes[..., column].clamp(0, upper)
    return boxes





ultralytics.utils.ops.clip_coords

clip_coords(coords, shape)

Clip line coordinates to the image boundaries.

Parameters:

Name Type Description Default
coords Tensor | ndarray

A list of line coordinates.

required
shape tuple

A tuple of integers representing the size of the image in the format (height, width).

required

Returns:

Type Description
Tensor | ndarray

Clipped coordinates.

Source code in ultralytics/utils/ops.py
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
def clip_coords(coords, shape):
    """
    Clip line coordinates in place to the image boundaries.

    Args:
        coords (torch.Tensor | numpy.ndarray): Line coordinates; the last axis holds x at index 0 and y at index 1.
        shape (tuple): A tuple of integers representing the size of the image in the format (height, width).

    Returns:
        (torch.Tensor | numpy.ndarray): The same `coords` object with x limited to [0, width] and y to [0, height].
    """
    is_tensor = isinstance(coords, torch.Tensor)
    for axis, upper in ((0, shape[1]), (1, shape[0])):  # (x, width) then (y, height)
        column = coords[..., axis]
        # Tensors use out-of-place clamp (WARNING: inplace .clamp_() Apple MPS bug); arrays use clip
        coords[..., axis] = column.clamp(0, upper) if is_tensor else column.clip(0, upper)
    return coords





ultralytics.utils.ops.scale_image

scale_image(masks, im0_shape, ratio_pad=None)

Takes a mask, and resizes it to the original image size.

Parameters:

Name Type Description Default
masks ndarray

Resized and padded masks/images, [h, w, num]/[h, w, 3].

required
im0_shape tuple

The original image shape.

required
ratio_pad tuple

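A tuple of (ratio, pad). Only the pad (x, y) is used, to crop the padding away before resizing. If not provided, the padding is calculated from the mask shape and the original image shape.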

None

Returns:

Name Type Description
masks ndarray

The masks that are being returned with shape [h, w, num].

Source code in ultralytics/utils/ops.py
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
def scale_image(masks, im0_shape, ratio_pad=None):
    """
    Takes a mask, and resizes it to the original image size.

    The padding is cropped away first and the remaining region is then resized to `im0_shape`.

    Args:
        masks (np.ndarray): Resized and padded masks/images, [h, w, num]/[h, w, 3].
        im0_shape (tuple): The original image shape, (height, width, ...). Only the first two entries are used.
        ratio_pad (tuple): A tuple of (ratio, pad). Only the pad (x, y) is used; when None the padding is
            computed from the two shapes.

    Returns:
        masks (np.ndarray): The masks that are being returned with shape [h, w, num]. If the input already has
            the target height and width it is returned unchanged.

    Raises:
        ValueError: If `masks` has fewer than 2 dimensions.
    """
    # Rescale coordinates (xyxy) from im1_shape to im0_shape
    im1_shape = masks.shape
    # tuple() keeps the fast path working when im0_shape is passed as a list rather than a tuple
    if tuple(im1_shape[:2]) == tuple(im0_shape[:2]):
        return masks
    # Validate the rank before indexing im1_shape[1] below; otherwise a 1-D input fails with an IndexError and
    # this check is never reached
    if len(im1_shape) < 2:
        raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(im1_shape)}')
    if ratio_pad is None:  # calculate from im0_shape
        gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain  = old / new
        pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding
    else:
        # gain = ratio_pad[0][0]
        pad = ratio_pad[1]
    top, left = int(pad[1]), int(pad[0])  # y, x
    bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])

    masks = masks[top:bottom, left:right]  # crop the padded border
    masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))  # cv2 expects (width, height)
    if len(masks.shape) == 2:  # restore the trailing channel axis if the resize returned a 2-D array
        masks = masks[:, :, None]

    return masks





ultralytics.utils.ops.xyxy2xywh

xyxy2xywh(x)

Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner.

Parameters:

Name Type Description Default
x ndarray | Tensor

The input bounding box coordinates in (x1, y1, x2, y2) format.

required

Returns:

Name Type Description
y ndarray | Tensor

The bounding box coordinates in (x, y, width, height) format.

Source code in ultralytics/utils/ops.py
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
def xyxy2xywh(x):
    """
    Convert boxes from corner format (x1, y1, x2, y2) to center format (x, y, width, height), where (x1, y1) is the
    top-left corner and (x2, y2) is the bottom-right corner.

    Args:
        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.

    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height) format.
    """
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    left, top, right, bottom = (x[..., k] for k in range(4))
    out = empty_like(x)  # faster than clone/copy
    out[..., 0] = (left + right) / 2  # x center
    out[..., 1] = (top + bottom) / 2  # y center
    out[..., 2] = right - left  # width
    out[..., 3] = bottom - top  # height
    return out





ultralytics.utils.ops.xywh2xyxy

xywh2xyxy(x)

Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner. Note: ops per 2 channels faster than per channel.

Parameters:

Name Type Description Default
x ndarray | Tensor

The input bounding box coordinates in (x, y, width, height) format.

required

Returns:

Name Type Description
y ndarray | Tensor

The bounding box coordinates in (x1, y1, x2, y2) format.

Source code in ultralytics/utils/ops.py
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
def xywh2xyxy(x):
    """
    Convert boxes from center format (x, y, width, height) to corner format (x1, y1, x2, y2), where (x1, y1) is the
    top-left corner and (x2, y2) is the bottom-right corner. Note: ops per 2 channels faster than per channel.

    Args:
        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.

    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
    """
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    corners = empty_like(x)  # faster than clone/copy
    center = x[..., :2]
    half_size = x[..., 2:] / 2  # half width-height
    corners[..., :2] = center - half_size  # top left xy
    corners[..., 2:] = center + half_size  # bottom right xy
    return corners





ultralytics.utils.ops.xywhn2xyxy

xywhn2xyxy(x, w=640, h=640, padw=0, padh=0)

Convert normalized bounding box coordinates to pixel coordinates.

Parameters:

Name Type Description Default
x ndarray | Tensor

The bounding box coordinates.

required
w int

Width of the image.

640
h int

Height of the image.

640
padw int

Padding width.

0
padh int

Padding height.

0

Returns:

Name Type Description
y ndarray | Tensor

The coordinates of the bounding box in the format [x1, y1, x2, y2] where x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box.

Source code in ultralytics/utils/ops.py
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
    """
    Convert normalized (x, y, width, height) boxes to pixel (x1, y1, x2, y2) boxes.

    Args:
        x (np.ndarray | torch.Tensor): The bounding box coordinates.
        w (int): Width of the image.
        h (int): Height of the image.
        padw (int): Padding width, added to the x coordinates.
        padh (int): Padding height, added to the y coordinates.

    Returns:
        y (np.ndarray | torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where
            x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box.
    """
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    cx, cy = x[..., 0], x[..., 1]
    half_w, half_h = x[..., 2] / 2, x[..., 3] / 2
    out = empty_like(x)  # faster than clone/copy
    out[..., 0] = w * (cx - half_w) + padw  # top left x
    out[..., 1] = h * (cy - half_h) + padh  # top left y
    out[..., 2] = w * (cx + half_w) + padw  # bottom right x
    out[..., 3] = h * (cy + half_h) + padh  # bottom right y
    return out





ultralytics.utils.ops.xyxy2xywhn

xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0)

Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format. x, y, width and height are normalized to image dimensions.

Parameters:

Name Type Description Default
x ndarray | Tensor

The input bounding box coordinates in (x1, y1, x2, y2) format.

required
w int

The width of the image.

640
h int

The height of the image.

640
clip bool

If True, the boxes will be clipped to the image boundaries.

False
eps float

The minimum value of the box's width and height.

0.0

Returns:

Name Type Description
y ndarray | Tensor

The bounding box coordinates in (x, y, width, height, normalized) format

Source code in ultralytics/utils/ops.py
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
    """
    Turn corner-format boxes (x1, y1, x2, y2) into center-format boxes (cx, cy, bw, bh) divided by the image size.

    Args:
        x (np.ndarray | torch.Tensor): Boxes whose last dimension holds (x1, y1, x2, y2).
        w (int): Image width used as the horizontal divisor.
        h (int): Image height used as the vertical divisor.
        clip (bool): When True, the boxes are first passed through clip_boxes with bounds (h - eps, w - eps).
        eps (float): Margin subtracted from the clipping bounds; only used when clip is True.

    Returns:
        (np.ndarray | torch.Tensor): Boxes in normalized (cx, cy, bw, bh) order.
    """
    if clip:
        x = clip_boxes(x, (h - eps, w - eps))
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    left, top, right, bottom = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
    out = empty_like(x)  # fresh output buffer, every column is overwritten below
    out[..., 0] = ((left + right) / 2) / w  # normalized center x
    out[..., 1] = ((top + bottom) / 2) / h  # normalized center y
    out[..., 2] = (right - left) / w  # normalized width
    out[..., 3] = (bottom - top) / h  # normalized height
    return out





ultralytics.utils.ops.xywh2ltwh

xywh2ltwh(x)

Convert the bounding box format from [x, y, w, h] to [x1, y1, w, h], where x1, y1 are the top-left coordinates.

Parameters:

Name Type Description Default
x ndarray | Tensor

The input tensor with the bounding box coordinates in the xywh format

required

Returns:

Name Type Description
y ndarray | Tensor

The bounding box coordinates in the ltwh format ([x1, y1, w, h]).

Source code in ultralytics/utils/ops.py
512
513
514
515
516
517
518
519
520
521
522
523
524
525
def xywh2ltwh(x):
    """
    Shift boxes from center format [cx, cy, w, h] to top-left format [x1, y1, w, h].

    Args:
        x (np.ndarray | torch.Tensor): Boxes in [cx, cy, w, h] order along the last dimension.

    Returns:
        (np.ndarray | torch.Tensor): A copy of the input with the first two columns replaced by the top-left corner;
            width and height are carried over unchanged.
    """
    if isinstance(x, torch.Tensor):
        out = x.clone()
    else:
        out = np.copy(x)
    # (position column, size column): x pairs with width, y pairs with height
    for pos, size in ((0, 2), (1, 3)):
        out[..., pos] = x[..., pos] - x[..., size] / 2
    return out





ultralytics.utils.ops.xyxy2ltwh

xyxy2ltwh(x)

Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right.

Parameters:

Name Type Description Default
x ndarray | Tensor

The input tensor with the bounding boxes coordinates in the xyxy format

required

Returns:

Name Type Description
y ndarray | Tensor

The bounding box coordinates in the ltwh format ([x1, y1, w, h]).

Source code in ultralytics/utils/ops.py
528
529
530
531
532
533
534
535
536
537
538
539
540
541
def xyxy2ltwh(x):
    """
    Turn corner-format boxes [x1, y1, x2, y2] into top-left format [x1, y1, w, h].

    Args:
        x (np.ndarray | torch.Tensor): Boxes in [x1, y1, x2, y2] order along the last dimension.

    Returns:
        (np.ndarray | torch.Tensor): A copy of the input whose last two columns hold width and height; the top-left
            corner is carried over unchanged.
    """
    if isinstance(x, torch.Tensor):
        out = x.clone()
    else:
        out = np.copy(x)
    # (far-corner column, near-corner column): extent = far - near
    for far, near in ((2, 0), (3, 1)):
        out[..., far] = x[..., far] - x[..., near]
    return out





ultralytics.utils.ops.ltwh2xywh

ltwh2xywh(x)

Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.

Parameters:

Name Type Description Default
x Tensor

the input tensor

required

Returns:

Name Type Description
y ndarray | Tensor

The bounding box coordinates in the xywh format.

Source code in ultralytics/utils/ops.py
544
545
546
547
548
549
550
551
552
553
554
555
556
557
def ltwh2xywh(x):
    """
    Shift boxes from top-left format [x1, y1, w, h] to center format [cx, cy, w, h].

    Args:
        x (np.ndarray | torch.Tensor): Boxes in [x1, y1, w, h] order along the last dimension.

    Returns:
        (np.ndarray | torch.Tensor): A copy of the input with the first two columns replaced by the box center; width
            and height are carried over unchanged.
    """
    if isinstance(x, torch.Tensor):
        out = x.clone()
    else:
        out = np.copy(x)
    # (position column, size column): x pairs with width, y pairs with height
    for pos, size in ((0, 2), (1, 3)):
        out[..., pos] = x[..., pos] + x[..., size] / 2
    return out





ultralytics.utils.ops.xyxyxyxy2xywhr

xyxyxyxy2xywhr(x)

Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation]. Rotation values are returned in radians from 0 to pi/2.

Parameters:

Name Type Description Default
x ndarray | Tensor

Input box corners [xy1, xy2, xy3, xy4] of shape (n, 8).

required

Returns:

Type Description
ndarray | Tensor

Converted data in [cx, cy, w, h, rotation] format of shape (n, 5).

Source code in ultralytics/utils/ops.py
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
def xyxyxyxy2xywhr(x):
    """
    Convert batched Oriented Bounding Boxes (OBB) from corner points [xy1, xy2, xy3, xy4] to [cx, cy, w, h, rotation].

    Each box is fitted with cv2.minAreaRect and the angle it reports (degrees) is converted to radians.

    Args:
        x (numpy.ndarray | torch.Tensor): Input box corners [xy1, xy2, xy3, xy4] of shape (n, 8).

    Returns:
        (numpy.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format of shape (n, 5). Torch
            inputs yield a tensor on the same device and with the same dtype as the input. An empty input (n == 0)
            yields an empty result of shape (0, 5).
    """
    is_torch = isinstance(x, torch.Tensor)
    if len(x) == 0:
        # Nothing to fit. Handle this up front because reshape(0, -1, 2) cannot infer the -1 axis for a zero-size
        # array, and converting an empty list of boxes would produce shape (0,) instead of (0, 5).
        return x.new_zeros((0, 5)) if is_torch else np.zeros((0, 5))
    points = x.cpu().numpy() if is_torch else x
    points = points.reshape(len(x), -1, 2)  # one (k, 2) point set per box
    rboxes = []
    for pts in points:
        # NOTE: Use cv2.minAreaRect to get accurate xywhr,
        # especially some objects are cut off by augmentations in dataloader.
        (cx, cy), (w, h), angle = cv2.minAreaRect(pts)
        rboxes.append([cx, cy, w, h, angle / 180 * np.pi])  # degrees -> radians
    return torch.tensor(rboxes, device=x.device, dtype=x.dtype) if is_torch else np.asarray(rboxes)





ultralytics.utils.ops.xywhr2xyxyxyxy

xywhr2xyxyxyxy(x)

Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4]. Rotation values should be in radians from 0 to pi/2.

Parameters:

Name Type Description Default
x ndarray | Tensor

Boxes in [cx, cy, w, h, rotation] format of shape (n, 5) or (b, n, 5).

required

Returns:

Type Description
ndarray | Tensor

Converted corner points of shape (n, 4, 2) or (b, n, 4, 2).

Source code in ultralytics/utils/ops.py
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
def xywhr2xyxyxyxy(x):
    """
    Expand rotated boxes [cx, cy, w, h, rotation] into their four corner points.

    The rotation is fed directly to cos/sin, so it is interpreted in radians.

    Args:
        x (numpy.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format of shape (n, 5) or (b, n, 5).

    Returns:
        (numpy.ndarray | torch.Tensor): Corner points of shape (n, 4, 2) or (b, n, 4, 2).
    """
    # Pick the math backend that matches the input container
    if isinstance(x, torch.Tensor):
        cos_fn, sin_fn, join, pile = torch.cos, torch.sin, torch.cat, torch.stack
    else:
        cos_fn, sin_fn, join, pile = np.cos, np.sin, np.concatenate, np.stack

    center = x[..., :2]
    # Keep a trailing axis of size 1 so the pieces can be concatenated into 2-vectors
    width, height, theta = x[..., 2:3], x[..., 3:4], x[..., 4:5]
    cos_t, sin_t = cos_fn(theta), sin_fn(theta)

    # Half-extent vectors along the box's width and height axes
    along_w = join([width / 2 * cos_t, width / 2 * sin_t], -1)
    along_h = join([-height / 2 * sin_t, height / 2 * cos_t], -1)

    corners = [
        center + along_w + along_h,
        center + along_w - along_h,
        center - along_w - along_h,
        center - along_w + along_h,
    ]
    return pile(corners, -2)





ultralytics.utils.ops.ltwh2xyxy

ltwh2xyxy(x)

Convert bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.

Parameters:

Name Type Description Default
x ndarray | Tensor

The input bounding box coordinates in [x1, y1, w, h] format.

required

Returns:

Type Description
ndarray | Tensor

The xyxy coordinates of the bounding boxes.

Source code in ultralytics/utils/ops.py
614
615
616
617
618
619
620
621
622
623
624
625
626
627
def ltwh2xyxy(x):
    """
    Turn top-left format boxes [x1, y1, w, h] into corner format [x1, y1, x2, y2].

    Args:
        x (np.ndarray | torch.Tensor): Boxes in [x1, y1, w, h] order along the last dimension.

    Returns:
        (np.ndarray | torch.Tensor): A copy of the input whose last two columns hold the bottom-right corner; the
            top-left corner is carried over unchanged.
    """
    if isinstance(x, torch.Tensor):
        out = x.clone()
    else:
        out = np.copy(x)
    # (size column, origin column): far corner = size + origin
    for size, origin in ((2, 0), (3, 1)):
        out[..., size] = x[..., size] + x[..., origin]
    return out





ultralytics.utils.ops.segments2boxes

segments2boxes(segments)

Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).

Parameters:

Name Type Description Default
segments list

List of segments, each segment is a list of points, each point is a list of x, y coordinates.

required

Returns:

Type Description
ndarray

The xywh coordinates of the bounding boxes.

Source code in ultralytics/utils/ops.py
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
def segments2boxes(segments):
    """
    Compute one axis-aligned bounding box per segment and return the boxes in xywh format.

    Args:
        segments (list): Segments to enclose; each one is transposed and unpacked into its x and y coordinates, so it
            is expected to be an (n, 2) array of points.

    Returns:
        (np.ndarray): The result of xyxy2xywh applied to the per-segment [x_min, y_min, x_max, y_max] extents.
    """

    def _extent(segment):
        """Return [x_min, y_min, x_max, y_max] for a single segment."""
        xs, ys = segment.T
        return [xs.min(), ys.min(), xs.max(), ys.max()]

    corners = np.array([_extent(segment) for segment in segments])  # xyxy
    return xyxy2xywh(corners)





ultralytics.utils.ops.resample_segments

resample_segments(segments, n=1000)

Inputs a list of segments (n,2) and returns a list of segments (n,2) up-sampled to n points each.

Parameters:

Name Type Description Default
segments list

A list of (n,2) arrays, where n is the number of points in the segment.

required
n int

Number of points to resample the segment to.

1000

Returns:

Name Type Description
segments list

The resampled segments.

Source code in ultralytics/utils/ops.py
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
def resample_segments(segments, n=1000):
    """
    Resample every segment in the list to n points using linear interpolation along the point index.

    Each segment is first closed by appending its first point. Segments that already have exactly n points are left
    untouched. The list is modified in place and also returned.

    Args:
        segments (list): A list of (m, 2) arrays, where m is the number of points in the segment.
        n (int): Number of points each resampled segment should have.

    Returns:
        segments (list): The same list object, with resampled entries stored as float32 arrays of shape (n, 2).
    """
    for idx, seg in enumerate(segments):
        if len(seg) == n:
            continue
        closed = np.concatenate((seg, seg[0:1, :]), axis=0)  # repeat the first point to close the outline
        count = len(closed)
        knots = np.arange(count)  # index positions of the existing points
        if count < n:
            # Up-sampling: spread the extra samples evenly, then merge the original knots back in so that
            # every existing point is kept and the total comes to n.
            samples = np.linspace(0, count - 1, n - count)
            samples = np.insert(samples, np.searchsorted(samples, knots), knots)
        else:
            # Down-sampling (or equal after closing): n evenly spaced samples over the closed outline
            samples = np.linspace(0, count - 1, n)
        xs = np.interp(samples, knots, closed[:, 0])
        ys = np.interp(samples, knots, closed[:, 1])
        segments[idx] = np.concatenate([xs, ys], dtype=np.float32).reshape(2, -1).T  # segment xy
    return segments





ultralytics.utils.ops.crop_mask

crop_mask(masks, boxes)

Crop masks to bounding boxes.

Parameters:

Name Type Description Default
masks Tensor

[n, h, w] tensor of masks.

required
boxes Tensor

[n, 4] tensor of bbox coordinates in relative point form.

required

Returns:

Type Description
Tensor

Cropped masks.

Source code in ultralytics/utils/ops.py
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
def crop_mask(masks, boxes):
    """
    Zero out every mask pixel that lies outside its corresponding bounding box.

    A pixel at column c and row r is kept when x1 <= c < x2 and y1 <= r < y2.

    Args:
        masks (torch.Tensor): [n, h, w] tensor of masks.
        boxes (torch.Tensor): [n, 4] tensor of (x1, y1, x2, y2) boxes, compared against mask pixel indices.

    Returns:
        (torch.Tensor): Masks multiplied by the per-box inside/outside indicator.
    """
    _, height, width = masks.shape
    x1, y1, x2, y2 = boxes.unsqueeze(2).chunk(4, dim=1)  # each of shape (n, 1, 1)
    col_idx = torch.arange(width, device=masks.device, dtype=x1.dtype)[None, None, :]  # shape (1, 1, w)
    row_idx = torch.arange(height, device=masks.device, dtype=x1.dtype)[None, :, None]  # shape (1, h, 1)

    inside_x = (col_idx >= x1) & (col_idx < x2)
    inside_y = (row_idx >= y1) & (row_idx < y2)
    return masks * (inside_x & inside_y)





ultralytics.utils.ops.process_mask

process_mask(protos, masks_in, bboxes, shape, upsample=False)

Apply masks to bounding boxes using the output of the mask head.

Parameters:

Name Type Description Default
protos Tensor

A tensor of shape [mask_dim, mask_h, mask_w].

required
masks_in Tensor

A tensor of shape [n, mask_dim], where n is the number of masks after NMS.

required
bboxes Tensor

A tensor of shape [n, 4], where n is the number of masks after NMS.

required
shape tuple

A tuple of integers representing the size of the input image in the format (h, w).

required
upsample bool

A flag to indicate whether to upsample the mask to the original image size.

False

Returns:

Type Description
Tensor

A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w are the height and width of the input image. The mask is applied to the bounding boxes.

Source code in ultralytics/utils/ops.py
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Build instance masks from prototype masks and per-instance coefficients, then crop them to their boxes.

    The boxes are rescaled from input-image size to prototype-mask size before cropping, so the crop happens at the
    prototype resolution. Optional upsampling to the input image size is done afterwards.

    Args:
        protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w].
        masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
        bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
        shape (tuple): The size of the input image in the format (h, w).
        upsample (bool): Whether to bilinearly upsample the cropped masks to `shape`.

    Returns:
        (torch.Tensor): Thresholded (> 0) masks of shape [n, mask_h, mask_w], or [n, h, w] when upsample is True.
    """
    channels, proto_h, proto_w = protos.shape  # CHW
    img_h, img_w = shape
    flat_protos = protos.float().view(channels, -1)
    masks = (masks_in @ flat_protos).view(-1, proto_h, proto_w)  # CHW

    ratio_w = proto_w / img_w
    ratio_h = proto_h / img_h

    # Bring the boxes into prototype-mask coordinates without touching the caller's tensor
    scaled_boxes = bboxes.clone()
    for col, ratio in enumerate((ratio_w, ratio_h, ratio_w, ratio_h)):
        scaled_boxes[:, col] *= ratio

    masks = crop_mask(masks, scaled_boxes)  # CHW
    if upsample:
        masks = F.interpolate(masks[None], shape, mode="bilinear", align_corners=False)[0]  # CHW
    return masks.gt_(0.0)





ultralytics.utils.ops.process_mask_native

process_mask_native(protos, masks_in, bboxes, shape)

Apply masks to bounding boxes using the output of the mask head with native upsampling.

Parameters:

Name Type Description Default
protos Tensor

[mask_dim, mask_h, mask_w].

required
masks_in Tensor

[n, mask_dim], n is number of masks after nms.

required
bboxes Tensor

[n, 4], n is number of masks after nms.

required
shape tuple

The size of the input image (h,w).

required

Returns:

Type Description
Tensor

The returned masks with dimensions [n, h, w].

Source code in ultralytics/utils/ops.py
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
def process_mask_native(protos, masks_in, bboxes, shape):
    """
    Build instance masks from prototypes and coefficients, rescale them to `shape`, then crop them to their boxes.

    Unlike cropping at prototype resolution, the masks are first resized with scale_masks and only then cropped with
    the boxes as given.

    Args:
        protos (torch.Tensor): [mask_dim, mask_h, mask_w].
        masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms.
        bboxes (torch.Tensor): [n, 4], n is number of masks after nms.
        shape (tuple): The size of the input image (h,w).

    Returns:
        (torch.Tensor): Thresholded (> 0) masks; the instance axis comes first, i.e. [n, h, w].
    """
    mask_dim, proto_h, proto_w = protos.shape  # CHW
    flat_protos = protos.float().view(mask_dim, -1)
    masks = (masks_in @ flat_protos).view(-1, proto_h, proto_w)
    resized = scale_masks(masks.unsqueeze(0), shape)[0]  # add a batch axis for resizing, then drop it again
    cropped = crop_mask(resized, bboxes)  # CHW
    return cropped.gt_(0.0)





ultralytics.utils.ops.scale_masks

scale_masks(masks, shape, padding=True)

Rescale segment masks to shape.

Parameters:

Name Type Description Default
masks Tensor

(N, C, H, W).

required
shape tuple

Height and width.

required
padding bool

If True, assume the masks are based on an image augmented in YOLO style (padded). If False, do regular rescaling.

True

Returns:

Type Description
Tensor

Rescaled masks.

Source code in ultralytics/utils/ops.py
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
def scale_masks(masks, shape, padding=True):
    """
    Crop the padded border off a batch of masks and bilinearly resize the remainder to `shape`.

    Args:
        masks (torch.Tensor): (N, C, H, W).
        shape (tuple): Target height and width.
        padding (bool): If True, the computed padding is split evenly between both sides and removed from each. If
            False, the crop starts at the top-left corner and the whole computed padding is removed from the
            bottom/right.

    Returns:
        (torch.Tensor): Rescaled masks of spatial size `shape`.
    """
    mask_h, mask_w = masks.shape[2:]
    target_h, target_w = shape[0], shape[1]
    gain = min(mask_h / target_h, mask_w / target_w)  # gain  = old / new
    pad_w = mask_w - target_w * gain
    pad_h = mask_h - target_h * gain
    if padding:
        pad_w /= 2
        pad_h /= 2
        top, left = int(pad_h), int(pad_w)
    else:
        top, left = 0, 0
    bottom = int(mask_h - pad_h)
    right = int(mask_w - pad_w)
    cropped = masks[..., top:bottom, left:right]

    return F.interpolate(cropped, shape, mode="bilinear", align_corners=False)  # NCHW





ultralytics.utils.ops.scale_coords

scale_coords(
    img1_shape,
    coords,
    img0_shape,
    ratio_pad=None,
    normalize=False,
    padding=True,
)

Rescale segment coordinates (xy) from img1_shape to img0_shape.

Parameters:

Name Type Description Default
img1_shape tuple

The shape of the image that the coords are from.

required
coords Tensor

The coords to be scaled of shape n,2.

required
img0_shape tuple

The shape of the image that the segmentation is being applied to.

required
ratio_pad tuple

The ratio of the image size to the padded image size.

None
normalize bool

If True, the coordinates will be normalized to the range [0, 1].

False
padding bool

If True, assume the coordinates are based on an image augmented in YOLO style (padded). If False, do regular rescaling.

True

Returns:

Name Type Description
coords Tensor

The scaled coordinates.

Source code in ultralytics/utils/ops.py
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False, padding=True):
    """
    Map (x, y) coordinates from the img1_shape frame back to the img0_shape frame.

    The coordinates are modified in place: the padding offset is removed (optional), the values are divided by the
    resize gain, clipped with clip_coords, and optionally divided by the img0 width/height.

    Args:
        img1_shape (tuple): The (h, w) shape of the image that the coords are from.
        coords (torch.Tensor): The coords to be scaled; x is read from index 0 and y from index 1 of the last axis.
        img0_shape (tuple): The (h, w) shape of the image that the coords are mapped to.
        ratio_pad (tuple): Optional precomputed values; ratio_pad[0][0] is used as the gain and ratio_pad[1] as the
            (x, y) padding. When None, both are derived from the two shapes.
        normalize (bool): If True, x is divided by the img0 width and y by the img0 height after clipping.
        padding (bool): If True, the padding offset is subtracted before rescaling. If False, it is not.

    Returns:
        coords (torch.Tensor): The scaled coordinates.
    """
    if ratio_pad is not None:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]
    else:  # derive gain and padding from the two shapes
        src_h, src_w = img1_shape[0], img1_shape[1]
        dst_h, dst_w = img0_shape[0], img0_shape[1]
        gain = min(src_h / dst_h, src_w / dst_w)  # gain  = old / new
        pad = (src_w - dst_w * gain) / 2, (src_h - dst_h * gain) / 2  # wh padding

    if padding:
        for axis in (0, 1):  # x padding, then y padding
            coords[..., axis] -= pad[axis]
    for axis in (0, 1):
        coords[..., axis] /= gain
    coords = clip_coords(coords, img0_shape)
    if normalize:
        # x is divided by the width (shape index 1), y by the height (shape index 0)
        for axis, extent_index in ((0, 1), (1, 0)):
            coords[..., axis] /= img0_shape[extent_index]
    return coords





ultralytics.utils.ops.regularize_rboxes

regularize_rboxes(rboxes)

Regularize rotated boxes in range [0, pi/2].

Parameters:

Name Type Description Default
rboxes Tensor

Input boxes of shape(N, 5) in xywhr format.

required

Returns:

Type Description
Tensor

The regularized boxes.

Source code in ultralytics/utils/ops.py
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
def regularize_rboxes(rboxes):
    """
    Fold the rotation of each rotated box into [0, pi/2), swapping width and height where needed.

    Args:
        rboxes (torch.Tensor): Input boxes of shape(N, 5) in xywhr format.

    Returns:
        (torch.Tensor): Boxes in xywhr format with the angle taken modulo pi/2 and w/h exchanged for boxes whose angle
            modulo pi is at least pi/2.
    """
    half_pi = math.pi / 2
    cx, cy, width, height, theta = rboxes.unbind(dim=-1)
    # Angles in the second quarter of a half-turn describe the same box with its edges exchanged
    needs_swap = theta % math.pi >= half_pi
    folded_theta = theta % half_pi
    new_w = torch.where(needs_swap, height, width)
    new_h = torch.where(needs_swap, width, height)
    return torch.stack([cx, cy, new_w, new_h, folded_theta], dim=-1)





ultralytics.utils.ops.masks2segments

masks2segments(masks, strategy='all')

Convert masks to segments.

Parameters:

Name Type Description Default
masks Tensor

The output of the model, which is a tensor of shape (batch_size, 160, 160).

required
strategy str

'all' or 'largest'.

'all'

Returns:

Type Description
list

List of segment masks.

Source code in ultralytics/utils/ops.py
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
def masks2segments(masks, strategy="all"):
    """
    Extract one polygon (array of xy points) per mask from the external contours found by OpenCV.

    Args:
        masks (torch.Tensor): Batch of masks; each entry along the first axis is converted to uint8 and passed to
            cv2.findContours.
        strategy (str): 'all' merges every contour of a mask into one segment, 'largest' keeps only the contour with
            the most points.

    Returns:
        (list): One float32 array of shape (k, 2) per mask; shape (0, 2) when no contour was found.
    """
    from ultralytics.data.converter import merge_multi_segment

    segments = []
    for mask in masks.int().cpu().numpy().astype("uint8"):
        contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
        if not contours:
            points = np.zeros((0, 2))  # no segments found
        else:
            points = contours  # left as-is when strategy matches neither branch below
            if strategy == "all":  # merge and concatenate all segments
                if len(contours) > 1:
                    flattened = [contour.reshape(-1, 2) for contour in contours]
                    points = np.concatenate(merge_multi_segment(flattened))
                else:
                    points = contours[0].reshape(-1, 2)
            elif strategy == "largest":  # select largest segment
                lengths = np.array([len(contour) for contour in contours])
                points = np.array(contours[lengths.argmax()]).reshape(-1, 2)
        segments.append(points.astype("float32"))
    return segments





ultralytics.utils.ops.convert_torch2numpy_batch

convert_torch2numpy_batch(batch: Tensor) -> np.ndarray

Convert a batch of FP32 torch tensors (0.0-1.0) to a NumPy uint8 array (0-255), changing from BCHW to BHWC layout.

Parameters:

Name Type Description Default
batch Tensor

Input tensor batch of shape (Batch, Channels, Height, Width) and dtype torch.float32.

required

Returns:

Type Description
ndarray

Output NumPy array batch of shape (Batch, Height, Width, Channels) and dtype uint8.

Source code in ultralytics/utils/ops.py
855
856
857
858
859
860
861
862
863
864
865
def convert_torch2numpy_batch(batch: torch.Tensor) -> np.ndarray:
    """
    Turn a BCHW float tensor batch with values in 0.0-1.0 into a BHWC uint8 NumPy array with values in 0-255.

    Values are multiplied by 255 and clamped to [0, 255] before the cast to uint8.

    Args:
        batch (torch.Tensor): Input tensor batch of shape (Batch, Channels, Height, Width) and dtype torch.float32.

    Returns:
        (np.ndarray): Output NumPy array batch of shape (Batch, Height, Width, Channels) and dtype uint8.
    """
    channels_last = batch.permute(0, 2, 3, 1).contiguous()  # BCHW -> BHWC
    scaled = (channels_last * 255).clamp(0, 255)
    return scaled.to(torch.uint8).cpu().numpy()





ultralytics.utils.ops.clean_str

clean_str(s)

Cleans a string by replacing special characters with '_' character.

Parameters:

Name Type Description Default
s str

A string needing special characters replaced.

required

Returns:

Type Description
str

A string with special characters replaced by an underscore _.

Source code in ultralytics/utils/ops.py
868
869
870
871
872
873
874
875
876
877
878
def clean_str(s):
    """
    Replace every special character in a string with an underscore.

    Args:
        s (str): A string needing special characters replaced.

    Returns:
        (str): The input with each character from the special-character set replaced by '_'.
    """
    special_chars = "[|@#!¡·$€%&()=?¿^*;:,¨´><+]"  # regex character class of symbols to replace
    return re.sub(special_chars, "_", s)





ultralytics.utils.ops.empty_like

empty_like(x)

Creates empty torch.Tensor or np.ndarray with same shape as input and float32 dtype.

Source code in ultralytics/utils/ops.py
881
882
883
884
885
def empty_like(x):
    """Allocate an uninitialized float32 torch.Tensor or np.ndarray with the same shape as the input."""
    if isinstance(x, torch.Tensor):
        return torch.empty_like(x, dtype=torch.float32)
    return np.empty_like(x, dtype=np.float32)





📅 Created 1 year ago ✏️ Updated 4 months ago