Reference for ultralytics/utils/ops.py
This page is sourced from https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py. Have an improvement or example to add? Open a Pull Request — thank you! 🙏
Link to this section ultralytics.utils.ops.Profile#
Profile(self, t: float = 0.0, device: torch.device | None = None)
Bases: contextlib.ContextDecorator
Ultralytics Profile class for timing code execution.
Use as a decorator with @Profile() or as a context manager with 'with Profile():'. Provides accurate timing measurements with CUDA synchronization support for GPU operations.
Args
| Name | Type | Description | Default |
|---|---|---|---|
t | float | Initial accumulated time in seconds. | 0.0 |
device | torch.device, optional | Device used for model inference to enable CUDA synchronization. | None |
Attributes
| Name | Type | Description |
|---|---|---|
t | float | Accumulated time in seconds. |
device | torch.device | Device used for model inference. |
cuda | bool | Whether CUDA is being used for timing synchronization. |
Methods
| Name | Description |
|---|---|
__enter__ | Start timing. |
__exit__ | Stop timing. |
__str__ | Return a human-readable string representing the accumulated elapsed time. |
time | Get current time with CUDA synchronization if applicable. |
Examples
Use as a context manager to time code execution
>>> with Profile() as dt:
...     pass  # slow operation here
>>> str(dt).startswith("Elapsed time is ")
True
Use as a decorator to time function execution
>>> @Profile()
... def slow_function():
...     time.sleep(0.1)
Source code in ultralytics/utils/ops.py
class Profile(contextlib.ContextDecorator):
"""Ultralytics Profile class for timing code execution.
Use as a decorator with @Profile() or as a context manager with 'with Profile():'. Provides accurate timing
measurements with CUDA synchronization support for GPU operations.
Attributes:
t (float): Accumulated time in seconds.
device (torch.device): Device used for model inference.
cuda (bool): Whether CUDA is being used for timing synchronization.
Examples:
Use as a context manager to time code execution
>>> with Profile() as dt:
... pass # slow operation here
>>> str(dt).startswith("Elapsed time is ")
True
Use as a decorator to time function execution
>>> @Profile()
... def slow_function():
... time.sleep(0.1)
"""
def __init__(self, t: float = 0.0, device: torch.device | None = None):
"""Initialize the Profile class.
Args:
t (float): Initial accumulated time in seconds.
device (torch.device, optional): Device used for model inference to enable CUDA synchronization.
"""
self.t = t
self.device = device
self.cuda = bool(device and str(device).startswith("cuda"))Link to this section ultralytics.utils.ops.Profile.__enter__#
def __enter__(self)Start timing.
Source code in ultralytics/utils/ops.py
def __enter__(self):
"""Start timing."""
self.start = self.time()
return selfLink to this section ultralytics.utils.ops.Profile.__exit__#
def __exit__(self, type, value, traceback)Stop timing.
Args
| Name | Type | Description | Default |
|---|---|---|---|
type |  |  | required |
value |  |  | required |
traceback |  |  | required |
Source code in ultralytics/utils/ops.py
def __exit__(self, type, value, traceback):
"""Stop timing."""
self.dt = self.time() - self.start # delta-time
self.t += self.dt # accumulate dtLink to this section ultralytics.utils.ops.Profile.__str__#
def __str__(self)Return a human-readable string representing the accumulated elapsed time.
Source code in ultralytics/utils/ops.py
def __str__(self):
    """Return a human-readable string representing the accumulated elapsed time.

    Returns:
        (str): Text of the form "Elapsed time is <t> s", where <t> is the accumulated time ``self.t``.
    """
    return f"Elapsed time is {self.t} s"
def time(self)Get current time with CUDA synchronization if applicable.
Source code in ultralytics/utils/ops.py
def time(self):
"""Get current time with CUDA synchronization if applicable."""
if self.cuda:
torch.cuda.synchronize(self.device)
return time.perf_counter()Link to this section ultralytics.utils.ops.segment2box#
def segment2box(segment: np.ndarray, width: int = 640, height: int = 640) -> np.ndarrayConvert segment coordinates to bounding box coordinates.
Converts a single segment label to a box label by finding the minimum and maximum x and y coordinates. Applies inside-image constraint and clips coordinates when necessary.
Args
| Name | Type | Description | Default |
|---|---|---|---|
segment | np.ndarray | Segment coordinates in format (N, 2) where N is number of points. | required |
width | int | Width of the image in pixels. | 640 |
height | int | Height of the image in pixels. | 640 |
Returns
| Type | Description |
|---|---|
np.ndarray | Bounding box coordinates in xyxy format [x1, y1, x2, y2]. |
Source code in ultralytics/utils/ops.py
def segment2box(segment: np.ndarray, width: int = 640, height: int = 640) -> np.ndarray:
    """Convert segment coordinates to bounding box coordinates.

    Converts a single segment label to a box label by finding the minimum and maximum x and y coordinates. Applies
    inside-image constraint and clips coordinates when necessary.

    Args:
        segment (np.ndarray): Segment coordinates in format (N, 2) where N is number of points.
        width (int): Width of the image in pixels.
        height (int): Height of the image in pixels.

    Returns:
        (np.ndarray): Bounding box coordinates in xyxy format [x1, y1, x2, y2]. All zeros when the segment is empty
            or no point lies inside the image.
    """
    x, y = segment.T  # segment xy
    if x.size == 0:  # empty segment: min()/max() below would raise on zero-size arrays
        return np.zeros(4, dtype=segment.dtype)
    # Clip coordinates if 3 out of 4 sides are outside the image
    if np.array([x.min() < 0, y.min() < 0, x.max() > width, y.max() > height]).sum() >= 3:
        x = x.clip(0, width)
        y = y.clip(0, height)
    inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
    x = x[inside]
    y = y[inside]
    # Test for remaining points by count, not by value: a segment lying on x == 0 still has a valid box.
    if x.size == 0:
        return np.zeros(4, dtype=segment.dtype)
    return np.array([x.min(), y.min(), x.max(), y.max()], dtype=segment.dtype)  # xyxy
def scale_boxes(
img1_shape: tuple[int, int],
boxes: torch.Tensor | np.ndarray,
img0_shape: tuple[int, int],
ratio_pad: tuple | None = None,
padding: bool = True,
xywh: bool = False,
) -> torch.Tensor | np.ndarrayRescale bounding boxes from one image shape to another.
Rescales bounding boxes from img1_shape to img0_shape, accounting for padding and aspect ratio changes. Supports both xyxy and xywh box formats.
Args
| Name | Type | Description | Default |
|---|---|---|---|
img1_shape | tuple[int, int] | Shape of the source image (height, width). | required |
boxes | `torch.Tensor \| np.ndarray` | Bounding boxes to rescale in format (N, 4). | required |
img0_shape | tuple[int, int] | Shape of the target image (height, width). | required |
ratio_pad | tuple, optional | Tuple of (ratio, pad) for scaling. If None, calculated from image shapes. | None |
padding | bool | Whether boxes are based on YOLO-style augmented images with padding. | True |
xywh | bool | Whether box format is xywh (True) or xyxy (False). | False |
Returns
| Type | Description |
|---|---|
`torch.Tensor \| np.ndarray` | Rescaled bounding boxes in the same format as input. |
Source code in ultralytics/utils/ops.py
def scale_boxes(
img1_shape: tuple[int, int],
boxes: torch.Tensor | np.ndarray,
img0_shape: tuple[int, int],
ratio_pad: tuple | None = None,
padding: bool = True,
xywh: bool = False,
) -> torch.Tensor | np.ndarray:
"""Rescale bounding boxes from one image shape to another.
Rescales bounding boxes from img1_shape to img0_shape, accounting for padding and aspect ratio changes. Supports
both xyxy and xywh box formats.
Args:
img1_shape (tuple[int, int]): Shape of the source image (height, width).
boxes (torch.Tensor | np.ndarray): Bounding boxes to rescale in format (N, 4).
img0_shape (tuple[int, int]): Shape of the target image (height, width).
ratio_pad (tuple, optional): Tuple of (ratio, pad) for scaling. If None, calculated from image shapes.
padding (bool): Whether boxes are based on YOLO-style augmented images with padding.
xywh (bool): Whether box format is xywh (True) or xyxy (False).
Returns:
(torch.Tensor | np.ndarray): Rescaled bounding boxes in the same format as input.
"""
if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
pad_x = round((img1_shape[1] - round(img0_shape[1] * gain)) / 2 - 0.1)
pad_y = round((img1_shape[0] - round(img0_shape[0] * gain)) / 2 - 0.1)
else:
gain = ratio_pad[0][0]
pad_x, pad_y = ratio_pad[1]
if padding:
boxes[..., 0] -= pad_x # x padding
boxes[..., 1] -= pad_y # y padding
if not xywh:
boxes[..., 2] -= pad_x # x padding
boxes[..., 3] -= pad_y # y padding
boxes[..., :4] /= gain
return boxes if xywh else clip_boxes(boxes, img0_shape)Link to this section ultralytics.utils.ops.make_divisible#
def make_divisible(x: int, divisor)Return the smallest number >= x that is divisible by the given divisor.
Args
| Name | Type | Description | Default |
|---|---|---|---|
x | int | The number to make divisible. | required |
divisor | `int \| torch.Tensor` | The divisor. | required |
Returns
| Type | Description |
|---|---|
int | The smallest number >= x divisible by the divisor. |
Source code in ultralytics/utils/ops.py
def make_divisible(x: int, divisor):
    """Round x up to the nearest multiple of the divisor.

    Args:
        x (int): The number to make divisible.
        divisor (int | torch.Tensor): The divisor. For a tensor, its maximum element is used.

    Returns:
        (int): The smallest number >= x divisible by the divisor.
    """
    divisor = int(divisor.max()) if isinstance(divisor, torch.Tensor) else divisor
    multiples = math.ceil(x / divisor)
    return multiples * divisor
def clip_boxes(boxes, shape)Clip bounding boxes to image boundaries.
Args
| Name | Type | Description | Default |
|---|---|---|---|
boxes | `torch.Tensor \| np.ndarray` | Bounding boxes to clip. | required |
shape | tuple | Image shape as HWC or HW (supports both). | required |
Returns
| Type | Description |
|---|---|
`torch.Tensor \| np.ndarray` | Clipped bounding boxes. |
Source code in ultralytics/utils/ops.py
def clip_boxes(boxes, shape):
    """Clamp box coordinates to the image extent, modifying `boxes` in place.

    Args:
        boxes (torch.Tensor | np.ndarray): Bounding boxes to clip.
        shape (tuple): Image shape as HWC or HW (supports both).

    Returns:
        (torch.Tensor | np.ndarray): Clipped bounding boxes.
    """
    height, width = shape[:2]  # supports both HWC or HW shapes
    if not isinstance(boxes, torch.Tensor):  # np.array (faster grouped)
        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, width)  # x1, x2
        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, height)  # y1, y2
        return boxes

    upper_bounds = (width, height, width, height)  # limits for x1, y1, x2, y2 (faster individually)
    if NOT_MACOS14:
        for col, upper in enumerate(upper_bounds):
            boxes[..., col].clamp_(0, upper)
    else:  # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
        for col, upper in enumerate(upper_bounds):
            boxes[..., col] = boxes[..., col].clamp(0, upper)
    return boxes
def clip_coords(coords, shape)Clip line coordinates to image boundaries.
Args
| Name | Type | Description | Default |
|---|---|---|---|
coords | `torch.Tensor \| np.ndarray` | Line coordinates to clip. | required |
shape | tuple | Image shape as HWC or HW (supports both). | required |
Returns
| Type | Description |
|---|---|
`torch.Tensor \| np.ndarray` | Clipped coordinates. |
Source code in ultralytics/utils/ops.py
def clip_coords(coords, shape):
    """Clamp x/y coordinates to the image extent, modifying `coords` in place.

    Args:
        coords (torch.Tensor | np.ndarray): Line coordinates to clip.
        shape (tuple): Image shape as HWC or HW (supports both).

    Returns:
        (torch.Tensor | np.ndarray): Clipped coordinates.
    """
    height, width = shape[:2]  # supports both HWC or HW shapes
    if not isinstance(coords, torch.Tensor):  # np.array
        coords[..., 0] = coords[..., 0].clip(0, width)  # x
        coords[..., 1] = coords[..., 1].clip(0, height)  # y
        return coords

    upper_bounds = (width, height)  # limits for x, y
    if NOT_MACOS14:
        for axis, upper in enumerate(upper_bounds):
            coords[..., axis].clamp_(0, upper)
    else:  # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
        for axis, upper in enumerate(upper_bounds):
            coords[..., axis] = coords[..., axis].clamp(0, upper)
    return coords
def xyxy2xywh(x)Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format where (x1, y1) is
the top-left corner and (x2, y2) is the bottom-right corner.
Args
| Name | Type | Description | Default |
|---|---|---|---|
x | `np.ndarray \| torch.Tensor` | Input bounding box coordinates in (x1, y1, x2, y2) format. | required |
Returns
| Type | Description |
|---|---|
`np.ndarray \| torch.Tensor` | Bounding box coordinates in (x, y, width, height) format. |
Source code in ultralytics/utils/ops.py
def xyxy2xywh(x):
    """Convert boxes from corner format (x1, y1, x2, y2) to center format (x, y, width, height).

    (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x1, y1, x2, y2) format.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in (x, y, width, height) format.
    """
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    out = empty_like(x)  # faster than clone/copy
    top_left, bottom_right = x[..., :2], x[..., 2:]
    out[..., :2] = (top_left + bottom_right) / 2  # x center, y center
    out[..., 2:] = bottom_right - top_left  # width, height
    return out
def xywh2xyxy(x)Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is
the top-left corner and (x2, y2) is the bottom-right corner. Note: ops per 2 channels faster than per channel.
Args
| Name | Type | Description | Default |
|---|---|---|---|
x | `np.ndarray \| torch.Tensor` | Input bounding box coordinates in (x, y, width, height) format. | required |
Returns
| Type | Description |
|---|---|
`np.ndarray \| torch.Tensor` | Bounding box coordinates in (x1, y1, x2, y2) format. |
Source code in ultralytics/utils/ops.py
def xywh2xyxy(x):
    """Convert boxes from center format (x, y, width, height) to corner format (x1, y1, x2, y2).

    (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner. Note: ops per 2 channels faster than
    per channel.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x, y, width, height) format.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in (x1, y1, x2, y2) format.
    """
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    out = empty_like(x)  # faster than clone/copy
    half_extent = x[..., 2:] / 2  # half width-height
    centers = x[..., :2]
    out[..., 2:] = centers + half_extent  # bottom right xy
    out[..., :2] = centers - half_extent  # top left xy
    return out
def xywhn2xyxy(x, w: int = 640, h: int = 640, padw: int = 0, padh: int = 0)Convert normalized bounding box coordinates to pixel coordinates.
Args
| Name | Type | Description | Default |
|---|---|---|---|
x | `np.ndarray \| torch.Tensor` | Normalized bounding box coordinates in (x, y, w, h) format. | required |
w | int | Image width in pixels. | 640 |
h | int | Image height in pixels. | 640 |
padw | int | Padding width in pixels. | 0 |
padh | int | Padding height in pixels. | 0 |
Returns
| Type | Description |
|---|---|
`np.ndarray \| torch.Tensor` | Bounding box coordinates in (x1, y1, x2, y2) format. |
Source code in ultralytics/utils/ops.py
def xywhn2xyxy(x, w: int = 640, h: int = 640, padw: int = 0, padh: int = 0):
    """Convert normalized center-format boxes to pixel corner-format boxes.

    Args:
        x (np.ndarray | torch.Tensor): Normalized bounding box coordinates in (x, y, w, h) format.
        w (int): Image width in pixels.
        h (int): Image height in pixels.
        padw (int): Padding width in pixels.
        padh (int): Padding height in pixels.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in (x1, y1, x2, y2) format.
    """
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    out = empty_like(x)  # faster than clone/copy
    cx, cy = x[..., 0], x[..., 1]
    half_bw, half_bh = x[..., 2] / 2, x[..., 3] / 2
    # Scale normalized corners by the image size, then shift by the padding offset.
    out[..., 0] = w * (cx - half_bw) + padw  # top left x
    out[..., 1] = h * (cy - half_bh) + padh  # top left y
    out[..., 2] = w * (cx + half_bw) + padw  # bottom right x
    out[..., 3] = h * (cy + half_bh) + padh  # bottom right y
    return out
def xyxy2xywhn(x, w: int = 640, h: int = 640, clip: bool = False, eps: float = 0.0)Convert bounding box coordinates from (x1, y1, x2, y2) format to normalized (x, y, width, height) format. x, y,
width and height are normalized to image dimensions.
Args
| Name | Type | Description | Default |
|---|---|---|---|
x | `np.ndarray \| torch.Tensor` | Input bounding box coordinates in (x1, y1, x2, y2) format. | required |
w | int | Image width in pixels. | 640 |
h | int | Image height in pixels. | 640 |
clip | bool | Whether to clip boxes to image boundaries. | False |
eps | float | Minimum value for box width and height. | 0.0 |
Returns
| Type | Description |
|---|---|
`np.ndarray \| torch.Tensor` | Normalized bounding box coordinates in (x, y, width, height) format. |
Source code in ultralytics/utils/ops.py
def xyxy2xywhn(x, w: int = 640, h: int = 640, clip: bool = False, eps: float = 0.0):
    """Convert bounding box coordinates from (x1, y1, x2, y2) format to normalized (x, y, width, height) format.

    x, y, width and height are normalized to image dimensions.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x1, y1, x2, y2) format. When `clip` is
            True the input is clipped in place before conversion.
        w (int): Image width in pixels.
        h (int): Image height in pixels.
        clip (bool): Whether to clip boxes to image boundaries.
        eps (float): Margin subtracted from the image width and height to form the clipping bounds.

    Returns:
        (np.ndarray | torch.Tensor): Normalized bounding box coordinates in (x, y, width, height) format.
    """
    # Validate first so malformed input fails with this message and is never partially clipped in place.
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    if clip:
        x = clip_boxes(x, (h - eps, w - eps))
    y = empty_like(x)  # faster than clone/copy
    x1, y1, x2, y2 = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
    y[..., 0] = ((x1 + x2) / 2) / w  # x center
    y[..., 1] = ((y1 + y2) / 2) / h  # y center
    y[..., 2] = (x2 - x1) / w  # width
    y[..., 3] = (y2 - y1) / h  # height
    return y
def xywh2ltwh(x)Convert bounding box format from [x, y, w, h] to [x1, y1, w, h] where x1, y1 are top-left coordinates.
Args
| Name | Type | Description | Default |
|---|---|---|---|
x | `np.ndarray \| torch.Tensor` | Input bounding box coordinates in xywh format. | required |
Returns
| Type | Description |
|---|---|
`np.ndarray \| torch.Tensor` | Bounding box coordinates in ltwh format. |
Source code in ultralytics/utils/ops.py
def xywh2ltwh(x):
    """Convert boxes from center format [x, y, w, h] to top-left format [x1, y1, w, h].

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in xywh format.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in ltwh format (a copy; the input is not modified).
    """
    if isinstance(x, torch.Tensor):
        out = x.clone()
    else:
        out = np.copy(x)
    for axis in (0, 1):  # x then y: shift the center back by half the matching size
        out[..., axis] = x[..., axis] - x[..., axis + 2] / 2
    return out
def xyxy2ltwh(x)Convert bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h] format.
Args
| Name | Type | Description | Default |
|---|---|---|---|
x | `np.ndarray \| torch.Tensor` | Input bounding box coordinates in xyxy format. | required |
Returns
| Type | Description |
|---|---|
`np.ndarray \| torch.Tensor` | Bounding box coordinates in ltwh format. |
Source code in ultralytics/utils/ops.py
def xyxy2ltwh(x):
    """Convert boxes from corner format [x1, y1, x2, y2] to top-left format [x1, y1, w, h].

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in xyxy format.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in ltwh format (a copy; the input is not modified).
    """
    if isinstance(x, torch.Tensor):
        out = x.clone()
    else:
        out = np.copy(x)
    for axis in (2, 3):  # width then height: far corner minus near corner
        out[..., axis] = x[..., axis] - x[..., axis - 2]
    return out
def ltwh2xywh(x)Convert bounding boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.
Args
| Name | Type | Description | Default |
|---|---|---|---|
x | `np.ndarray \| torch.Tensor` | Input bounding box coordinates. | required |
Returns
| Type | Description |
|---|---|
`np.ndarray \| torch.Tensor` | Bounding box coordinates in xywh format. |
Source code in ultralytics/utils/ops.py
def ltwh2xywh(x):
    """Convert boxes from top-left format [x1, y1, w, h] to center format [x, y, w, h].

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in xywh format (a copy; the input is not modified).
    """
    if isinstance(x, torch.Tensor):
        out = x.clone()
    else:
        out = np.copy(x)
    for axis in (0, 1):  # x then y: move the top-left corner forward by half the matching size
        out[..., axis] = x[..., axis] + x[..., axis + 2] / 2
    return out
def xyxyxyxy2xywhr(x)Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation] format.
Args
| Name | Type | Description | Default |
|---|---|---|---|
x | `np.ndarray \| torch.Tensor` | Input box corners with shape (N, 8) in [xy1, xy2, xy3, xy4] format. | required |
Returns
| Type | Description |
|---|---|
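`np.ndarray \| torch.Tensor` | Converted data in [cx, cy, w, h, rotation] format with shape (N, 5). Rotation values are in radians from [-pi/4, 3pi/4). |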
Source code in ultralytics/utils/ops.py
def xyxyxyxy2xywhr(x):
    """Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation] format.

    Args:
        x (np.ndarray | torch.Tensor): Input box corners with shape (N, 8) in [xy1, xy2, xy3, xy4] format.

    Returns:
        (np.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format with shape (N, 5). Rotation
            values are in radians from [-pi/4, 3pi/4). An empty input (N == 0) yields an empty (0, 5) result.
    """
    is_torch = isinstance(x, torch.Tensor)
    if len(x) == 0:
        # No boxes to fit. The reshape below cannot infer its -1 dimension when the array holds zero elements,
        # so return a correctly shaped empty result instead of raising.
        return x.new_zeros((0, 5)) if is_torch else np.zeros((0, 5))
    points = x.cpu().numpy() if is_torch else x
    points = points.reshape(len(x), -1, 2)
    rboxes = []
    for pts in points:
        # NOTE: Use cv2.minAreaRect to get accurate xywhr,
        # especially some objects are cut off by augmentations in dataloader.
        (cx, cy), (w, h), angle = cv2.minAreaRect(pts)
        # convert angle to radian and normalize to [-pi/4, 3pi/4)
        theta = angle / 180 * np.pi
        if w < h:  # keep w as the longer side; rotating by 90 degrees describes the same rectangle
            w, h = h, w
            theta += np.pi / 2
        while theta >= 3 * np.pi / 4:
            theta -= np.pi
        while theta < -np.pi / 4:
            theta += np.pi
        rboxes.append([cx, cy, w, h, theta])
    return torch.tensor(rboxes, device=x.device, dtype=x.dtype) if is_torch else np.asarray(rboxes)
def xywhr2xyxyxyxy(x)Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4] format.
Args
| Name | Type | Description | Default |
|---|---|---|---|
x | `np.ndarray \| torch.Tensor` | Boxes in [cx, cy, w, h, rotation] format with shape (N, 5) or (B, N, 5). Rotation values should be in radians from [-pi/4, 3pi/4). | required |
Returns
| Type | Description |
|---|---|
`np.ndarray \| torch.Tensor` | Converted corner points with shape (N, 4, 2) or (B, N, 4, 2). |
Source code in ultralytics/utils/ops.py
def xywhr2xyxyxyxy(x):
    """Expand rotated boxes in [xywh, rotation] format into their four corner points.

    Args:
        x (np.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format with shape (N, 5) or (B, N, 5). Rotation
            values should be in radians from [-pi/4, 3pi/4).

    Returns:
        (np.ndarray | torch.Tensor): Converted corner points with shape (N, 4, 2) or (B, N, 4, 2).
    """
    # Pick the backend matching the input type so the output stays in the same library.
    if isinstance(x, torch.Tensor):
        cos, sin, cat, stack = torch.cos, torch.sin, torch.cat, torch.stack
    else:
        cos, sin, cat, stack = np.cos, np.sin, np.concatenate, np.stack

    center = x[..., :2]
    w, h, theta = x[..., 2:3], x[..., 3:4], x[..., 4:5]  # keep a trailing axis of size 1 for concatenation
    cos_t, sin_t = cos(theta), sin(theta)
    half_w, half_h = w / 2, h / 2
    # Half-extent vectors along the box's rotated width and height axes.
    along_w = cat([half_w * cos_t, half_w * sin_t], -1)
    along_h = cat([-half_h * sin_t, half_h * cos_t], -1)
    corners = [
        center + along_w + along_h,
        center + along_w - along_h,
        center - along_w - along_h,
        center - along_w + along_h,
    ]
    return stack(corners, -2)
def ltwh2xyxy(x)Convert bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.
Args
| Name | Type | Description | Default |
|---|---|---|---|
x | `np.ndarray \| torch.Tensor` | Input bounding box coordinates. | required |
Returns
| Type | Description |
|---|---|
`np.ndarray \| torch.Tensor` | Bounding box coordinates in xyxy format. |
Source code in ultralytics/utils/ops.py
def ltwh2xyxy(x):
    """Convert boxes from top-left format [x1, y1, w, h] to corner format [x1, y1, x2, y2].

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in xyxy format (a copy; the input is not modified).
    """
    if isinstance(x, torch.Tensor):
        out = x.clone()
    else:
        out = np.copy(x)
    for axis in (2, 3):  # x2 then y2: size plus the matching top-left coordinate
        out[..., axis] = x[..., axis] + x[..., axis - 2]
    return out
def segments2boxes(segments)Convert segment coordinates to bounding box labels in xywh format.
Args
| Name | Type | Description | Default |
|---|---|---|---|
segments | list | List of segments where each segment is a list of points, each point is [x, y] coordinates. | required |
Returns
| Type | Description |
|---|---|
np.ndarray | Bounding box coordinates in xywh format. |
Source code in ultralytics/utils/ops.py
def segments2boxes(segments):
    """Compute the enclosing box of each segment and return the boxes in xywh format.

    Args:
        segments (list): List of segments where each segment is a list of points, each point is [x, y] coordinates.

    Returns:
        (np.ndarray): Bounding box coordinates in xywh format.
    """
    # Each segment transposes to an (xs, ys) pair; the box is their min/max extent in xyxy order.
    corners = [[xs.min(), ys.min(), xs.max(), ys.max()] for xs, ys in (seg.T for seg in segments)]
    return xyxy2xywh(np.array(corners))  # xyxy -> xywh
def resample_segments(segments, n: int = 1000)Resample segments to n points each using linear interpolation.
Args
| Name | Type | Description | Default |
|---|---|---|---|
segments | list | List of (N, 2) arrays where N is the number of points in each segment. | required |
n | int | Number of points to resample each segment to. | 1000 |
Returns
| Type | Description |
|---|---|
list | Resampled segments with n points each. |
Source code in ultralytics/utils/ops.py
def resample_segments(segments, n: int = 1000):
    """Resample every segment to n points by linear interpolation along its closed outline.

    The list is updated in place: each entry whose length differs from n is replaced by a float32 (n, 2) array.
    Entries that already have n points are left untouched.

    Args:
        segments (list): List of (N, 2) arrays where N is the number of points in each segment.
        n (int): Number of points to resample each segment to.

    Returns:
        (list): Resampled segments with n points each.
    """
    for idx, seg in enumerate(segments):
        if len(seg) == n:
            continue
        closed = np.concatenate((seg, seg[0:1, :]), axis=0)  # repeat the first point to close the outline
        count = len(closed)
        knots = np.arange(count)
        if count < n:
            # Upsampling: generate the extra sample positions, then merge the original vertices back in.
            samples = np.linspace(0, count - 1, n - count)
            samples = np.insert(samples, np.searchsorted(samples, knots), knots)
        else:
            samples = np.linspace(0, count - 1, n)
        xs = np.interp(samples, knots, closed[:, 0])
        ys = np.interp(samples, knots, closed[:, 1])
        segments[idx] = np.concatenate([xs, ys], dtype=np.float32).reshape(2, -1).T  # segment xy
    return segments
def crop_mask(masks: torch.Tensor, boxes: torch.Tensor) -> torch.TensorCrop masks to bounding box regions.
Args
| Name | Type | Description | Default |
|---|---|---|---|
masks | torch.Tensor | Masks with shape (N, H, W). | required |
boxes | torch.Tensor | Bounding box coordinates with shape (N, 4) in xyxy pixel format. | required |
Returns
| Type | Description |
|---|---|
torch.Tensor | Cropped masks. |
Source code in ultralytics/utils/ops.py
def crop_mask(masks: torch.Tensor, boxes: torch.Tensor) -> torch.Tensor:
    """Zero out every mask pixel that lies outside its bounding box, modifying `masks` in place.

    Args:
        masks (torch.Tensor): Masks with shape (N, H, W).
        boxes (torch.Tensor): Bounding box coordinates with shape (N, 4) in xyxy pixel format.

    Returns:
        (torch.Tensor): Cropped masks.
    """
    if boxes.device != masks.device:
        boxes = boxes.to(masks.device)
    _, height, width = masks.shape
    x1, y1, x2, y2 = boxes[:, :, None].chunk(4, 1)  # each shape (n,1,1)
    col_idx = torch.arange(width, device=masks.device, dtype=x1.dtype)[None, None, :]  # columns (1,1,w)
    row_idx = torch.arange(height, device=masks.device, dtype=x1.dtype)[None, :, None]  # rows (1,h,1)
    # The box region is separable, so the column and row masks are applied one after the other in place. This
    # avoids materializing the full (n, h, w) boolean grid and needs no per-mask Python loop.
    masks *= (col_idx >= x1) & (col_idx < x2)  # zero columns outside the box
    masks *= (row_idx >= y1) & (row_idx < y2)  # zero rows outside the box
    return masks
def process_mask(protos, masks_in, bboxes, shape, upsample: bool = False)Apply masks to bounding boxes using mask head output.
Args
| Name | Type | Description | Default |
|---|---|---|---|
protos | torch.Tensor | Mask prototypes with shape (mask_dim, mask_h, mask_w). | required |
masks_in | torch.Tensor | Mask coefficients with shape (N, mask_dim) where N is number of masks after NMS. | required |
bboxes | torch.Tensor | Bounding boxes with shape (N, 4) where N is number of masks after NMS. | required |
shape | tuple | Input image size as (height, width). | required |
upsample | bool | Whether to upsample masks to original image size. | False |
Returns
| Type | Description |
|---|---|
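torch.Tensor | A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS. When upsample=True h and w match the input image size; otherwise they are the prototype mask resolution. |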
Source code in ultralytics/utils/ops.py
def process_mask(protos, masks_in, bboxes, shape, upsample: bool = False):
    """Build per-detection binary masks from mask prototypes and coefficients, cropped to each box.

    Args:
        protos (torch.Tensor): Mask prototypes with shape (mask_dim, mask_h, mask_w).
        masks_in (torch.Tensor): Mask coefficients with shape (N, mask_dim) where N is number of masks after NMS.
        bboxes (torch.Tensor): Bounding boxes with shape (N, 4) where N is number of masks after NMS.
        shape (tuple): Input image size as (height, width).
        upsample (bool): Whether to upsample masks to original image size.

    Returns:
        (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS. When
            upsample=True h and w match the input image size; otherwise they are the prototype mask resolution.
    """
    c, mh, mw = protos.shape  # CHW
    flat_protos = protos.float().view(c, -1)
    masks = (masks_in @ flat_protos).view(-1, mh, mw)  # NHW

    # Boxes are in input-image pixels; scale them to prototype resolution before cropping.
    scale_x, scale_y = mw / shape[1], mh / shape[0]
    ratios = torch.tensor([[scale_x, scale_y, scale_x, scale_y]], device=bboxes.device)
    masks = crop_mask(masks, boxes=bboxes * ratios)  # NHW

    if upsample:
        masks = F.interpolate(masks[None], shape, mode="bilinear")[0]  # NHW
    return masks.gt_(0.0).byte()
def process_mask_native(protos, masks_in, bboxes, shape)Apply masks to bounding boxes using mask head output with native upsampling.
Args
| Name | Type | Description | Default |
|---|---|---|---|
protos | torch.Tensor | Mask prototypes with shape (mask_dim, mask_h, mask_w). | required |
masks_in | torch.Tensor | Mask coefficients with shape (N, mask_dim) where N is number of masks after NMS. | required |
bboxes | torch.Tensor | Bounding boxes with shape (N, 4) where N is number of masks after NMS. | required |
shape | tuple | Input image size as (height, width). | required |
Returns
| Type | Description |
|---|---|
torch.Tensor | Binary mask tensor with shape (N, H, W). |
Source code in ultralytics/utils/ops.py
def process_mask_native(protos, masks_in, bboxes, shape):
    """Apply masks to bounding boxes using mask head output with native upsampling.

    Args:
        protos (torch.Tensor): Mask prototypes with shape (mask_dim, mask_h, mask_w).
        masks_in (torch.Tensor): Mask coefficients with shape (N, mask_dim) where N is number of masks after NMS.
        bboxes (torch.Tensor): Bounding boxes with shape (N, 4) where N is number of masks after NMS.
        shape (tuple): Input image size as (height, width).

    Returns:
        (torch.Tensor): Binary mask tensor with shape (N, H, W). With N == 0 an empty uint8 tensor of shape
            (0, H, W) is returned.
    """
    c, mh, mw = protos.shape  # CHW
    coeffs = masks_in @ protos.float().view(c, -1)  # (N, mh*mw) prototype-resolution mask logits
    h, w = shape
    if coeffs.shape[0] == 0:
        # No detections: the chunk list below would be empty and torch.cat rejects an empty list.
        return coeffs.new_zeros((0, h, w), dtype=torch.uint8)
    # Upsampling all N masks at once allocates an N*H*W float intermediate (~9 GB on a large image with many
    # detections), which OOMs the worker. Upsample in chunks bounded by a pixel budget, thresholding each chunk to
    # uint8 immediately so the float intermediate stays small, then crop the assembled uint8 stack.
    step = max(1, 32_000_000 // (h * w))
    masks = [
        scale_masks(coeffs[i : i + step].view(-1, mh, mw)[None], shape)[0].gt_(0.0).byte()
        for i in range(0, coeffs.shape[0], step)
    ]
    return crop_mask(torch.cat(masks), bboxes)
def scale_masks(
masks: torch.Tensor,
shape: tuple[int, int],
ratio_pad: tuple[tuple[int, int], tuple[int, int]] | None = None,
padding: bool = True,
mode: str = "bilinear",
) -> torch.Tensor
Rescale segment masks to target shape.
Args
| Name | Type | Description | Default |
|---|---|---|---|
masks | torch.Tensor | Masks with shape (N, C, H, W). | required |
shape | tuple[int, int] | Target height and width as (height, width). | required |
ratio_pad | tuple, optional | Ratio and padding values as ((ratio_h, ratio_w), (pad_w, pad_h)). | None |
padding | bool | Whether masks are based on YOLO-style augmented images with padding. | True |
mode | str | Interpolation mode, e.g. 'bilinear' for logits or 'nearest' for integer class maps. | "bilinear" |
Returns
| Type | Description |
|---|---|
torch.Tensor | Rescaled masks. |
Source code in ultralytics/utils/ops.py
def scale_masks(
    masks: torch.Tensor,
    shape: tuple[int, int],
    ratio_pad: tuple[tuple[int, int], tuple[int, int]] | None = None,
    padding: bool = True,
    mode: str = "bilinear",
) -> torch.Tensor:
    """Rescale segment masks to target shape.

    The letterbox padding is cropped away from the masks first, then the remaining region is interpolated to the
    target size. If the masks already have the target size they are returned unchanged (same tensor object).

    Args:
        masks (torch.Tensor): Masks with shape (N, C, H, W).
        shape (tuple[int, int]): Target height and width as (height, width). Only the first two entries are used, so
            an (height, width, channels) shape is also accepted.
        ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_w, pad_h)).
        padding (bool): Whether masks are based on YOLO-style augmented images with padding.
        mode (str): Interpolation mode, e.g. 'bilinear' for logits or 'nearest' for integer class maps.

    Returns:
        (torch.Tensor): Rescaled masks.
    """
    im1_h, im1_w = masks.shape[2:]
    im0_h, im0_w = shape[:2]
    if im1_h == im0_h and im1_w == im0_w:
        return masks

    if ratio_pad is None:  # calculate from im0_shape
        gain = min(im1_h / im0_h, im1_w / im0_w)  # gain = old / new
        pad_w, pad_h = (im1_w - round(im0_w * gain)), (im1_h - round(im0_h * gain))  # wh padding
        if padding:
            # Centered letterbox: half of the total padding sits on each side.
            pad_w /= 2
            pad_h /= 2
    else:
        pad_w, pad_h = ratio_pad[1]

    # The -0.1 / +0.1 offsets bias the rounding of half-pixel paddings so the crop is asymmetric by one pixel.
    top, left = (round(pad_h - 0.1), round(pad_w - 0.1)) if padding else (0, 0)
    bottom = im1_h - round(pad_h + 0.1)
    right = im1_w - round(pad_w + 0.1)
    # Pass the sliced (h, w) rather than the raw `shape` so the interpolation size always has exactly two entries.
    return F.interpolate(masks[..., top:bottom, left:right].float(), (im0_h, im0_w), mode=mode)  # NCHW masks


# Link to this section ultralytics.utils.ops.scale_coords#
def scale_coords(img1_shape, coords, img0_shape, ratio_pad = None, normalize: bool = False, padding: bool = True)
Rescale segment coordinates from img1_shape to img0_shape.
Args
| Name | Type | Description | Default |
|---|---|---|---|
img1_shape | tuple | Source image shape as HWC or HW (supports both). | required |
coords | torch.Tensor | Coordinates to scale with shape (N, 2). | required |
img0_shape | tuple | Image 0 shape as HWC or HW (supports both). | required |
ratio_pad | tuple, optional | Ratio and padding values as ((ratio_h, ratio_w), (pad_w, pad_h)). | None |
normalize | bool | Whether to normalize coordinates to range [0, 1]. | False |
padding | bool | Whether coordinates are based on YOLO-style augmented images with padding. | True |
Returns
| Type | Description |
|---|---|
torch.Tensor | Scaled coordinates. |
Source code in ultralytics/utils/ops.py
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool = False, padding: bool = True):
    """Rescale segment coordinates from img1_shape to img0_shape.

    The x/y columns of `coords` are modified in place: the padding offset is removed, the values are divided by the
    resize gain, the result is passed through `clip_coords`, and optionally divided by the target width/height.

    Args:
        img1_shape (tuple): Source image shape as HWC or HW (supports both).
        coords (torch.Tensor): Coordinates to scale with shape (N, 2).
        img0_shape (tuple): Image 0 shape as HWC or HW (supports both).
        ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_w, pad_h)).
        normalize (bool): Whether to normalize coordinates to range [0, 1].
        padding (bool): Whether coordinates are based on YOLO-style augmented images with padding.

    Returns:
        (torch.Tensor): Scaled coordinates.
    """
    dst_h, dst_w = img0_shape[:2]  # supports both HWC or HW shapes
    if ratio_pad is not None:
        gain, pad = ratio_pad[0][0], ratio_pad[1]
    else:  # derive gain and padding from the two image shapes
        src_h, src_w = img1_shape[:2]  # supports both HWC or HW shapes
        gain = min(src_h / dst_h, src_w / dst_w)  # gain = old / new
        pad = (src_w - round(dst_w * gain)) / 2, (src_h - round(dst_h * gain)) / 2  # wh padding

    if padding:
        coords[..., 0] -= pad[0]  # x padding
        coords[..., 1] -= pad[1]  # y padding
    for axis in (0, 1):  # undo the resize gain on x, then y
        coords[..., axis] /= gain
    coords = clip_coords(coords, img0_shape)
    if normalize:
        coords[..., 0] /= dst_w  # width
        coords[..., 1] /= dst_h  # height
    return coords


# Link to this section ultralytics.utils.ops.regularize_rboxes#
def regularize_rboxes(rboxes)
Regularize rotated bounding boxes to range [0, pi/2).
Args
| Name | Type | Description | Default |
|---|---|---|---|
rboxes | torch.Tensor | Input rotated boxes with shape (N, 5) in xywhr format. | required |
Returns
| Type | Description |
|---|---|
torch.Tensor | Regularized rotated boxes. |
Source code in ultralytics/utils/ops.py
def regularize_rboxes(rboxes):
    """Regularize rotated bounding boxes to range [0, pi/2).

    Args:
        rboxes (torch.Tensor): Input rotated boxes with shape (N, 5) in xywhr format.

    Returns:
        (torch.Tensor): Regularized rotated boxes.
    """
    half_pi = math.pi / 2
    cx, cy, width, height, angle = rboxes.unbind(dim=-1)
    # Width and height trade places when the angle, taken modulo pi, lies in [pi/2, pi).
    needs_swap = angle % math.pi >= half_pi
    new_width = torch.where(needs_swap, height, width)
    new_height = torch.where(needs_swap, width, height)
    return torch.stack([cx, cy, new_width, new_height, angle % half_pi], dim=-1)  # regularized boxes


# Link to this section ultralytics.utils.ops.masks2segments#
def masks2segments(masks: np.ndarray | torch.Tensor, strategy: str = "all") -> list[np.ndarray]
Convert masks to segments using contour detection.
Args
| Name | Type | Description | Default |
|---|---|---|---|
masks | `np.ndarray \| torch.Tensor` | Binary masks with shape (N, H, W). | required |
strategy | str | Segmentation strategy, either 'all' or 'largest'. | "all" |
Returns
| Type | Description |
|---|---|
list | List of segment masks as float32 arrays. |
Source code in ultralytics/utils/ops.py
def masks2segments(masks: np.ndarray | torch.Tensor, strategy: str = "all") -> list[np.ndarray]:
    """Convert masks to segments using contour detection.

    Args:
        masks (np.ndarray | torch.Tensor): Binary masks with shape (N, H, W).
        strategy (str): Segmentation strategy, either 'all' or 'largest'.

    Returns:
        (list): One float32 array of shape (K, 2) per mask holding its contour points; the array is empty (0, 2)
            when no contour is found.

    Raises:
        ValueError: If `strategy` is not 'all' or 'largest'.
    """
    # Validate first: an unknown strategy would otherwise leave the raw contour tuple untouched and fail later with
    # an opaque AttributeError on `.astype`.
    if strategy not in {"all", "largest"}:
        raise ValueError(f"strategy must be 'all' or 'largest', got {strategy!r}")

    from ultralytics.data.converter import merge_multi_segment

    masks = masks.astype("uint8") if isinstance(masks, np.ndarray) else masks.byte().cpu().numpy()
    segments = []
    for mask in np.ascontiguousarray(masks):
        contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
        if not contours:
            segment = np.zeros((0, 2))  # no segments found
        elif strategy == "largest":  # select the contour with the most points
            segment = np.array(contours[np.array([len(pts) for pts in contours]).argmax()]).reshape(-1, 2)
        elif len(contours) > 1:  # strategy == "all": merge and concatenate all segments
            segment = np.concatenate(merge_multi_segment([pts.reshape(-1, 2) for pts in contours]))
        else:  # strategy == "all" with a single contour
            segment = contours[0].reshape(-1, 2)
        segments.append(segment.astype("float32"))
    return segments


# Link to this section ultralytics.utils.ops.convert_torch2numpy_batch#
def convert_torch2numpy_batch(batch: torch.Tensor) -> np.ndarray
Convert a batch of FP32 torch tensors to NumPy uint8 arrays, changing from BCHW to BHWC layout.
Args
| Name | Type | Description | Default |
|---|---|---|---|
batch | torch.Tensor | Input tensor batch with shape (Batch, Channels, Height, Width) and dtype torch.float32. | required |
Returns
| Type | Description |
|---|---|
np.ndarray | Output NumPy array batch with shape (Batch, Height, Width, Channels) and dtype uint8. |
Source code in ultralytics/utils/ops.py
def convert_torch2numpy_batch(batch: torch.Tensor) -> np.ndarray:
    """Convert a batch of FP32 torch tensors to NumPy uint8 arrays, changing from BCHW to BHWC layout.

    Args:
        batch (torch.Tensor): Input tensor batch with shape (Batch, Channels, Height, Width) and dtype torch.float32.

    Returns:
        (np.ndarray): Output NumPy array batch with shape (Batch, Height, Width, Channels) and dtype uint8.
    """
    channels_last = batch.permute(0, 2, 3, 1).contiguous()  # BCHW -> BHWC
    scaled = (channels_last * 255).clamp(0, 255)  # scale by 255 and clamp into the uint8 range
    return scaled.byte().cpu().numpy()


# Link to this section ultralytics.utils.ops.clean_str#
def clean_str(s)
Clean a string by replacing special characters with '_' character.
Args
| Name | Type | Description | Default |
|---|---|---|---|
s | str | A string needing special characters replaced. | required |
Returns
| Type | Description |
|---|---|
str | A string with special characters replaced by an underscore _. |
Source code in ultralytics/utils/ops.py
def clean_str(s):
    """Clean a string by replacing special characters with '_' character.

    Args:
        s (str): A string needing special characters replaced.

    Returns:
        (str): A string with special characters replaced by an underscore _.
    """
    special_chars = "[|@#!¡·$€%&()=?¿^*;:,¨`><+]"  # character class: each listed symbol is replaced individually
    return re.sub(special_chars, "_", s)


# Link to this section ultralytics.utils.ops.empty_like#
def empty_like(x)
Create empty torch.Tensor or np.ndarray with same shape and dtype as input.
Args
| Name | Type | Description | Default |
|---|---|---|---|
x | `torch.Tensor \| np.ndarray` | Input tensor or array whose shape and dtype are copied. | required |
Source code in ultralytics/utils/ops.py
def empty_like(x):
    """Create empty torch.Tensor or np.ndarray with same shape and dtype as input.

    Args:
        x (torch.Tensor | np.ndarray): Input whose shape and dtype are copied.

    Returns:
        (torch.Tensor | np.ndarray): Uninitialized tensor when `x` is a torch.Tensor, otherwise an uninitialized
            NumPy array.
    """
    if isinstance(x, torch.Tensor):
        return torch.empty_like(x, dtype=x.dtype)
    return np.empty_like(x, dtype=x.dtype)


# Link to this section ultralytics.utils.ops.linear_sum_assignment#
def linear_sum_assignment(cost_matrix)
Solve the rectangular linear sum assignment problem (minimum-cost one-to-one matching).
Uses scipy.optimize.linear_sum_assignment when SciPy is installed (faster compiled C++ solver), and otherwise
falls back to an equivalent pure-NumPy implementation of the same modified Jonker-Volgenant shortest augmenting path
algorithm (Crouse 2016). This keeps SciPy out of Ultralytics' required dependencies while preserving its speed when
present. SciPy is imported lazily so it never slows import ultralytics. For a rectangular matrix only min(rows,
columns) entries are matched.
The NumPy fallback expects finite costs: an inf entry marks a forbidden assignment (matching SciPy), while NaN
is not rejected (SciPy raises), so callers must sanitize NaN upstream (e.g. the RT-DETR matcher zeros NaN/inf
beforehand). The two backends may return a different equal-cost assignment under exact ties, but the total cost is
identical.
The NumPy fallback is validated against SciPy with exact optimal-cost parity across ~6.9k randomized cases (every
shape including empty/tall/wide, ties, negatives, IoU- and RT-DETR-style matrices, maximize via negation,
torch-tensor input) plus ~2k independent brute-force global-optimum checks. SciPy's compiled inner loop is faster,
but at the call-site sizes (smaller dimension = object count) the fallback runs in well under a millisecond:
| cost matrix | NumPy | SciPy |
|---|---|---|
| 300 x 20 | 0.2ms | 0.02ms |
| 300 x 80 | 0.6ms | 0.1ms |
| 300 x 300 | 28ms | 1.5ms |
Args
| Name | Type | Description | Default |
|---|---|---|---|
cost_matrix | `np.ndarray \| torch.Tensor` | Cost matrix with shape (N, M) and finite values. | required |
Returns
| Type | Description |
|---|---|
row_ind (np.ndarray) | Row indices of the optimal assignment, sorted ascending, with length min(N, M). |
col_ind (np.ndarray) | Column indices matched to each row in row_ind. |
Examples
>>> cost = np.array([[4, 1, 3], [2, 0, 5], [3, 2, 2]], dtype=float)
>>> row_ind, col_ind = linear_sum_assignment(cost)
>>> float(cost[row_ind, col_ind].sum())
5.0
Source code in ultralytics/utils/ops.py
def linear_sum_assignment(cost_matrix):
    """Solve the rectangular linear sum assignment problem (minimum-cost one-to-one matching).

    Dispatches to `scipy.optimize.linear_sum_assignment` when SciPy is installed (faster compiled C++ solver) and
    otherwise to an equivalent pure-NumPy implementation of the same modified Jonker-Volgenant shortest augmenting
    path algorithm (Crouse 2016). SciPy therefore stays out of Ultralytics' required dependencies while its speed is
    kept when present. The SciPy import is lazy, so it never slows `import ultralytics`, and the chosen backend is
    cached in a module-level variable after the first call. For a rectangular matrix only min(rows, columns) entries
    are matched.

    The NumPy fallback expects finite costs: an `inf` entry marks a forbidden assignment (matching SciPy), while
    `NaN` is not rejected (SciPy raises), so callers must sanitize NaN upstream (e.g. the RT-DETR matcher zeros
    NaN/inf beforehand). Under exact ties the two backends may return different equal-cost assignments, but the total
    cost is identical.

    The NumPy fallback is validated against SciPy with exact optimal-cost parity across ~6.9k randomized cases (every
    shape including empty/tall/wide, ties, negatives, IoU- and RT-DETR-style matrices, `maximize` via negation,
    torch-tensor input) plus ~2k independent brute-force global-optimum checks. SciPy's compiled inner loop is
    faster, but at the call-site sizes (smaller dimension = object count) the fallback runs in well under a
    millisecond:

        cost matrix    NumPy    SciPy
        300 x 20       0.2ms    0.02ms
        300 x 80       0.6ms    0.1ms
        300 x 300      28ms     1.5ms

    Args:
        cost_matrix (np.ndarray | torch.Tensor): Cost matrix with shape (N, M) and finite values.

    Returns:
        row_ind (np.ndarray): Row indices of the optimal assignment, sorted ascending, with length min(N, M).
        col_ind (np.ndarray): Column indices matched to each row in row_ind.

    Examples:
        >>> cost = np.array([[4, 1, 3], [2, 0, 5], [3, 2, 2]], dtype=float)
        >>> row_ind, col_ind = linear_sum_assignment(cost)
        >>> float(cost[row_ind, col_ind].sum())
        5.0
    """
    global _assignment_solver
    if _assignment_solver is None:  # pick the backend on first use; later calls reuse the cached choice
        try:
            from scipy.optimize import linear_sum_assignment as scipy_solver  # compiled C++ solver when installed
        except ImportError:
            _assignment_solver = _linear_sum_assignment_numpy
        else:
            _assignment_solver = scipy_solver
    costs = np.asarray(cost_matrix, dtype=np.float64)  # both backends receive a float64 NumPy array
    return _assignment_solver(costs)


# Link to this section ultralytics.utils.ops._linear_sum_assignment_numpy#
def _linear_sum_assignment_numpy(a)
Solve the rectangular linear sum assignment problem with NumPy (Jonker-Volgenant SciPy-free fallback).
Args
| Name | Type | Description | Default |
|---|---|---|---|
a | np.ndarray | Cost matrix of shape (N, M) with dtype float64 and finite values. | required |
Returns
| Type | Description |
|---|---|
row_ind (np.ndarray) | Row indices of the optimal assignment, sorted ascending, with length min(N, M). |
col_ind (np.ndarray) | Column indices matched to each row in row_ind. |
Source code in ultralytics/utils/ops.py
def _linear_sum_assignment_numpy(a):
"""Solve the rectangular linear sum assignment problem with NumPy (Jonker-Volgenant SciPy-free fallback).
Args:
a (np.ndarray): Cost matrix of shape (N, M) with dtype float64 and finite values.
Returns:
row_ind (np.ndarray): Row indices of the optimal assignment, sorted ascending, with length min(N, M).
col_ind (np.ndarray): Column indices matched to each row in row_ind.
"""
n, m = a.shape
if n == 0 or m == 0:
return np.empty(0, dtype=np.intp), np.empty(0, dtype=np.intp)
transposed = n > m
if transposed:
a, n, m = a.T, m, n # ensure rows <= columns
u, v = np.zeros(n + 1), np.zeros(m + 1) # row and column dual potentials
p, way = np.zeros(m + 1, np.intp), np.zeros(m + 1, np.intp) # column->row matches and path pointers
for i in range(1, n + 1):
p[0], j0 = i, 0
minv, used = np.full(m + 1, np.inf), np.zeros(m + 1, bool)
while True: # grow a shortest augmenting path from row i
used[j0] = True
i0 = p[j0]
cur = a[i0 - 1] - u[i0] - v[1:]
improve = (~used[1:]) & (cur < minv[1:])
minv[1:][improve], way[1:][improve] = cur[improve], j0
j1 = int(np.argmin(np.where(used[1:], np.inf, minv[1:]))) + 1
delta = minv[j1]
u[p[used]] += delta
v[used] -= delta
minv[~used] -= delta
j0 = j1
if p[j0] == 0:
break
while j0: # augment along the path
p[j0] = p[way[j0]]
j0 = way[j0]
cols = np.nonzero(p[1:])[0]
rows = p[1:][cols] - 1
row_ind, col_ind = (cols, rows) if transposed else (rows, cols)
order = np.argsort(row_ind, kind="stable") # match scipy's row-sorted output
return row_ind[order].astype(np.intp), col_ind[order].astype(np.intp)