Reference for ultralytics/utils/autodevice.py

Note

This file is available at https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/autodevice.py. If you spot a problem please help fix it by contributing a Pull Request 🛠️. Thank you 🙏!


ultralytics.utils.autodevice.GPUInfo

GPUInfo()

Manages NVIDIA GPU information via pynvml with robust error handling.

Provides methods to query detailed GPU statistics (utilization, memory, temp, power) and select the most idle GPUs based on configurable criteria. It safely handles the absence or initialization failure of the pynvml library by logging warnings and disabling related features, preventing application crashes.

Includes fallback logic using torch.cuda for basic device counting if NVML is unavailable during GPU selection. Manages NVML initialization and shutdown internally.

Attributes:

pynvml (module | None): The pynvml module if successfully imported and initialized, otherwise None.

nvml_available (bool): Indicates whether pynvml is ready for use. True if the import and nvmlInit() call succeeded, False otherwise.

gpu_stats (list[dict]): A list of dictionaries, each holding stats for one GPU. Populated on initialization and by refresh_stats(). Keys include: 'index', 'name', 'utilization' (%), 'memory_used' (MiB), 'memory_total' (MiB), 'memory_free' (MiB), 'temperature' (C), 'power_draw' (W), and 'power_limit' (W or 'N/A'). Empty if NVML is unavailable or queries fail.
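A minimal usage sketch (assuming an NVIDIA machine with working drivers and the pynvml package installed; the printed values are illustrative only):

from ultralytics.utils.autodevice import GPUInfo

info = GPUInfo()
if info.nvml_available:
    info.print_status()  # logs a compact per-GPU table
    idle = info.select_idle_gpu(count=1, min_memory_mb=2048)
    print(idle)  # e.g. [0] if GPU 0 is the most idle device with >= 2048 MiB free
else:
    print("pynvml unavailable, GPU stats disabled")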

Source code in ultralytics/utils/autodevice.py
def __init__(self):
    """Initializes GPUInfo, attempting to import and initialize pynvml."""
    self.pynvml = None
    self.nvml_available = False
    self.gpu_stats = []

    try:
        check_requirements("pynvml>=12.0.0")
        self.pynvml = __import__("pynvml")
        self.pynvml.nvmlInit()
        self.nvml_available = True
        self.refresh_stats()
    except Exception as e:
        LOGGER.warning(f"Failed to initialize pynvml, GPU stats disabled: {e}")

__del__

__del__()

Ensures NVML is shut down when the object is garbage collected.

Source code in ultralytics/utils/autodevice.py
def __del__(self):
    """Ensures NVML is shut down when the object is garbage collected."""
    self.shutdown()

print_status

print_status()

Prints GPU status in a compact table format using current stats.

Source code in ultralytics/utils/autodevice.py
def print_status(self):
    """Prints GPU status in a compact table format using current stats."""
    self.refresh_stats()
    if not self.gpu_stats:
        LOGGER.warning("No GPU stats available.")
        return

    stats = self.gpu_stats
    name_len = max(len(gpu.get("name", "N/A")) for gpu in stats)
    hdr = f"{'Idx':<3} {'Name':<{name_len}} {'Util':>6} {'Mem (MiB)':>15} {'Temp':>5} {'Pwr (W)':>10}"
    LOGGER.info(f"\n--- GPU Status ---\n{hdr}\n{'-' * len(hdr)}")

    for gpu in stats:
        u = f"{gpu['utilization']:>5}%" if gpu["utilization"] >= 0 else " N/A "
        m = f"{gpu['memory_used']:>6}/{gpu['memory_total']:<6}" if gpu["memory_used"] >= 0 else " N/A / N/A "
        t = f"{gpu['temperature']}C" if gpu["temperature"] >= 0 else " N/A "
        p = f"{gpu['power_draw']:>3}/{gpu['power_limit']:<3}" if gpu["power_draw"] >= 0 else " N/A "

        LOGGER.info(f"{gpu.get('index'):<3d} {gpu.get('name', 'N/A'):<{name_len}} {u:>6} {m:>15} {t:>5} {p:>10}")

    LOGGER.info(f"{'-' * len(hdr)}\n")

refresh_stats

refresh_stats()

Refreshes the internal gpu_stats list by querying NVML.

Source code in ultralytics/utils/autodevice.py
def refresh_stats(self):
    """Refreshes the internal gpu_stats list by querying NVML."""
    self.gpu_stats = []
    if not self.nvml_available or not self.pynvml:
        return

    try:
        device_count = self.pynvml.nvmlDeviceGetCount()
        for i in range(device_count):
            self.gpu_stats.append(self._get_device_stats(i))
    except Exception as e:
        LOGGER.warning(f"Error during device query: {e}")
        self.gpu_stats = []
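A sketch of reading the refreshed stats directly; the dictionary keys match the Attributes description above, and the values depend on the hardware:

info = GPUInfo()
info.refresh_stats()
for gpu in info.gpu_stats:
    # -1 (or 'N/A' for power_limit) marks metrics that could not be queried
    print(gpu["index"], gpu["name"], f"{gpu['memory_free']} MiB free, {gpu['utilization']}% util")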

select_idle_gpu

select_idle_gpu(count=1, min_memory_mb=0)

Selects the 'count' most idle GPUs based on utilization and free memory.

Parameters:

count (int): The number of idle GPUs to select. Defaults to 1.

min_memory_mb (int): Minimum free memory required (MiB). Defaults to 0.

Returns:

list[int]: Indices of the selected GPUs, sorted by idleness.

Notes:

Returns fewer than 'count' indices if not enough GPUs qualify or exist. Returns basic CUDA indices if NVML fails, and an empty list if no GPUs are found.

Source code in ultralytics/utils/autodevice.py
def select_idle_gpu(self, count=1, min_memory_mb=0):
    """
    Selects the 'count' most idle GPUs based on utilization and free memory.

    Args:
        count (int): The number of idle GPUs to select. Defaults to 1.
        min_memory_mb (int): Minimum free memory required (MiB). Defaults to 0.

    Returns:
        (list[int]): Indices of the selected GPUs, sorted by idleness.

    Notes:
         Returns fewer than 'count' if not enough qualify or exist.
         Returns basic CUDA indices if NVML fails. Empty list if no GPUs found.
    """
    LOGGER.info(f"Searching for {count} idle GPUs with >= {min_memory_mb} MiB free memory...")

    if count <= 0:
        return []

    self.refresh_stats()
    if not self.gpu_stats:
        LOGGER.warning("NVML stats unavailable.")
        return []

    # Filter and sort eligible GPUs
    eligible_gpus = [
        gpu
        for gpu in self.gpu_stats
        if gpu.get("memory_free", -1) >= min_memory_mb and gpu.get("utilization", -1) != -1
    ]
    eligible_gpus.sort(key=lambda x: (x.get("utilization", 101), -x.get("memory_free", 0)))

    # Select top 'count' indices
    selected = [gpu["index"] for gpu in eligible_gpus[:count]]

    if selected:
        LOGGER.info(f"Selected idle CUDA devices {selected}")
    else:
        LOGGER.warning(f"No GPUs met criteria (Util != -1, Free Mem >= {min_memory_mb} MiB).")

    return selected
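One way to consume the selected indices; the training call is only a hypothetical downstream example, not part of this module:

info = GPUInfo()
idle = info.select_idle_gpu(count=2, min_memory_mb=4096)
device = ",".join(str(i) for i in idle) if idle else "cpu"  # e.g. "0,1"
# model.train(data="coco8.yaml", device=device)  # hypothetical consumer of the indices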

shutdown

shutdown()

Shuts down NVML if it was initialized.

Source code in ultralytics/utils/autodevice.py
def shutdown(self):
    """Shuts down NVML if it was initialized."""
    if self.nvml_available and self.pynvml:
        try:
            self.pynvml.nvmlShutdown()
        except Exception:
            pass
        self.nvml_available = False




