Reference for `ultralytics/hub/session.py`

Note

This file is available at https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/session.py. If you spot a problem please help fix it by contributing a Pull Request 🛠️. Thank you 🙏!

ultralytics.hub.session.HUBTrainingSession

HUBTrainingSession(identifier: str)

HUB training session for Ultralytics HUB YOLO models.

This class encapsulates the functionality for interacting with Ultralytics HUB during model training, including model creation, metrics tracking, and checkpoint uploading.

Attributes:

Name	Type	Description
`model_id`	`str`	Identifier for the YOLO model being trained.
`model_url`	`str`	URL for the model in Ultralytics HUB.
`rate_limits`	`dict[str, int]`	Rate limits for different API calls in seconds.
`timers`	`dict[str, Any]`	Timers for rate limiting.
`metrics_queue`	`dict[str, Any]`	Queue for the model's metrics.
`metrics_upload_failed_queue`	`dict[str, Any]`	Queue for metrics that failed to upload.
`model`	`Any`	Model data fetched from Ultralytics HUB.
`model_file`	`str`	Path to the model file.
`train_args`	`dict[str, Any]`	Arguments for training the model.
`client`	`Any`	Client for interacting with Ultralytics HUB.
`filename`	`str`	Filename of the model.

Examples:

Create a training session with a model URL

>>> session = HUBTrainingSession("https://hub.ultralytics.com/models/example-model")
>>> session.upload_metrics()

Parameters:

Name	Type	Description	Default
`identifier`	`str`	Model identifier used to initialize the HUB training session. It can be a URL string or a model key with specific format.	required

Raises:

Type	Description
`ValueError`	If the provided model identifier is invalid.
`ConnectionError`	If connecting with global API key is not supported.
`ModuleNotFoundError`	If hub-sdk package is not installed.

Source code in ultralytics/hub/session.py

def __init__(self, identifier: str):
    """Set up a HUB training session for the model referenced by `identifier`.

    Args:
        identifier (str): Model identifier used to initialize the HUB training session. It is handed to
            `_parse_identifier`, which yields an API key, a model ID and a filename.

    Raises:
        ValueError: If the provided model identifier is invalid (presumably raised by `_parse_identifier`, which
            is not visible here).
        ConnectionError: If connecting with global API key is not supported (presumably raised by
            `_parse_identifier` as well).
        ModuleNotFoundError: If hub-sdk package is not installed.

    Notes:
        Exceptions raised while loading the model are caught and not re-raised. A login hint is logged only when
        `identifier` is a HUB model URL and the client is not authenticated.
    """
    from hub_sdk import HUBClient

    # Session state populated later (by load_model/create_model or the training callbacks)
    self.model = self.model_url = self.model_file = self.train_args = None
    self.timers = {}  # timers used in ultralytics/utils/callbacks/hub.py
    self.metrics_upload_failed_queue = {}  # per-epoch metrics whose upload failed
    self.metrics_queue = {}  # per-epoch metrics waiting to be uploaded
    self.rate_limits = {"metrics": 3, "ckpt": 900, "heartbeat": 300}  # seconds between API calls

    # Split the identifier into its components
    key_from_identifier, model_id, self.filename = self._parse_identifier(identifier)

    # An API key embedded in the identifier wins over the one stored in SETTINGS
    resolved_key = key_from_identifier or SETTINGS.get("api_key")
    self.client = HUBClient({"api_key": resolved_key} if resolved_key else None)

    # Fetch the existing model, or start from an empty one when no model ID was given
    try:
        if not model_id:
            self.model = self.client.model()
        else:
            self.load_model(model_id)
    except Exception:
        is_hub_model_url = identifier.startswith(f"{HUB_WEB_ROOT}/models/")
        if is_hub_model_url and not self.client.authenticated:
            LOGGER.warning(
                f"{PREFIX}Please log in using 'yolo login API_KEY'. "
                "You can find your API Key at: https://hub.ultralytics.com/settings?tab=api+keys."
            )

create_model

create_model(model_args: dict[str, Any])

Initialize a HUB training session with the specified model arguments.

Parameters:

Name	Type	Description	Default
`model_args`	`dict[str, Any]`	Arguments for creating the model, including batch size, epochs, image size, etc.	required

Returns:

Type	Description
`None`	Always; the method returns early, before starting heartbeats, if the model could not be created.

Source code in ultralytics/hub/session.py

def create_model(self, model_args: dict[str, Any]):
    """Create a model on Ultralytics HUB from the given training arguments and start its heartbeat.

    Args:
        model_args (dict[str, Any]): Arguments for creating the model. The keys read here are `batch`, `epochs`,
            `imgsz`, `patience`, `device`, `cache` and `data`.

    Returns:
        (None): Always. When the model could not be created (no model ID after the request), the method returns
            before setting `model_url` or starting heartbeats.
    """
    name = self.filename

    # Training configuration, with defaults for any argument that is absent
    config = {
        "batchSize": model_args.get("batch", -1),
        "epochs": model_args.get("epochs", 300),
        "imageSize": model_args.get("imgsz", 640),
        "patience": model_args.get("patience", 100),
        "device": str(model_args.get("device", "")),  # convert None to string
        "cache": str(model_args.get("cache", "ram")),  # convert True, False, None to string
    }

    # Only a '.pt' file is recorded as the parent; otherwise the parent entry stays empty
    parent = {"name": name} if name.endswith(".pt") else {}
    architecture = {"name": name.replace(".pt", "").replace(".yaml", "")}

    payload = {
        "config": config,
        "dataset": {"name": model_args.get("data")},
        "lineage": {"architecture": architecture, "parent": parent},
        "meta": {"name": name},
    }
    self.model.create_model(payload)

    # Model could not be created
    # TODO: improve error handling
    if not self.model.id:
        return None

    self.model_url = f"{HUB_WEB_ROOT}/models/{self.model.id}"

    # Start heartbeats for HUB to monitor agent
    self.model.start_heartbeat(self.rate_limits["heartbeat"])

    LOGGER.info(f"{PREFIX}View model at {self.model_url} 🚀")

create_session`classmethod`

create_session(identifier: str, args: dict[str, Any] | None = None)

Create an authenticated HUBTrainingSession or return None.

Parameters:

Name	Type	Description	Default
`identifier`	`str`	Model identifier used to initialize the HUB training session.	required
`args`	`dict[str, Any] \| None`	Arguments for creating a new model if identifier is not a HUB model URL.	`None`

Returns:

Name	Type	Description
`session`	`HUBTrainingSession \| None`	An authenticated session or None if creation fails.

Source code in ultralytics/hub/session.py

@classmethod
def create_session(cls, identifier: str, args: dict[str, Any] | None = None):
    """Create an authenticated HUBTrainingSession or return None.

    Args:
        identifier (str): Model identifier used to initialize the HUB training session.
        args (dict[str, Any], optional): Arguments for creating a new model if identifier is not a HUB model URL.

    Returns:
        session (HUBTrainingSession | None): An authenticated session or None if creation fails.
    """
    try:
        session = cls(identifier)
        if args and not identifier.startswith(f"{HUB_WEB_ROOT}/models/"):  # not a HUB model URL
            session.create_model(args)
            assert session.model.id, "HUB model not loaded correctly"
        return session
    # PermissionError and ModuleNotFoundError indicate hub-sdk not installed
    except (PermissionError, ModuleNotFoundError, AssertionError):
        return None

load_model

load_model(model_id: str)

Load an existing model from Ultralytics HUB using the provided model identifier.

Parameters:

Name	Type	Description	Default
`model_id`	`str`	The identifier of the model to load.	required

Raises:

Type	Description
`ValueError`	If the specified HUB model does not exist.

Source code in ultralytics/hub/session.py

def load_model(self, model_id: str):
    """Fetch an existing Ultralytics HUB model and prepare the session to use it.

    A model that is already trained has its "best" weights resolved to a local file via `checks.check_file`.
    Otherwise the training arguments are set and heartbeats are started.

    Args:
        model_id (str): The identifier of the model to load.

    Raises:
        ValueError: If the specified HUB model does not exist.
    """
    self.model = self.client.model(model_id)
    if not self.model.data:  # no data means the model does not exist
        raise ValueError(emojis("❌ The specified HUB model does not exist"))  # TODO: improve error handling

    self.model_url = f"{HUB_WEB_ROOT}/models/{self.model.id}"

    if not self.model.is_trained():
        # Set training args and start heartbeats for HUB to monitor agent
        self._set_train_args()
        self.model.start_heartbeat(self.rate_limits["heartbeat"])
        LOGGER.info(f"{PREFIX}View model at {self.model_url} 🚀")
        return

    # Trained model: fetch its best weights into a per-model folder under the weights directory
    LOGGER.info(f"Loading trained HUB model {self.model_url} 🚀")
    weights_url = self.model.get_weights_url("best")  # download URL with auth
    download_dir = Path(SETTINGS["weights_dir"]) / "hub" / self.model.id
    self.model_file = checks.check_file(weights_url, download_dir=download_dir)

request_queue

request_queue(
    request_func,
    retry: int = 3,
    timeout: int = 30,
    thread: bool = True,
    verbose: bool = True,
    progress_total: int | None = None,
    stream_response: bool | None = None,
    *args,
    **kwargs
)

Execute request_func with retries, timeout handling, optional threading, and progress tracking.

Parameters:

Name	Type	Description	Default
`request_func`	`callable`	The function to execute.	required
`retry`	`int`	Number of retry attempts.	`3`
`timeout`	`int`	Maximum time to wait for the request to complete.	`30`
`thread`	`bool`	Whether to run the request in a separate thread.	`True`
`verbose`	`bool`	Whether to log detailed messages.	`True`
`progress_total`	`int`	Total size for progress tracking.	`None`
`stream_response`	`bool`	Whether to stream the response.	`None`
`*args`	`Any`	Additional positional arguments for request_func.	`()`
`**kwargs`	`Any`	Additional keyword arguments for request_func.	`{}`

Returns:

Type	Description
`Response \| None`	The response object if thread=False, otherwise None.

Source code in ultralytics/hub/session.py

def request_queue(
    self,
    request_func,
    retry: int = 3,
    timeout: int = 30,
    thread: bool = True,
    verbose: bool = True,
    progress_total: int | None = None,
    stream_response: bool | None = None,
    *args,
    **kwargs,
):
    """Execute request_func with retries, timeout handling, optional threading, and progress tracking.

    Args:
        request_func (callable): The function to execute.
        retry (int): Number of retry attempts.
        timeout (int): Maximum time to wait for the request to complete.
        thread (bool): Whether to run the request in a separate thread.
        verbose (bool): Whether to log detailed messages.
        progress_total (int, optional): Total size for progress tracking.
        stream_response (bool, optional): Whether to stream the response.
        *args (Any): Additional positional arguments for request_func.
        **kwargs (Any): Additional keyword arguments for request_func.

    Returns:
        (requests.Response | None): The response object if thread=False, otherwise None.
    """

    def retry_request():
        """Attempt to call request_func with retries, timeout, and optional threading."""
        t0 = time.time()  # Record the start time for the timeout
        response = None
        for i in range(retry + 1):
            if (time.time() - t0) > timeout:
                LOGGER.warning(f"{PREFIX}Timeout for request reached. {HELP_MSG}")
                break  # Timeout reached, exit loop

            response = request_func(*args, **kwargs)
            if response is None:
                LOGGER.warning(f"{PREFIX}Received no response from the request. {HELP_MSG}")
                time.sleep(2**i)  # Exponential backoff before retrying
                continue  # Skip further processing and retry

            if progress_total:
                self._show_upload_progress(progress_total, response)
            elif stream_response:
                self._iterate_content(response)

            if HTTPStatus.OK <= response.status_code < HTTPStatus.MULTIPLE_CHOICES:
                # if request related to metrics upload
                if kwargs.get("metrics"):
                    self.metrics_upload_failed_queue = {}
                return response  # Success, no need to retry

            if i == 0:
                # Initial attempt, check status code and provide messages
                message = self._get_failure_message(response, retry, timeout)

                if verbose:
                    LOGGER.warning(f"{PREFIX}{message} {HELP_MSG} ({response.status_code})")

            if not self._should_retry(response.status_code):
                LOGGER.warning(f"{PREFIX}Request failed. {HELP_MSG} ({response.status_code}")
                break  # Not an error that should be retried, exit loop

            time.sleep(2**i)  # Exponential backoff for retries

        # if request related to metrics upload and exceed retries
        if response is None and kwargs.get("metrics"):
            self.metrics_upload_failed_queue.update(kwargs.get("metrics"))

        return response

    if thread:
        # Start a new thread to run the retry_request function
        threading.Thread(target=retry_request, daemon=True).start()
    else:
        # If running in the main thread, call retry_request directly
        return retry_request()

upload_metrics

upload_metrics()

Upload model metrics to Ultralytics HUB.

Source code in ultralytics/hub/session.py

def upload_metrics(self):
    """Send a snapshot of the queued metrics to Ultralytics HUB via a threaded request."""
    queued = self.metrics_queue.copy()  # copy so later changes to the queue do not affect this request
    return self.request_queue(self.model.upload_metrics, metrics=queued, thread=True)

upload_model

upload_model(
    epoch: int,
    weights: str,
    is_best: bool = False,
    map: float = 0.0,
    final: bool = False,
) -> None

Upload a model checkpoint to Ultralytics HUB.

Parameters:

Name	Type	Description	Default
`epoch`	`int`	The current training epoch.	required
`weights`	`str`	Path to the model weights file.	required
`is_best`	`bool`	Indicates if the current model is the best one so far.	`False`
`map`	`float`	Mean average precision of the model.	`0.0`
`final`	`bool`	Indicates if the model is the final model after training.	`False`

Source code in ultralytics/hub/session.py

def upload_model(
    self,
    epoch: int,
    weights: str,
    is_best: bool = False,
    map: float = 0.0,
    final: bool = False,
) -> None:
    """Send a model checkpoint file to Ultralytics HUB.

    If the weights file is missing on the final upload and a sibling `last<suffix>` file exists, that file is
    copied to the requested path and uploaded. In any other missing-file case a warning is logged and nothing
    is uploaded.

    Args:
        epoch (int): The current training epoch.
        weights (str): Path to the model weights file.
        is_best (bool): Indicates if the current model is the best one so far.
        map (float): Mean average precision of the model.
        final (bool): Indicates if the model is the final model after training.
    """
    weights = Path(weights)

    if not weights.is_file():
        fallback = weights.with_name(f"last{weights.suffix}")
        if not (final and fallback.is_file()):
            LOGGER.warning(f"{PREFIX} Model upload issue. Missing model {weights}.")
            return

        LOGGER.warning(
            f"{PREFIX} Model 'best.pt' not found, copying 'last.pt' to 'best.pt' and uploading. "
            "This often happens when resuming training in transient environments like Google Colab. "
            "For more reliable training, consider using Ultralytics HUB Cloud. "
            "Learn more at https://docs.ultralytics.com/hub/cloud-training."
        )
        shutil.copy(fallback, weights)  # copy last.pt to best.pt

    # The final upload runs in the calling thread and shows progress; others run in a background thread
    upload_size = weights.stat().st_size if final else None
    self.request_queue(
        self.model.upload_model,
        epoch=epoch,
        weights=str(weights),
        is_best=is_best,
        map=map,
        final=final,
        retry=10,
        timeout=3600,
        thread=not final,
        progress_total=upload_size,
        stream_response=True,
    )

📅 Created 2 years ago ✏️ Updated 1 year ago

Reference for ultralytics/hub/session.py

ultralytics.hub.session.HUBTrainingSession

create_model

create_session`classmethod`

load_model

request_queue

upload_metrics

upload_model

Reference for `ultralytics/hub/session.py`

create_session`classmethod`