Skip to content

Reference for hub_sdk/modules/datasets.py

Improvements

This page is sourced from https://github.com/ultralytics/hub-sdk/blob/main/hub_sdk/modules/datasets.py. Have an improvement or example to add? Open a Pull Request — thank you! 🙏


class hub_sdk.modules.datasets.Datasets

Datasets(self, dataset_id: str | None = None, headers: dict[str, Any] | None = None)

Bases: CRUDClient

A class representing a client for interacting with Datasets through CRUD operations.

This class extends the CRUDClient class and provides specific methods for working with Datasets.

Args

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| dataset_id | str, optional | Unique id of the dataset. | None |
| headers | Dict, optional | Headers to include in HTTP requests. | None |

Attributes

| Name | Type | Description |
| --- | --- | --- |
| hub_client | DatasetUpload | An instance of DatasetUpload used for interacting with dataset uploads. |
| id | str \| None | The unique identifier of the dataset, if available. |
| data | Dict | A dictionary to store dataset data. |

Methods

| Name | Description |
| --- | --- |
| create_dataset | Create a new dataset with the provided data and set the dataset ID for the current instance. |
| delete | Delete the dataset resource represented by this instance. |
| get_data | Retrieve data for the current dataset instance. |
| get_download_link | Get dataset download link. |
| update | Update the dataset resource represented by this instance. |
| upload_dataset | Upload a dataset file to the hub. |

Notes

The 'id' attribute is set during initialization and can be used to uniquely identify a dataset. The 'data' attribute is used to store dataset data fetched from the API.

Source code in hub_sdk/modules/datasets.py · View on GitHub
class Datasets(CRUDClient):
    """CRUD client specialised for dataset resources.

    Builds on CRUDClient and adds dataset-specific helpers on top of the generic CRUD operations.

    Attributes:
        hub_client (DatasetUpload): Upload helper created from the same headers as this client.
        id (str | None): Identifier of the dataset this instance represents, or None when not bound to one yet.
        data (Dict): Dataset data most recently fetched from the API; starts out empty.

    Notes:
        'id' is assigned at construction time from the 'dataset_id' argument.
        'data' is populated by get_data(), which the constructor calls only when an id was supplied.
    """

    def __init__(self, dataset_id: str | None = None, headers: dict[str, Any] | None = None):
        """Set up the client and, when an id is given, fetch that dataset's data.

        Args:
            dataset_id (str, optional): Unique id of the dataset.
            headers (Dict, optional): Headers to include in HTTP requests.
        """
        super().__init__("datasets", "dataset", headers)
        self.hub_client = DatasetUpload(headers)
        self.id, self.data = dataset_id, {}

        # Nothing to fetch until the instance is bound to a dataset.
        if not dataset_id:
            return
        self.get_data()


method hub_sdk.modules.datasets.Datasets.create_dataset

def create_dataset(self, dataset_data: dict) -> None

Create a new dataset with the provided data and set the dataset ID for the current instance.

Args

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| dataset_data | Dict | A dictionary containing the data for creating the dataset. | required |

Source code in hub_sdk/modules/datasets.py · View on GitHub
def create_dataset(self, dataset_data: dict) -> None:
    """Create a new dataset with the provided data and set the dataset ID for the current instance.

    The creation request is delegated to the parent class. The new id is read from the
    ``data.id`` field of the JSON response, stored on ``self.id``, and the dataset data is
    then loaded through ``get_data()``.

    Args:
        dataset_data (Dict): A dictionary containing the data for creating the dataset.

    Notes:
        If the create request yields no response object, an error is logged and the instance is
        left unchanged instead of raising AttributeError on ``None``.
    """
    response = super().create(dataset_data)
    # NOTE(review): sibling methods document "None if ... fails" for the parent CRUD calls, so
    # create() is assumed to be able to return None as well -- confirm against CRUDClient.
    if response is None:
        self.logger.error("Received no response from the server while creating the dataset.")
        return

    # Tolerate a null JSON body or a null "data" field rather than crashing on `.get`.
    payload = response.json() or {}
    self.id = (payload.get("data") or {}).get("id")
    self.get_data()


method hub_sdk.modules.datasets.Datasets.delete

def delete(self, hard: bool = False) -> Response | None

Delete the dataset resource represented by this instance.

Args

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| hard | bool, optional | If True, perform a hard delete. | False |

Returns

| Type | Description |
| --- | --- |
| Optional[Response] | Response object from the delete request, or None if delete fails. |

Notes

The 'hard' parameter determines whether to perform a soft delete (default) or a hard delete. In a soft delete, the dataset might be marked as deleted but retained in the system. In a hard delete, the dataset is permanently removed from the system.

Source code in hub_sdk/modules/datasets.py · View on GitHub
def delete(self, hard: bool = False) -> Response | None:
    """Remove the dataset this instance refers to.

    Forwards this instance's id and the ``hard`` flag to the parent class's delete operation.

    Args:
        hard (bool, optional): If True, perform a hard delete.

    Returns:
        (Optional[Response]): Response object from the delete request, or None if delete fails.

    Notes:
        With the default ``hard=False`` a soft delete is requested: the dataset might be marked
        as deleted but retained in the system. With ``hard=True`` the dataset is permanently
        removed from the system.
    """
    dataset_id = self.id
    return super().delete(dataset_id, hard)


method hub_sdk.modules.datasets.Datasets.get_data

def get_data(self) -> None

Retrieve data for the current dataset instance.

If a valid dataset ID has been set, it sends a request to fetch the dataset data and stores it in the instance. If no dataset ID has been set, it logs an error message.

Source code in hub_sdk/modules/datasets.py · View on GitHub
def get_data(self) -> None:
    """Retrieve data for the current dataset instance.

    If a dataset ID has been set, the dataset is read through the parent class and the ``data``
    field of the JSON response is stored on ``self.data``. If no dataset ID has been set, an
    error is logged and nothing is fetched.

    Every failure path (no id, no response, response without ``json``, empty JSON body, or any
    exception raised while fetching/parsing) is logged and leaves ``self.data`` untouched; this
    method does not raise.
    """
    if not self.id:
        self.logger.error("No dataset id has been set. Update the dataset id or create a dataset.")
        return

    try:
        response = super().read(self.id)

        if response is None:
            self.logger.error(f"Received no response from the server for dataset ID: {self.id}")
            return

        # Check if the response has a .json() method (it should if it's a response object)
        if not hasattr(response, "json"):
            self.logger.error(f"Invalid response object received for dataset ID: {self.id}")
            return

        resp_data = response.json()
        if resp_data is None:
            self.logger.error(f"No data received in the response for dataset ID: {self.id}")
            return

        # A present-but-null "data" field must not leave self.data as None: other methods call
        # self.data.get(...), so always keep a dictionary here.
        payload = resp_data.get("data")
        self.data = {} if payload is None else payload
        self.logger.debug(f"Dataset data retrieved for ID: {self.id}")

    except Exception as e:
        # Deliberately broad: fetching is best-effort and callers rely on this method not raising.
        self.logger.error(f"An error occurred while retrieving data for dataset ID: {self.id}, {e}")


method hub_sdk.modules.datasets.Datasets.get_download_link

def get_download_link(self) -> str | None

Get dataset download link.

Returns

| Type | Description |
| --- | --- |
| Optional[str] | Return download link or None if the link is not available. |

Source code in hub_sdk/modules/datasets.py · View on GitHub
def get_download_link(self) -> str | None:
    """Get dataset download link.

    Returns:
        (Optional[str]): Return download link or None if the link is not available.
    """
    return self.data.get("url")


method hub_sdk.modules.datasets.Datasets.update

def update(self, data: dict) -> Response | None

Update the dataset resource represented by this instance.

Args

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | Dict | The updated data for the dataset resource. | required |

Returns

| Type | Description |
| --- | --- |
| Optional[Response] | Response object from the update request, or None if update fails. |

Source code in hub_sdk/modules/datasets.py · View on GitHub
def update(self, data: dict) -> Response | None:
    """Apply new data to the dataset this instance refers to.

    Forwards this instance's id together with ``data`` to the parent class's update operation.

    Args:
        data (Dict): The updated data for the dataset resource.

    Returns:
        (Optional[Response]): Response object from the update request, or None if update fails.
    """
    dataset_id = self.id
    return super().update(dataset_id, data)


method hub_sdk.modules.datasets.Datasets.upload_dataset

def upload_dataset(self, file: str | None = None) -> Response | None

Upload a dataset file to the hub.

Args

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| file | str, optional | The path to the dataset file to upload. | None |

Returns

| Type | Description |
| --- | --- |
| Optional[Response] | Response object from the upload request, or None if upload fails. |

Source code in hub_sdk/modules/datasets.py · View on GitHub
def upload_dataset(self, file: str | None = None) -> Response | None:
    """Send a dataset file to the hub for this dataset.

    Hands this instance's id and the given file path to ``self.hub_client.upload_dataset`` and
    returns whatever that call returns.

    Args:
        file (str, optional): The path to the dataset file to upload.

    Returns:
        (Optional[Response]): Response object from the upload request, or None if upload fails.
    """
    uploader = self.hub_client
    return uploader.upload_dataset(self.id, file)





class hub_sdk.modules.datasets.DatasetList

DatasetList(self, page_size = None, public = None, headers = None)

Bases: PaginatedList

A class for managing a paginated list of datasets from the Ultralytics Hub API.

Args

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| page_size | int, optional | The number of items to request per page. | None |
| public | bool, optional | Whether the items should be publicly accessible. | None |
| headers | Dict, optional | Headers to be included in API requests. | None |

Source code in hub_sdk/modules/datasets.py · View on GitHub
class DatasetList(PaginatedList):
    """Paginated listing of datasets from the Ultralytics Hub API."""

    def __init__(self, page_size=None, public=None, headers=None):
        """Set up pagination over the "datasets" endpoint.

        All arguments are passed straight through to PaginatedList, after the fixed
        "datasets" endpoint and "dataset" name.

        Args:
            page_size (int, optional): The number of items to request per page.
            public (bool, optional): Whether the items should be publicly accessible.
            headers (Dict, optional): Headers to be included in API requests.
        """
        super().__init__("datasets", "dataset", page_size, public, headers)