Skip to content

Reference for hub_sdk/modules/datasets.py

Note

This file is available at https://github.com/ultralytics/hub-sdk/blob/main/hub_sdk/modules/datasets.py. If you spot a problem, please help fix it by contributing a Pull Request 🛠️. Thank you 🙏!


hub_sdk.modules.datasets.Datasets

Datasets(dataset_id: str | None = None, headers: dict[str, Any] | None = None)

Bases: CRUDClient


              flowchart TD
              hub_sdk.modules.datasets.Datasets[Datasets]
              hub_sdk.base.crud_client.CRUDClient[CRUDClient]
              hub_sdk.base.api_client.APIClient[APIClient]

                              hub_sdk.base.crud_client.CRUDClient --> hub_sdk.modules.datasets.Datasets
                                hub_sdk.base.api_client.APIClient --> hub_sdk.base.crud_client.CRUDClient
                



              click hub_sdk.modules.datasets.Datasets href "" "hub_sdk.modules.datasets.Datasets"
              click hub_sdk.base.crud_client.CRUDClient href "" "hub_sdk.base.crud_client.CRUDClient"
              click hub_sdk.base.api_client.APIClient href "" "hub_sdk.base.api_client.APIClient"
            

A class representing a client for interacting with Datasets through CRUD operations.

This class extends the CRUDClient class and provides specific methods for working with Datasets.

Attributes:

Name | Type | Description
hub_client DatasetUpload

An instance of DatasetUpload used for interacting with dataset uploads.

id str | None

The unique identifier of the dataset, if available.

data Dict

A dictionary to store dataset data.

Notes

The 'id' attribute is set during initialization and can be used to uniquely identify a dataset. The 'data' attribute is used to store dataset data fetched from the API.

Parameters:

Name | Type | Description | Default
dataset_id str

Unique id of the dataset.

None
headers Dict

Headers to include in HTTP requests.

None
Source code in hub_sdk/modules/datasets.py
29
30
31
32
33
34
35
36
37
38
39
40
41
def __init__(self, dataset_id: str | None = None, headers: dict[str, Any] | None = None):
    """Set up a Datasets CRUD client, optionally bound to an existing dataset.

    Args:
        dataset_id (str, optional): Identifier of the dataset this client represents. When truthy, the
            dataset's data is fetched immediately via get_data().
        headers (Dict, optional): Headers handed to both the base client and the DatasetUpload helper.
    """
    super().__init__("datasets", "dataset", headers)
    self.hub_client = DatasetUpload(headers)
    self.id, self.data = dataset_id, {}
    if not dataset_id:
        # Nothing to fetch yet; an id can be assigned later (e.g. by create_dataset).
        return
    self.get_data()

create_dataset

create_dataset(dataset_data: dict) -> None

Create a new dataset with the provided data and set the dataset ID for the current instance.

Parameters:

Name | Type | Description | Default
dataset_data Dict

A dictionary containing the data for creating the dataset.

required
Source code in hub_sdk/modules/datasets.py
76
77
78
79
80
81
82
83
84
def create_dataset(self, dataset_data: dict) -> None:
    """Create a new dataset with the provided data and set the dataset ID for the current instance.

    Args:
        dataset_data (Dict): A dictionary containing the data for creating the dataset.

    Notes:
        If the create request yields no response, an error is logged and the instance is left unchanged
        instead of failing with an AttributeError on the missing response object.
    """
    response = super().create(dataset_data)
    if response is None:
        # Same convention as get_data(): a missing response means the request failed.
        self.logger.error("Received no response from the server while creating the dataset.")
        return

    payload = response.json() or {}
    # `or {}` also covers a payload whose "data" entry is present but null.
    self.id = (payload.get("data") or {}).get("id")
    self.get_data()

delete

delete(hard: bool = False) -> Response | None

Delete the dataset resource represented by this instance.

Parameters:

Name | Type | Description | Default
hard bool

If True, perform a hard delete.

False

Returns:

Type | Description
Optional[Response]

Response object from the delete request, or None if delete fails.

Notes

The 'hard' parameter determines whether to perform a soft delete (default) or a hard delete. In a soft delete, the dataset might be marked as deleted but retained in the system. In a hard delete, the dataset is permanently removed from the system.

Source code in hub_sdk/modules/datasets.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def delete(self, hard: bool = False) -> Response | None:
    """Remove the dataset that this instance points to.

    Args:
        hard (bool, optional): Forwarded to the base client's delete. True requests a hard delete; False (the
            default) requests a soft delete.

    Returns:
        (Optional[Response]): Response object from the delete request, or None if delete fails.

    Notes:
        A soft delete might only mark the dataset as deleted while keeping it in the system.
        A hard delete removes the dataset from the system permanently.
    """
    response = super().delete(self.id, hard)
    return response

get_data

get_data() -> None

Retrieve data for the current dataset instance.

If a valid dataset ID has been set, it sends a request to fetch the dataset data and stores it in the instance. If no dataset ID has been set, it logs an error message.

Source code in hub_sdk/modules/datasets.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def get_data(self) -> None:
    """Fetch this dataset's record and cache it on the instance.

    Without a dataset ID, an error is logged and no request is made. Otherwise the dataset is read through the
    base client and the "data" entry of the JSON payload is stored in self.data. A missing response, a response
    without a .json() method, an empty payload, or any raised exception is logged rather than propagated.
    """
    if self.id:
        try:
            reply = super().read(self.id)

            if reply is None:
                self.logger.error(f"Received no response from the server for dataset ID: {self.id}")
            elif not hasattr(reply, "json"):
                # Anything lacking .json() cannot be a usable response object.
                self.logger.error(f"Invalid response object received for dataset ID: {self.id}")
            else:
                payload = reply.json()
                if payload is None:
                    self.logger.error(f"No data received in the response for dataset ID: {self.id}")
                else:
                    self.data = payload.get("data", {})
                    self.logger.debug(f"Dataset data retrieved for ID: {self.id}")

        except Exception as e:
            self.logger.error(f"An error occurred while retrieving data for dataset ID: {self.id}, {e}")
    else:
        self.logger.error("No dataset id has been set. Update the dataset id or create a dataset.")
get_download_link

get_download_link() -> str | None

Get dataset download link.

Returns:

Type | Description
Optional[str]

Return download link or None if the link is not available.

Source code in hub_sdk/modules/datasets.py
124
125
126
127
128
129
130
def get_download_link(self) -> str | None:
    """Look up the download URL in the cached dataset data.

    Returns:
        (Optional[str]): The value stored under "url" in self.data, or None when that key is absent.
    """
    link = self.data.get("url")
    return link

update

update(data: dict) -> Response | None

Update the dataset resource represented by this instance.

Parameters:

Name | Type | Description | Default
data Dict

The updated data for the dataset resource.

required

Returns:

Type | Description
Optional[Response]

Response object from the update request, or None if update fails.

Source code in hub_sdk/modules/datasets.py
102
103
104
105
106
107
108
109
110
111
def update(self, data: dict) -> Response | None:
    """Apply new field values to the dataset that this instance points to.

    Args:
        data (Dict): The updated data for the dataset resource; forwarded to the base client's update together
            with this instance's dataset ID.

    Returns:
        (Optional[Response]): Response object from the update request, or None if update fails.
    """
    response = super().update(self.id, data)
    return response

upload_dataset

upload_dataset(file: str | None = None) -> Response | None

Upload a dataset file to the hub.

Parameters:

Name | Type | Description | Default
file str

The path to the dataset file to upload.

None

Returns:

Type | Description
Optional[Response]

Response object from the upload request, or None if upload fails.

Source code in hub_sdk/modules/datasets.py
113
114
115
116
117
118
119
120
121
122
def upload_dataset(self, file: str | None = None) -> Response | None:
    """Send a dataset file to the hub for this instance's dataset.

    Args:
        file (str, optional): The path to the dataset file to upload.

    Returns:
        (Optional[Response]): Whatever the upload client's upload_dataset call returns: the Response object
            from the upload request, or None if upload fails.
    """
    uploader = self.hub_client
    return uploader.upload_dataset(self.id, file)





hub_sdk.modules.datasets.DatasetList

DatasetList(page_size=None, public=None, headers=None)

Bases: PaginatedList


              flowchart TD
              hub_sdk.modules.datasets.DatasetList[DatasetList]
              hub_sdk.base.paginated_list.PaginatedList[PaginatedList]
              hub_sdk.base.api_client.APIClient[APIClient]

                              hub_sdk.base.paginated_list.PaginatedList --> hub_sdk.modules.datasets.DatasetList
                                hub_sdk.base.api_client.APIClient --> hub_sdk.base.paginated_list.PaginatedList
                



              click hub_sdk.modules.datasets.DatasetList href "" "hub_sdk.modules.datasets.DatasetList"
              click hub_sdk.base.paginated_list.PaginatedList href "" "hub_sdk.base.paginated_list.PaginatedList"
              click hub_sdk.base.api_client.APIClient href "" "hub_sdk.base.api_client.APIClient"
            

A class for managing a paginated list of datasets from the Ultralytics Hub API.

Parameters:

Name | Type | Description | Default
page_size int

The number of items to request per page.

None
public bool

Whether the items should be publicly accessible.

None
headers Dict

Headers to be included in API requests.

None
Source code in hub_sdk/modules/datasets.py
136
137
138
139
140
141
142
143
144
145
def __init__(
    self,
    page_size: int | None = None,
    public: bool | None = None,
    headers: dict[str, Any] | None = None,
):
    """Initialize a DatasetList instance.

    Args:
        page_size (int, optional): The number of items to request per page.
        public (bool, optional): Whether the items should be publicly accessible.
        headers (Dict, optional): Headers to be included in API requests.
    """
    # "datasets" is the endpoint and "dataset" the resource name, the same pair Datasets passes to its base.
    base_endpoint = "datasets"
    super().__init__(base_endpoint, "dataset", page_size, public, headers)





📅 Created 1 year ago ✏️ Updated 1 month ago
glenn-jocher