
Reference for ultralytics/utils/triton.py

Improvements

This page is sourced from https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/triton.py. Have an improvement or example to add? Open a Pull Request — thank you! 🙏


class ultralytics.utils.triton.TritonRemoteModel

TritonRemoteModel(self, url: str, endpoint: str = "", scheme: str = "")

Client for interacting with a remote Triton Inference Server model.

This class provides a convenient interface for sending inference requests to a Triton Inference Server and processing the responses. Supports both HTTP and gRPC communication protocols.

Arguments may be provided individually or parsed from a collective 'url' argument of the form <scheme>://<netloc>/<endpoint>/<task_name>
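
For example, the scheme, address, and model name can all be parsed from one string (a sketch; the address and model name are illustrative):

>>> model = TritonRemoteModel(url="http://localhost:8000/yolov8")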

Args

Name      Type           Description                                   Default
url       str            The URL of the Triton server.                 required
endpoint  str, optional  The name of the model on the Triton server.   ""
scheme    str, optional  The communication scheme ('http' or 'grpc').  ""

Attributes

Name                  Type        Description
endpoint              str         The name of the model on the Triton server.
url                   str         The URL of the Triton server.
triton_client                     The Triton client (either HTTP or gRPC).
InferInput                        The input class for the Triton client.
InferRequestedOutput              The output request class for the Triton client.
input_formats         list[str]   The data types of the model inputs.
np_input_formats      list[type]  The numpy data types of the model inputs.
input_names           list[str]   The names of the model inputs.
output_names          list[str]   The names of the model outputs.
metadata                          The metadata associated with the model.
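
After construction these attributes expose the model's resolved I/O signature, which can be inspected directly (values shown are illustrative for a single-input FP32 model):

>>> model.input_names, model.input_formats
(['images'], ['TYPE_FP32'])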

Methods

Name      Description
__call__  Call the model with the given inputs and return inference results.

Examples

Initialize a Triton client with HTTP
>>> model = TritonRemoteModel(url="localhost:8000", endpoint="yolov8", scheme="http")

Make inference with numpy arrays
>>> outputs = model(np.random.rand(1, 3, 640, 640).astype(np.float32))
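
Initialize a Triton client with gRPC (any scheme other than "http" selects the gRPC client; the port is illustrative)
>>> model = TritonRemoteModel(url="localhost:8001", endpoint="yolov8", scheme="grpc")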
Source code in ultralytics/utils/triton.py
class TritonRemoteModel:
    """Client for interacting with a remote Triton Inference Server model.

    This class provides a convenient interface for sending inference requests to a Triton Inference Server and
    processing the responses. Supports both HTTP and gRPC communication protocols.

    Attributes:
        endpoint (str): The name of the model on the Triton server.
        url (str): The URL of the Triton server.
        triton_client: The Triton client (either HTTP or gRPC).
        InferInput: The input class for the Triton client.
        InferRequestedOutput: The output request class for the Triton client.
        input_formats (list[str]): The data types of the model inputs.
        np_input_formats (list[type]): The numpy data types of the model inputs.
        input_names (list[str]): The names of the model inputs.
        output_names (list[str]): The names of the model outputs.
        metadata: The metadata associated with the model.

    Methods:
        __call__: Call the model with the given inputs and return the outputs.

    Examples:
        Initialize a Triton client with HTTP
        >>> model = TritonRemoteModel(url="localhost:8000", endpoint="yolov8", scheme="http")

        Make inference with numpy arrays
        >>> outputs = model(np.random.rand(1, 3, 640, 640).astype(np.float32))
    """

    def __init__(self, url: str, endpoint: str = "", scheme: str = ""):
        """Initialize the TritonRemoteModel for interacting with a remote Triton Inference Server.

        Arguments may be provided individually or parsed from a collective 'url' argument of the form
        <scheme>://<netloc>/<endpoint>/<task_name>

        Args:
            url (str): The URL of the Triton server.
            endpoint (str, optional): The name of the model on the Triton server.
            scheme (str, optional): The communication scheme ('http' or 'grpc').
        """
        if not endpoint and not scheme:  # Parse all args from URL string
            splits = urlsplit(url)
            endpoint = splits.path.strip("/").split("/", 1)[0]
            scheme = splits.scheme
            url = splits.netloc

        self.endpoint = endpoint
        self.url = url

        # Choose the Triton client based on the communication scheme
        if scheme == "http":
            import tritonclient.http as client

            self.triton_client = client.InferenceServerClient(url=self.url, verbose=False, ssl=False)
            config = self.triton_client.get_model_config(endpoint)
        else:
            import tritonclient.grpc as client

            self.triton_client = client.InferenceServerClient(url=self.url, verbose=False, ssl=False)
            config = self.triton_client.get_model_config(endpoint, as_json=True)["config"]

        # Sort output names alphabetically, i.e. 'output0', 'output1', etc.
        config["output"] = sorted(config["output"], key=lambda x: x.get("name"))

        # Define model attributes
        type_map = {"TYPE_FP32": np.float32, "TYPE_FP16": np.float16, "TYPE_UINT8": np.uint8}
        self.InferRequestedOutput = client.InferRequestedOutput
        self.InferInput = client.InferInput
        self.input_formats = [x["data_type"] for x in config["input"]]
        self.np_input_formats = [type_map[x] for x in self.input_formats]
        self.input_names = [x["name"] for x in config["input"]]
        self.output_names = [x["name"] for x in config["output"]]
        self.metadata = ast.literal_eval(config.get("parameters", {}).get("metadata", {}).get("string_value", "None"))
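
To see how the single-URL form is decomposed in __init__ above (a standalone sketch using only the standard library; the URL is illustrative):

>>> from urllib.parse import urlsplit
>>> splits = urlsplit("http://localhost:8000/yolov8")
>>> splits.scheme, splits.netloc, splits.path.strip("/").split("/", 1)[0]
('http', 'localhost:8000', 'yolov8')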


method ultralytics.utils.triton.TritonRemoteModel.__call__

def __call__(self, *inputs: np.ndarray) -> list[np.ndarray]

Call the model with the given inputs and return inference results.

Args

Name     Type        Description                                                     Default
*inputs  np.ndarray  Input data to the model. Each array should match the expected  required
                     shape and type for the corresponding model input.

Returns

Type              Description
list[np.ndarray]  Model outputs with the same dtype as the input. Each element in the list corresponds to one of the model's output tensors.

Examples

>>> model = TritonRemoteModel(url="localhost:8000", endpoint="yolov8", scheme="http")
>>> outputs = model(np.random.rand(1, 3, 640, 640).astype(np.float32))
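
Because outputs are cast back to the dtype of the first input, an integer input yields integer outputs (a sketch; a running server and model are assumed as above)
>>> outputs = model(np.random.randint(0, 255, (1, 3, 640, 640), dtype=np.uint8))
>>> outputs[0].dtype
dtype('uint8')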
Source code in ultralytics/utils/triton.py
def __call__(self, *inputs: np.ndarray) -> list[np.ndarray]:
    """Call the model with the given inputs and return inference results.

    Args:
        *inputs (np.ndarray): Input data to the model. Each array should match the expected shape and type for the
            corresponding model input.

    Returns:
        (list[np.ndarray]): Model outputs with the same dtype as the input. Each element in the list corresponds to
            one of the model's output tensors.

    Examples:
        >>> model = TritonRemoteModel(url="localhost:8000", endpoint="yolov8", scheme="http")
        >>> outputs = model(np.random.rand(1, 3, 640, 640).astype(np.float32))
    """
    infer_inputs = []
    input_format = inputs[0].dtype
    for i, x in enumerate(inputs):
        if x.dtype != self.np_input_formats[i]:
            x = x.astype(self.np_input_formats[i])
        infer_input = self.InferInput(self.input_names[i], [*x.shape], self.input_formats[i].replace("TYPE_", ""))
        infer_input.set_data_from_numpy(x)
        infer_inputs.append(infer_input)

    infer_outputs = [self.InferRequestedOutput(output_name) for output_name in self.output_names]
    outputs = self.triton_client.infer(model_name=self.endpoint, inputs=infer_inputs, outputs=infer_outputs)

    return [outputs.as_numpy(output_name).astype(input_format) for output_name in self.output_names]
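
For a model that declares multiple inputs, pass one array per input in the order reported by input_names (a minimal sketch; the names and shapes here are hypothetical):

>>> model.input_names  # e.g. ['images', 'orig_shape']
>>> outputs = model(np.zeros((1, 3, 640, 640), np.float32), np.zeros((1, 2), np.float32))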




