Meet YOLO26: next-gen vision AI.

# Reference for `ultralytics/solutions/similarity_search.py`

Improvements

This page is sourced from https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/similarity_search.py. Have an improvement or example to add? Open a Pull Request — thank you! 🙏


Summary

## Class `ultralytics.solutions.similarity_search.VisualAISearch`

VisualAISearch(self, **kwargs: Any) -> None

A semantic image search system that leverages OpenAI's CLIP for generating high-quality image and text embeddings and NumPy cosine similarity for fast similarity-based retrieval.

This class aligns image and text embeddings in a shared semantic space, enabling users to search large collections of images using natural language queries with high accuracy and speed.

Args

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `**kwargs` | `Any` | | required |

Attributes

| Name | Type | Description |
| --- | --- | --- |
| `data` | `str` | Directory containing images. |
| `device` | `str` | Computation device, e.g., 'cpu' or 'cuda'. |
| `index_path` | `str` | Path to the numpy file storing image embeddings. |
| `data_path_npy` | `str` | Path to the numpy file storing image paths. |
| `data_dir` | `Path` | Path object for the data directory. |
| `model` | | Loaded CLIP model. |
| `index` | `np.ndarray` | L2-normalized image embeddings used for cosine similarity search. |
| `image_paths` | `list[str]` | List of image file paths. |

Methods

| Name | Description |
| --- | --- |
| `__call__` | Direct call interface for the search function. |
| `_normalize` | L2-normalize each row of `x` so inner products equal cosine similarity. |
| `extract_image_feature` | Extract CLIP image embedding from the given image path. |
| `extract_text_feature` | Extract CLIP text embedding from the given text query. |
| `load_or_build_index` | Load existing image embeddings or build them from the image directory. |
| `search` | Return top-k semantically similar images to the given query. |

Examples

Initialize and search for images
>>> searcher = VisualAISearch(data="path/to/images", device="cuda")
>>> results = searcher.search("a cat sitting on a chair", k=10)
Source code in ultralytics/solutions/similarity_search.py

View on GitHub

class VisualAISearch:
    """Semantic image search built on CLIP embeddings and NumPy cosine similarity.

    Images and text queries are embedded with the same CLIP model, so a natural language query can be scored against
    every indexed image. The embedding index is loaded from disk when available and built from the image directory
    otherwise.

    Attributes:
        device: Device object returned by `select_device` for the requested device (default request is 'cpu').
        index_path (str): File name of the saved image embeddings ("embeddings.npy").
        data_path_npy (str): File name of the saved image path list ("paths.npy").
        data_dir (Path): Image directory; replaced by Path("images") when the requested directory does not exist and
            the sample archive is downloaded instead.
        model: CLIP model created by `build_text_model("clip:ViT-B/32", ...)`.
        index (np.ndarray | None): Image embeddings used for similarity search; None until the index is loaded/built.
        image_paths (list[str]): Image file names aligned with the rows of `index`.

    Methods:
        __call__: Shortcut that forwards a query to `search`.
        extract_image_feature: Extract CLIP embedding from an image.
        extract_text_feature: Extract CLIP embedding from text.
        load_or_build_index: Load existing embeddings or build them from images.
        search: Perform semantic search for similar images.

    Examples:
        Initialize and search for images
        >>> searcher = VisualAISearch(data="path/to/images", device="cuda")
        >>> results = searcher.search("a cat sitting on a chair", k=10)
    """

    def __init__(self, **kwargs: Any) -> None:
        """Configure paths and device, fetch sample images if needed, load CLIP and prepare the embedding index.

        Args:
            **kwargs (Any): Optional settings. `data` is the image directory (default "images") and `device` is the
                device request handed to `select_device` (default "cpu").
        """
        assert TORCH_2_4, f"VisualAISearch requires torch>=2.4 (found torch=={TORCH_VERSION})"
        from ultralytics.nn.text_model import build_text_model

        # Cache files are written to / read from the current working directory
        self.index_path, self.data_path_npy = "embeddings.npy", "paths.npy"
        self.data_dir = Path(kwargs.get("data", "images"))
        self.device = select_device(kwargs.get("device", "cpu"))

        if not self.data_dir.exists():
            # Requested folder is missing: fall back to the sample image archive, which is used as "images"
            from ultralytics.utils import ASSETS_URL
            from ultralytics.utils.downloads import safe_download

            archive_url = f"{ASSETS_URL}/images.zip"
            LOGGER.warning(f"{self.data_dir} not found. Downloading images.zip from {archive_url}")
            safe_download(url=archive_url, unzip=True, retry=3)
            self.data_dir = Path("images")

        self.model = build_text_model("clip:ViT-B/32", device=self.device)

        # Placeholders that load_or_build_index() fills in
        self.index, self.image_paths = None, []
        self.load_or_build_index()

### Method `ultralytics.solutions.similarity_search.VisualAISearch.__call__`

def __call__(self, query: str) -> list[str]

Direct call interface for the search function.

Args

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `query` | `str` | | required |
Source code in ultralytics/solutions/similarity_search.py

View on GitHub

def __call__(self, query: str) -> list[str]:
    """Make the instance callable by forwarding `query` to `search` with its default settings.

    Args:
        query (str): Text query passed unchanged to `search`.

    Returns:
        (list[str]): The value returned by `search` for this query.
    """
    matches = self.search(query)
    return matches

### Method `ultralytics.solutions.similarity_search.VisualAISearch._normalize`

def _normalize(x: np.ndarray) -> np.ndarray

L2-normalize each row of x so inner products equal cosine similarity.

Args

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `np.ndarray` | Feature array of shape (N, D). | required |

Returns

| Type | Description |
| --- | --- |
| `np.ndarray` | Row-wise L2-normalized array with the same shape as the input. |
Source code in ultralytics/solutions/similarity_search.py

View on GitHub

@staticmethod
def _normalize(x: np.ndarray) -> np.ndarray:
    """L2-normalize `x` along its last axis so inner products equal cosine similarity.

    Normalizing along the last axis (instead of a hard-coded axis 1) keeps the behavior for the usual (N, D) batch
    unchanged while also accepting a single feature vector of shape (D,).

    Args:
        x (np.ndarray): Feature array of shape (N, D), or a single vector of shape (D,).

    Returns:
        (np.ndarray): Array with the same shape as the input whose feature vectors have unit L2 norm. Vectors whose
            norm is below 1e-12 are divided by 1e-12 instead, so all-zero vectors stay zero rather than becoming NaN.
    """
    norms = np.linalg.norm(x, axis=-1, keepdims=True)
    return x / np.maximum(norms, 1e-12)

### Method `ultralytics.solutions.similarity_search.VisualAISearch.extract_image_feature`

def extract_image_feature(self, path: Path) -> np.ndarray

Extract CLIP image embedding from the given image path.

Args

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `path` | `Path` | | required |
Source code in ultralytics/solutions/similarity_search.py

View on GitHub

def extract_image_feature(self, path: Path) -> np.ndarray:
    """Extract the CLIP image embedding for the image stored at `path`.

    Args:
        path (Path): Location of the image file to embed.

    Returns:
        (np.ndarray): Output of the model's `encode_image`, detached and moved to the CPU as a NumPy array.
    """
    # Use a context manager so the underlying file handle is released as soon as the image has been encoded,
    # rather than staying open until garbage collection while a whole directory is being indexed.
    with Image.open(path) as image:
        return self.model.encode_image(image).detach().cpu().numpy()

### Method `ultralytics.solutions.similarity_search.VisualAISearch.extract_text_feature`

def extract_text_feature(self, text: str) -> np.ndarray

Extract CLIP text embedding from the given text query.

Args

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `text` | `str` | | required |
Source code in ultralytics/solutions/similarity_search.py

View on GitHub

def extract_text_feature(self, text: str) -> np.ndarray:
    """Tokenize `text`, encode it with the CLIP model and return the embedding as a NumPy array.

    Args:
        text (str): Text query to embed; it is tokenized as a single-element batch.

    Returns:
        (np.ndarray): Output of the model's `encode_text`, detached and moved to the CPU as a NumPy array.
    """
    tokens = self.model.tokenize([text])
    embedding = self.model.encode_text(tokens)
    return embedding.detach().cpu().numpy()

### Method `ultralytics.solutions.similarity_search.VisualAISearch.load_or_build_index`

def load_or_build_index(self) -> None

Load existing image embeddings or build them from the image directory.

Checks if the embeddings and image paths exist on disk. If found, loads them directly. Otherwise, builds the index by extracting features from all images in the data directory, L2-normalizes them, and saves both the embeddings and image paths for future use.

Source code in ultralytics/solutions/similarity_search.py

View on GitHub

def load_or_build_index(self) -> None:
    """Load existing image embeddings or build them from the image directory.

    If both cache files exist and describe the same number of images, they are loaded directly. Otherwise the index
    is built by extracting a feature vector from every supported image in the data directory (in sorted file order),
    L2-normalizing the vectors, and saving both the embeddings and the image names for future use.

    Raises:
        RuntimeError: If no image in the data directory produced an embedding.
    """
    if Path(self.index_path).exists() and Path(self.data_path_npy).exists():
        LOGGER.info("Loading existing embeddings...")
        index = np.load(self.index_path)
        image_paths = np.load(self.data_path_npy).tolist()  # keep `image_paths` a plain list[str]
        if len(index) == len(image_paths):
            self.index, self.image_paths = index, image_paths
            return
        # Mismatched cache files would map scores to the wrong images (or raise IndexError), so rebuild instead
        LOGGER.warning(
            f"Cached embeddings ({len(index)}) and image paths ({len(image_paths)}) are out of sync. Rebuilding."
        )

    LOGGER.info("Building embeddings from images...")
    # Collect into locals so a repeated call never appends to (or partially overwrites) existing state
    vectors, image_paths = [], []

    # Sorted iteration makes the row order of the index reproducible across runs and platforms
    for file in sorted(self.data_dir.iterdir()):
        if file.suffix.lower().lstrip(".") not in IMG_FORMATS:
            continue  # not a supported image format
        try:
            vectors.append(self.extract_image_feature(file))
        except Exception as e:  # best-effort: one unreadable image must not abort the whole build
            LOGGER.warning(f"Skipping {file.name}: {e}")
            continue
        image_paths.append(file.name)  # recorded only after its vector exists, keeping both lists aligned

    if not vectors:
        raise RuntimeError("No image embeddings could be generated.")

    # Stack to a float32 (N, D) array and L2-normalize so inner product equals cosine similarity
    self.index = self._normalize(np.vstack(vectors).astype("float32"))
    self.image_paths = image_paths
    np.save(self.index_path, self.index)
    np.save(self.data_path_npy, np.array(self.image_paths))

    LOGGER.info(f"Indexed {len(self.image_paths)} images.")

### Method `ultralytics.solutions.similarity_search.VisualAISearch.search`

def search(self, query: str, k: int = 30, similarity_thresh: float = 0.1) -> list[str]

Return top-k semantically similar images to the given query.

Args

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `query` | `str` | Natural language text query to search for. | required |
| `k` | `int, optional` | Maximum number of results to return. | `30` |
| `similarity_thresh` | `float, optional` | Minimum similarity threshold for filtering results. | `0.1` |

Returns

| Type | Description |
| --- | --- |
| `list[str]` | List of image filenames ranked by similarity score. |

Examples

Search for images matching a query
>>> searcher = VisualAISearch(data="images")
>>> results = searcher.search("red car", k=5, similarity_thresh=0.2)
Source code in ultralytics/solutions/similarity_search.py

View on GitHub

def search(self, query: str, k: int = 30, similarity_thresh: float = 0.1) -> list[str]:
    """Rank indexed images against a text query and return the best matches.

    Args:
        query (str): Natural language text query to search for.
        k (int, optional): Maximum number of results to return; negative values are treated as 0.
        similarity_thresh (float, optional): Minimum similarity a candidate must reach to be kept.

    Returns:
        (list[str]): Image names ordered from highest to lowest similarity score.

    Examples:
        Search for images matching a query
        >>> searcher = VisualAISearch(data="images")
        >>> results = searcher.search("red car", k=5, similarity_thresh=0.2)
    """
    # Embed and normalize the query, then score it against every indexed image
    query_vec = self._normalize(self.extract_text_feature(query).astype("float32"))[0]
    scores = self.index @ query_vec  # cosine similarity (embeddings are L2-normalized)

    # Best `k` candidates first, then drop those below the threshold
    limit = max(k, 0)
    candidates = np.argsort(scores)[::-1][:limit]
    hits = []
    for i in candidates:
        if scores[i] >= similarity_thresh:
            hits.append((self.image_paths[i], float(scores[i])))
    ranked = sorted(hits, key=lambda hit: hit[1], reverse=True)

    LOGGER.info("\nRanked Results:")
    for image_name, similarity in ranked:
        LOGGER.info(f"  - {image_name} | Similarity: {similarity:.4f}")

    return [image_name for image_name, _ in ranked]





## Class `ultralytics.solutions.similarity_search.SearchApp`

SearchApp(self, data: str = "images", device: str | None = None) -> None

A Flask-based web interface for semantic image search with natural language queries.

This class provides a clean, responsive frontend that enables users to input natural language queries and instantly view the most relevant images retrieved from the indexed database.

Args

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `data` | `str, optional` | Path to directory containing images to index and search. | `"images"` |
| `device` | `str, optional` | Device to run inference on (e.g. 'cpu', 'cuda'). | `None` |

Attributes

| Name | Type | Description |
| --- | --- | --- |
| `render_template` | | Flask template rendering function. |
| `request` | | Flask request object. |
| `searcher` | `VisualAISearch` | Instance of the VisualAISearch class. |
| `app` | `Flask` | Flask application instance. |

Methods

| Name | Description |
| --- | --- |
| `index` | Process user query and display search results in the web interface. |
| `run` | Start the Flask web application server. |

Examples

Start a search application
>>> app = SearchApp(data="path/to/images", device="cuda")
>>> app.run(debug=True)
Source code in ultralytics/solutions/similarity_search.py

View on GitHub

class SearchApp:
    """A Flask-based web interface for semantic image search with natural language queries.

    This class wires a `VisualAISearch` backend to a single Flask route that accepts a text query and renders the
    matching images, which are served as static files under the "/images" URL prefix.

    Attributes:
        render_template: Flask template rendering function.
        request: Flask request object.
        searcher (VisualAISearch): Instance of the VisualAISearch class.
        app (Flask): Flask application instance.

    Methods:
        index: Process user queries and display search results.
        run: Start the Flask web application.

    Examples:
        Start a search application
        >>> app = SearchApp(data="path/to/images", device="cuda")
        >>> app.run(debug=True)
    """

    def __init__(self, data: str = "images", device: str | None = None) -> None:
        """Initialize the SearchApp with VisualAISearch backend.

        Args:
            data (str, optional): Path to directory containing images to index and search.
            device (str, optional): Device to run inference on (e.g. 'cpu', 'cuda').
        """
        check_requirements("flask>=3.0.1")
        from flask import Flask, render_template, request

        self.render_template = render_template
        self.request = request
        self.searcher = VisualAISearch(data=data, device=device)
        self.app = Flask(
            __name__,
            template_folder="templates",
            # Serve the directory the searcher actually indexed: when `data` does not exist, VisualAISearch
            # downloads sample images and switches its `data_dir` to "images", so `data` itself may be missing.
            static_folder=self.searcher.data_dir.resolve(),  # Absolute path to serve images
            static_url_path="/images",  # URL prefix for images
        )
        self.app.add_url_rule("/", view_func=self.index, methods=["GET", "POST"])

### Method `ultralytics.solutions.similarity_search.SearchApp.index`

def index(self) -> str

Process user query and display search results in the web interface.

Source code in ultralytics/solutions/similarity_search.py

View on GitHub

def index(self) -> str:
    """Process user query and display search results in the web interface.

    On a POST request the submitted `query` form field is stripped and, when non-empty, passed to the searcher. GET
    requests and blank submissions render the page with an empty result list.

    Returns:
        (str): Output of rendering the "similarity-search.html" template with the `results` list.
    """
    results = []
    if self.request.method == "POST":
        query = self.request.form.get("query", "").strip()
        if query:  # a blank query would only trigger model inference and return meaningless matches
            results = self.searcher(query)
    return self.render_template("similarity-search.html", results=results)

### Method `ultralytics.solutions.similarity_search.SearchApp.run`

def run(self, debug: bool = False) -> None

Start the Flask web application server.

Args

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `debug` | `bool` | | `False` |
Source code in ultralytics/solutions/similarity_search.py

View on GitHub

def run(self, debug: bool = False) -> None:
    """Launch the Flask application server.

    Args:
        debug (bool): Forwarded to the Flask app's `run` method as its `debug` flag.
    """
    flask_app = self.app
    flask_app.run(debug=debug)