ΠŸΠ΅Ρ€Π΅ΠΉΡ‚ΠΈ ΠΊ содСрТимому

Бсылка для ultralytics/trackers/utils/gmc.py

ΠŸΡ€ΠΈΠΌΠ΅Ρ‡Π°Π½ΠΈΠ΅

Π­Ρ‚ΠΎΡ‚ Ρ„Π°ΠΉΠ» доступСн ΠΏΠΎ адрСсу https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/gmc.py. Если Ρ‚Ρ‹ Π·Π°ΠΌΠ΅Ρ‚ΠΈΠ» ΠΏΡ€ΠΎΠ±Π»Π΅ΠΌΡƒ, поТалуйста, ΠΏΠΎΠΌΠΎΠ³ΠΈ ΠΈΡΠΏΡ€Π°Π²ΠΈΡ‚ΡŒ Π΅Π΅, ΠΎΡ‚ΠΏΡ€Π°Π²ΠΈΠ² Pull Request πŸ› οΈ. Бпасибо πŸ™!



ultralytics.trackers.utils.gmc.GMC

Класс Generalized Motion Compensation (GMC) для отслСТивания ΠΈ обнаруТСния ΠΎΠ±ΡŠΠ΅ΠΊΡ‚ΠΎΠ² Π² Π²ΠΈΠ΄Π΅ΠΎΠΊΠ°Π΄Ρ€Π°Ρ….

Π­Ρ‚ΠΎΡ‚ класс прСдоставляСт ΠΌΠ΅Ρ‚ΠΎΠ΄Ρ‹ для отслСТивания ΠΈ обнаруТСния ΠΎΠ±ΡŠΠ΅ΠΊΡ‚ΠΎΠ² Π½Π° основС Π½Π΅ΡΠΊΠΎΠ»ΡŒΠΊΠΈΡ… Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠΎΠ² отслСТивания, Π²ΠΊΠ»ΡŽΡ‡Π°Ρ ORB, SIFT, ECC ΠΈ Sparse Optical Flow. Он Ρ‚Π°ΠΊΠΆΠ΅ ΠΏΠΎΠ΄Π΄Π΅Ρ€ΠΆΠΈΠ²Π°Π΅Ρ‚ ΡƒΠΌΠ΅Π½ΡŒΡˆΠ΅Π½ΠΈΠ΅ ΠΌΠ°ΡΡˆΡ‚Π°Π±Π° ΠΊΠ°Π΄Ρ€ΠΎΠ² для ΠΏΠΎΠ²Ρ‹ΡˆΠ΅Π½ΠΈΡ эффСктивности вычислСний.

Атрибуты:

Имя Вип ОписаниС
method str

ΠœΠ΅Ρ‚ΠΎΠ΄, ΠΈΡΠΏΠΎΠ»ΡŒΠ·ΡƒΠ΅ΠΌΡ‹ΠΉ для отслСТивания. Π’Π°Ρ€ΠΈΠ°Π½Ρ‚Ρ‹: 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.

downscale int

ΠšΠΎΡΡ„Ρ„ΠΈΡ†ΠΈΠ΅Π½Ρ‚, Π½Π° ΠΊΠΎΡ‚ΠΎΡ€Ρ‹ΠΉ Π½ΡƒΠΆΠ½ΠΎ ΡƒΠΌΠ΅Π½ΡŒΡˆΠΈΡ‚ΡŒ ΠΌΠ°ΡΡˆΡ‚Π°Π± ΠΊΠ°Π΄Ρ€ΠΎΠ² для ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚ΠΊΠΈ.

prevFrame ndarray

БохраняСт ΠΏΡ€Π΅Π΄Ρ‹Π΄ΡƒΡ‰ΠΈΠΉ ΠΊΠ°Π΄Ρ€ для отслСТивания.

prevKeyPoints list

БохраняСт ΠΊΠ»ΡŽΡ‡Π΅Π²Ρ‹Π΅ Ρ‚ΠΎΡ‡ΠΊΠΈ ΠΈΠ· ΠΏΡ€Π΅Π΄Ρ‹Π΄ΡƒΡ‰Π΅Π³ΠΎ ΠΊΠ°Π΄Ρ€Π°.

prevDescriptors ndarray

БохраняСт дСскрипторы ΠΈΠ· ΠΏΡ€Π΅Π΄Ρ‹Π΄ΡƒΡ‰Π΅Π³ΠΎ ΠΊΠ°Π΄Ρ€Π°.

initializedFirstFrame bool

Π€Π»Π°Π³, ΡƒΠΊΠ°Π·Ρ‹Π²Π°ΡŽΡ‰ΠΈΠΉ, Π±Ρ‹Π» Π»ΠΈ ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚Π°Π½ ΠΏΠ΅Ρ€Π²Ρ‹ΠΉ ΠΊΠ°Π΄Ρ€.

ΠœΠ΅Ρ‚ΠΎΠ΄Ρ‹:

Имя ОписаниС
__init__

Π˜Π½ΠΈΡ†ΠΈΠ°Π»ΠΈΠ·ΠΈΡ€ΡƒΠ΅Ρ‚ ΠΎΠ±ΡŠΠ΅ΠΊΡ‚ GMC с ΡƒΠΊΠ°Π·Π°Π½Π½Ρ‹ΠΌ ΠΌΠ΅Ρ‚ΠΎΠ΄ΠΎΠΌ ΠΈ коэффициСнтом ΡƒΠΌΠ΅Π½ΡŒΡˆΠ΅Π½ΠΈΡ.

apply

ΠŸΡ€ΠΈΠΌΠ΅Π½ΡΠ΅Ρ‚ Π²Ρ‹Π±Ρ€Π°Π½Π½Ρ‹ΠΉ ΠΌΠ΅Ρ‚ΠΎΠ΄ ΠΊ Π½Π΅ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚Π°Π½Π½ΠΎΠΌΡƒ ΠΊΠ°Π΄Ρ€Ρƒ ΠΈ, ΠΏΠΎ ТСланию, ΠΈΡΠΏΠΎΠ»ΡŒΠ·ΡƒΠ΅Ρ‚ прСдоставлСнныС обнаруТСния.

applyEcc

ΠŸΡ€ΠΈΠΌΠ΅Π½ΡΠ΅Ρ‚ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌ ECC ΠΊ Π½Π΅ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚Π°Π½Π½ΠΎΠΌΡƒ ΠΊΠ°Π΄Ρ€Ρƒ.

applyFeatures

ΠŸΡ€ΠΈΠΌΠ΅Π½ΡΠΉ ΠΊ Π½Π΅ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚Π°Π½Π½ΠΎΠΌΡƒ ΠΊΠ°Π΄Ρ€Ρƒ ΠΌΠ΅Ρ‚ΠΎΠ΄Ρ‹, основанныС Π½Π° ΠΏΡ€ΠΈΠ·Π½Π°ΠΊΠ°Ρ…, Ρ‚Π°ΠΊΠΈΠ΅ ΠΊΠ°ΠΊ ORB ΠΈΠ»ΠΈ SIFT.

applySparseOptFlow

ΠŸΡ€ΠΈΠΌΠ΅Π½ΡΠ΅Ρ‚ ΠΌΠ΅Ρ‚ΠΎΠ΄ Sparse Optical Flow ΠΊ Π½Π΅ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚Π°Π½Π½ΠΎΠΌΡƒ ΠΊΠ°Π΄Ρ€Ρƒ.

Π˜ΡΡ…ΠΎΠ΄Π½Ρ‹ΠΉ ΠΊΠΎΠ΄ Π² ultralytics/trackers/utils/gmc.py
class GMC:
    """
    Generalized Motion Compensation (GMC) class for tracking and object detection in video frames.

    This class provides methods for tracking and detecting objects based on several tracking algorithms including ORB,
    SIFT, ECC, and Sparse Optical Flow. It also supports downscaling of frames for computational efficiency.

    Attributes:
        method (str | None): The method used for tracking. One of 'orb', 'sift', 'ecc', 'sparseOptFlow', or None.
        downscale (int): Factor by which to downscale the frames for processing (always >= 1).
        prevFrame (np.ndarray): Grayscale (and possibly downscaled) version of the previously processed frame.
        prevKeyPoints (list): Keypoints detected in the previous frame.
        prevDescriptors (np.ndarray): Descriptors computed for the previous frame.
        initializedFirstFrame (bool): Flag to indicate if the first frame has been processed.

    Methods:
        apply: Dispatches a raw frame to the configured motion-compensation method.
        applyEcc: Estimates motion with the ECC algorithm.
        applyFeatures: Estimates motion with feature matching (ORB or SIFT).
        applySparseOptFlow: Estimates motion with sparse optical flow.
        reset_params: Clears the cached per-frame state.
    """

    def __init__(self, method: str = "sparseOptFlow", downscale: int = 2) -> None:
        """
        Initialize a video tracker with specified parameters.

        Args:
            method (str): The method used for tracking. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
            downscale (int): Downscale factor for processing frames.

        Raises:
            ValueError: If `method` is not one of the supported options.
        """
        super().__init__()

        self.method = method
        # Clamp to >= 1 so integer division in the resize steps can never produce a zero-sized frame.
        self.downscale = max(1, int(downscale))

        if self.method == "orb":
            # FAST keypoint detection + ORB descriptors, matched with Hamming distance.
            self.detector = cv2.FastFeatureDetector_create(20)
            self.extractor = cv2.ORB_create()
            self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)

        elif self.method == "sift":
            # SIFT keypoints/descriptors, matched with L2 distance.
            self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
            self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
            self.matcher = cv2.BFMatcher(cv2.NORM_L2)

        elif self.method == "ecc":
            # Euclidean motion model, iterated until convergence or the max iteration count.
            number_of_iterations = 5000
            termination_eps = 1e-6
            self.warp_mode = cv2.MOTION_EUCLIDEAN
            self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)

        elif self.method == "sparseOptFlow":
            # Shi-Tomasi corner parameters for cv2.goodFeaturesToTrack.
            self.feature_params = dict(
                maxCorners=1000, qualityLevel=0.01, minDistance=1, blockSize=3, useHarrisDetector=False, k=0.04
            )

        elif self.method in {"none", "None", None}:
            self.method = None
        else:
            raise ValueError(f"Error: Unknown GMC method:{method}")

        # Per-frame state, populated when the first frame is processed.
        self.prevFrame = None
        self.prevKeyPoints = None
        self.prevDescriptors = None
        self.initializedFirstFrame = False

    def apply(self, raw_frame: np.ndarray, detections: list = None) -> np.ndarray:
        """
        Apply the configured motion-compensation method to a raw frame.

        Args:
            raw_frame (np.ndarray): BGR frame of shape (H, W, 3) to be processed.
            detections (list): Optional detections, used only by the feature-based methods to mask out objects.

        Returns:
            (np.ndarray): 2x3 affine transformation matrix mapping the previous frame onto the current one
                (identity when the method is disabled, on the first frame, or if estimation fails).

        Examples:
            >>> gmc = GMC(method="none")
            >>> gmc.apply(np.zeros((480, 640, 3), dtype=np.uint8))
            array([[1., 0., 0.],
                   [0., 1., 0.]])
        """
        if self.method in {"orb", "sift"}:
            return self.applyFeatures(raw_frame, detections)
        elif self.method == "ecc":
            return self.applyEcc(raw_frame)
        elif self.method == "sparseOptFlow":
            return self.applySparseOptFlow(raw_frame)
        else:
            return np.eye(2, 3)

    def applyEcc(self, raw_frame: np.ndarray) -> np.ndarray:
        """
        Apply the ECC (Enhanced Correlation Coefficient) algorithm to a raw frame.

        Args:
            raw_frame (np.ndarray): BGR frame of shape (H, W, 3) to be processed.

        Returns:
            (np.ndarray): 2x3 warp matrix (identity on the first frame or if estimation fails).
        """
        height, width, _ = raw_frame.shape
        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
        H = np.eye(2, 3, dtype=np.float32)

        # Downscale image; blur first to suppress aliasing artifacts.
        if self.downscale > 1.0:
            frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
            width = width // self.downscale
            height = height // self.downscale

        # Handle first frame
        if not self.initializedFirstFrame:
            # Initialize data
            self.prevFrame = frame.copy()

            # Initialization done
            self.initializedFirstFrame = True

            return H

        # Run the ECC algorithm. The result is stored in H.
        # NOTE(review): prevFrame is never updated after the first frame, so ECC aligns every frame
        # against the first one — confirm this reference-frame behavior is intended.
        try:
            (_, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
        except Exception as e:
            LOGGER.warning(f"WARNING: find transform failed. Set warp as identity {e}")

        return H

    def applyFeatures(self, raw_frame: np.ndarray, detections: list = None) -> np.ndarray:
        """
        Apply feature-based methods like ORB or SIFT to a raw frame.

        Args:
            raw_frame (np.ndarray): BGR frame of shape (H, W, 3) to be processed.
            detections (list): Optional detections; the first 4 values of each (tlbr box) are masked out of
                the keypoint search so moving objects do not contaminate the camera-motion estimate.

        Returns:
            (np.ndarray): 2x3 affine transformation matrix (identity on the first frame or when too few
                matches are found).
        """
        height, width, _ = raw_frame.shape
        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
        H = np.eye(2, 3)

        # Downscale image
        if self.downscale > 1.0:
            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
            width = width // self.downscale
            height = height // self.downscale

        # Keypoint search mask: ignore a 2% border and any detected-object regions.
        mask = np.zeros_like(frame)
        mask[int(0.02 * height) : int(0.98 * height), int(0.02 * width) : int(0.98 * width)] = 255
        if detections is not None:
            for det in detections:
                tlbr = (det[:4] / self.downscale).astype(np.int_)
                mask[tlbr[1] : tlbr[3], tlbr[0] : tlbr[2]] = 0

        keypoints = self.detector.detect(frame, mask)

        # Compute the descriptors
        keypoints, descriptors = self.extractor.compute(frame, keypoints)

        # Handle first frame
        if not self.initializedFirstFrame:
            self.prevFrame = frame.copy()
            self.prevKeyPoints = copy.copy(keypoints)
            self.prevDescriptors = copy.copy(descriptors)
            self.initializedFirstFrame = True
            return H

        # Match descriptors
        knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)

        # Filter matches based on smallest spatial distance
        matches = []
        spatialDistances = []

        maxSpatialDistance = 0.25 * np.array([width, height])

        # Handle empty matches case
        if len(knnMatches) == 0:
            # Store to next iteration
            self.prevFrame = frame.copy()
            self.prevKeyPoints = copy.copy(keypoints)
            self.prevDescriptors = copy.copy(descriptors)

            return H

        for pair in knnMatches:
            # knnMatch may return fewer than 2 candidates per query when the train set is tiny;
            # unpacking such a pair directly would raise ValueError.
            if len(pair) < 2:
                continue
            m, n = pair
            if m.distance < 0.9 * n.distance:  # Lowe's ratio test
                prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt
                currKeyPointLocation = keypoints[m.trainIdx].pt

                spatialDistance = (
                    prevKeyPointLocation[0] - currKeyPointLocation[0],
                    prevKeyPointLocation[1] - currKeyPointLocation[1],
                )

                # Reject matches that moved more than 25% of the frame size on either axis.
                if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and (
                    np.abs(spatialDistance[1]) < maxSpatialDistance[1]
                ):
                    spatialDistances.append(spatialDistance)
                    matches.append(m)

        # Per-axis 2.5-sigma clipping of the displacement distribution.
        meanSpatialDistances = np.mean(spatialDistances, 0)
        stdSpatialDistances = np.std(spatialDistances, 0)

        inliers = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances

        goodMatches = []
        prevPoints = []
        currPoints = []
        for i in range(len(matches)):
            if inliers[i, 0] and inliers[i, 1]:
                goodMatches.append(matches[i])
                prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt)
                currPoints.append(keypoints[matches[i].trainIdx].pt)

        prevPoints = np.array(prevPoints)
        currPoints = np.array(currPoints)

        # Find rigid matrix (partial affine: rotation + uniform scale + translation) via RANSAC.
        if prevPoints.shape[0] > 4:
            H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)

            # Handle downscale: points were matched in the downscaled frame, so rescale the translation.
            if self.downscale > 1.0:
                H[0, 2] *= self.downscale
                H[1, 2] *= self.downscale
        else:
            LOGGER.warning("WARNING: not enough matching points")

        # Store to next iteration
        self.prevFrame = frame.copy()
        self.prevKeyPoints = copy.copy(keypoints)
        self.prevDescriptors = copy.copy(descriptors)

        return H

    def applySparseOptFlow(self, raw_frame: np.ndarray) -> np.ndarray:
        """
        Apply the Sparse Optical Flow method to a raw frame.

        Args:
            raw_frame (np.ndarray): BGR frame of shape (H, W, 3) to be processed.

        Returns:
            (np.ndarray): 2x3 affine transformation matrix (identity on the first frame or when too few
                correspondences are found).
        """
        height, width, _ = raw_frame.shape
        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
        H = np.eye(2, 3)

        # Downscale image
        if self.downscale > 1.0:
            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))

        # Find the keypoints
        keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)

        # Handle first frame (or a reset where no keypoints were cached).
        if not self.initializedFirstFrame or self.prevKeyPoints is None:
            self.prevFrame = frame.copy()
            self.prevKeyPoints = copy.copy(keypoints)
            self.initializedFirstFrame = True
            return H

        # Find correspondences with pyramidal Lucas-Kanade optical flow.
        matchedKeypoints, status, _ = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, self.prevKeyPoints, None)

        # Leave good correspondences only
        prevPoints = []
        currPoints = []

        for i in range(len(status)):
            if status[i]:
                prevPoints.append(self.prevKeyPoints[i])
                currPoints.append(matchedKeypoints[i])

        prevPoints = np.array(prevPoints)
        currPoints = np.array(currPoints)

        # Find rigid matrix. Fixed: compare prev vs curr point counts (was a tautological
        # prevPoints == prevPoints comparison).
        if (prevPoints.shape[0] > 4) and (prevPoints.shape[0] == currPoints.shape[0]):
            H, _ = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)

            # Points were tracked in the downscaled frame, so rescale the translation.
            if self.downscale > 1.0:
                H[0, 2] *= self.downscale
                H[1, 2] *= self.downscale
        else:
            LOGGER.warning("WARNING: not enough matching points")

        self.prevFrame = frame.copy()
        self.prevKeyPoints = copy.copy(keypoints)

        return H

    def reset_params(self) -> None:
        """Reset cached per-frame state so the next frame is treated as the first one."""
        self.prevFrame = None
        self.prevKeyPoints = None
        self.prevDescriptors = None
        self.initializedFirstFrame = False

__init__(method='sparseOptFlow', downscale=2)

Π˜Π½ΠΈΡ†ΠΈΠ°Π»ΠΈΠ·ΠΈΡ€ΡƒΠΉ Π²ΠΈΠ΄Π΅ΠΎΡ‚Ρ€Π΅ΠΊΠ΅Ρ€ с ΡƒΠΊΠ°Π·Π°Π½Π½Ρ‹ΠΌΠΈ ΠΏΠ°Ρ€Π°ΠΌΠ΅Ρ‚Ρ€Π°ΠΌΠΈ.

ΠŸΠ°Ρ€Π°ΠΌΠ΅Ρ‚Ρ€Ρ‹:

Имя Π’ΠΈΠΏ ОписаниС По ΡƒΠΌΠΎΠ»Ρ‡Π°Π½ΠΈΡŽ
method str

ΠœΠ΅Ρ‚ΠΎΠ΄, ΠΈΡΠΏΠΎΠ»ΡŒΠ·ΡƒΠ΅ΠΌΡ‹ΠΉ для отслСТивания. Π’Π°Ρ€ΠΈΠ°Π½Ρ‚Ρ‹: 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.

'sparseOptFlow'
downscale int

ΠšΠΎΡΡ„Ρ„ΠΈΡ†ΠΈΠ΅Π½Ρ‚ пониТСния для ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚ΠΊΠΈ ΠΊΠ°Π΄Ρ€ΠΎΠ².

2
Π˜ΡΡ…ΠΎΠ΄Π½Ρ‹ΠΉ ΠΊΠΎΠ΄ Π² ultralytics/trackers/utils/gmc.py
def __init__(self, method: str = "sparseOptFlow", downscale: int = 2) -> None:
    """
    Initialize a video tracker with the specified parameters.

    Args:
        method (str): The method used for tracking. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
        downscale (int): Downscale factor for processing frames.

    Raises:
        ValueError: If ``method`` is not one of the supported options.
    """
    super().__init__()

    self.method = method
    # Force the downscale factor to a positive integer.
    self.downscale = max(1, int(downscale))

    if self.method == "orb":
        # FAST keypoints with ORB descriptors, matched by Hamming distance.
        self.detector = cv2.FastFeatureDetector_create(20)
        self.extractor = cv2.ORB_create()
        self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
    elif self.method == "sift":
        # Identical SIFT configuration for detection and description, matched by L2 distance.
        sift_args = dict(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
        self.detector = cv2.SIFT_create(**sift_args)
        self.extractor = cv2.SIFT_create(**sift_args)
        self.matcher = cv2.BFMatcher(cv2.NORM_L2)
    elif self.method == "ecc":
        # Euclidean warp model, iterated up to 5000 times or until eps of 1e-6.
        self.warp_mode = cv2.MOTION_EUCLIDEAN
        self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 5000, 1e-6)
    elif self.method == "sparseOptFlow":
        # Shi-Tomasi corner parameters for goodFeaturesToTrack.
        self.feature_params = dict(
            maxCorners=1000, qualityLevel=0.01, minDistance=1, blockSize=3, useHarrisDetector=False, k=0.04
        )
    elif self.method in {"none", "None", None}:
        self.method = None
    else:
        raise ValueError(f"Error: Unknown GMC method:{method}")

    # Per-frame tracking state, filled in once the first frame is seen.
    self.prevFrame = None
    self.prevKeyPoints = None
    self.prevDescriptors = None
    self.initializedFirstFrame = False

apply(raw_frame, detections=None)

ΠŸΡ€ΠΈΠΌΠ΅Π½ΠΈ ΠΎΠ±Π½Π°Ρ€ΡƒΠΆΠ΅Π½ΠΈΠ΅ ΠΎΠ±ΡŠΠ΅ΠΊΡ‚ΠΎΠ² Π½Π° Π½Π΅ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚Π°Π½Π½ΠΎΠΌ ΠΊΠ°Π΄Ρ€Π΅, ΠΈΡΠΏΠΎΠ»ΡŒΠ·ΡƒΡ ΡƒΠΊΠ°Π·Π°Π½Π½Ρ‹ΠΉ ΠΌΠ΅Ρ‚ΠΎΠ΄.

ΠŸΠ°Ρ€Π°ΠΌΠ΅Ρ‚Ρ€Ρ‹:

Имя Π’ΠΈΠΏ ОписаниС По ΡƒΠΌΠΎΠ»Ρ‡Π°Π½ΠΈΡŽ
raw_frame ndarray

НСобработанный ΠΊΠ°Π΄Ρ€, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹ΠΉ Π½ΡƒΠΆΠ½ΠΎ ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚Π°Ρ‚ΡŒ.

трСбуСтся
detections list

Бписок ΠΎΠ±Π½Π°Ρ€ΡƒΠΆΠ΅Π½ΠΈΠΉ, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹Π΅ Π±ΡƒΠ΄ΡƒΡ‚ ΠΈΡΠΏΠΎΠ»ΡŒΠ·ΠΎΠ²Π°Ρ‚ΡŒΡΡ Π² ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚ΠΊΠ΅.

None

ВозвращаСтся:

Вип ОписаниС
ndarray

ΠžΠ±Ρ€Π°Π±ΠΎΡ‚Π°Π½Π½Ρ‹ΠΉ ΠΊΠ°Π΄Ρ€ (ΠΌΠ°Ρ‚Ρ€ΠΈΡ†Π° прСобразования 2x3).

ΠŸΡ€ΠΈΠΌΠ΅Ρ€Ρ‹:

>>> gmc = GMC()
>>> gmc.apply(np.array([[1, 2, 3], [4, 5, 6]]))
array([[1, 2, 3],
       [4, 5, 6]])
Π˜ΡΡ…ΠΎΠ΄Π½Ρ‹ΠΉ ΠΊΠΎΠ΄ Π² ultralytics/trackers/utils/gmc.py
def apply(self, raw_frame: np.array, detections: list = None) -> np.array:
    """
    Apply the configured motion-compensation method to a raw frame.

    Args:
        raw_frame (np.ndarray): The raw frame to be processed.
        detections (list): Detections forwarded only to the feature-based methods.

    Returns:
        (np.ndarray): 2x3 transformation matrix estimated for the frame (identity when no method is active).
    """
    method = self.method
    if method in {"orb", "sift"}:
        return self.applyFeatures(raw_frame, detections)
    if method == "ecc":
        return self.applyEcc(raw_frame)
    if method == "sparseOptFlow":
        return self.applySparseOptFlow(raw_frame)
    # Disabled or unknown method: return the identity transform.
    return np.eye(2, 3)

applyEcc(raw_frame)

ΠŸΡ€ΠΈΠΌΠ΅Π½ΠΈ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌ ECC ΠΊ Π½Π΅ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚Π°Π½Π½ΠΎΠΌΡƒ ΠΊΠ°Π΄Ρ€Ρƒ.

ΠŸΠ°Ρ€Π°ΠΌΠ΅Ρ‚Ρ€Ρ‹:

Имя Π’ΠΈΠΏ ОписаниС По ΡƒΠΌΠΎΠ»Ρ‡Π°Π½ΠΈΡŽ
raw_frame ndarray

НСобработанный ΠΊΠ°Π΄Ρ€, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹ΠΉ Π½ΡƒΠΆΠ½ΠΎ ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚Π°Ρ‚ΡŒ.

трСбуСтся

ВозвращаСтся:

Вип ОписаниС
ndarray

ΠžΠ±Ρ€Π°Π±ΠΎΡ‚Π°Π½Π½Ρ‹ΠΉ ΠΊΠ°Π΄Ρ€ (ΠΌΠ°Ρ‚Ρ€ΠΈΡ†Π° прСобразования 2x3).

ΠŸΡ€ΠΈΠΌΠ΅Ρ€Ρ‹:

>>> gmc = GMC()
>>> gmc.applyEcc(np.array([[1, 2, 3], [4, 5, 6]]))
array([[1, 2, 3],
       [4, 5, 6]])
Π˜ΡΡ…ΠΎΠ΄Π½Ρ‹ΠΉ ΠΊΠΎΠ΄ Π² ultralytics/trackers/utils/gmc.py
def applyEcc(self, raw_frame: np.array) -> np.array:
    """
    Apply the ECC (Enhanced Correlation Coefficient) algorithm to a raw frame.

    Args:
        raw_frame (np.ndarray): The raw BGR frame to be processed.

    Returns:
        (np.ndarray): 2x3 warp matrix (identity on the first frame or if estimation fails).
    """
    frame_h, frame_w, _ = raw_frame.shape
    gray = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
    warp = np.eye(2, 3, dtype=np.float32)

    # Blur then shrink the frame when a downscale factor is configured.
    if self.downscale > 1.0:
        gray = cv2.GaussianBlur(gray, (3, 3), 1.5)
        gray = cv2.resize(gray, (frame_w // self.downscale, frame_h // self.downscale))

    # First frame only seeds the reference image; no motion can be estimated yet.
    if not self.initializedFirstFrame:
        self.prevFrame = gray.copy()
        self.initializedFirstFrame = True
        return warp

    # Estimate the warp with ECC; on failure keep the identity and log a warning.
    try:
        (_, warp) = cv2.findTransformECC(self.prevFrame, gray, warp, self.warp_mode, self.criteria, None, 1)
    except Exception as e:
        LOGGER.warning(f"WARNING: find transform failed. Set warp as identity {e}")

    return warp

applyFeatures(raw_frame, detections=None)

ΠŸΡ€ΠΈΠΌΠ΅Π½ΠΈ ΠΊ Π½Π΅ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚Π°Π½Π½ΠΎΠΌΡƒ ΠΊΠ°Π΄Ρ€Ρƒ ΠΌΠ΅Ρ‚ΠΎΠ΄Ρ‹, основанныС Π½Π° ΠΏΡ€ΠΈΠ·Π½Π°ΠΊΠ°Ρ…, Ρ‚Π°ΠΊΠΈΠ΅ ΠΊΠ°ΠΊ ORB ΠΈΠ»ΠΈ SIFT.

ΠŸΠ°Ρ€Π°ΠΌΠ΅Ρ‚Ρ€Ρ‹:

Имя Π’ΠΈΠΏ ОписаниС По ΡƒΠΌΠΎΠ»Ρ‡Π°Π½ΠΈΡŽ
raw_frame ndarray

НСобработанный ΠΊΠ°Π΄Ρ€, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹ΠΉ Π½ΡƒΠΆΠ½ΠΎ ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚Π°Ρ‚ΡŒ.

трСбуСтся
detections list

Бписок ΠΎΠ±Π½Π°Ρ€ΡƒΠΆΠ΅Π½ΠΈΠΉ, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹Π΅ Π±ΡƒΠ΄ΡƒΡ‚ ΠΈΡΠΏΠΎΠ»ΡŒΠ·ΠΎΠ²Π°Ρ‚ΡŒΡΡ Π² ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚ΠΊΠ΅.

None

ВозвращаСтся:

Вип ОписаниС
ndarray

ΠžΠ±Ρ€Π°Π±ΠΎΡ‚Π°Π½Π½Ρ‹ΠΉ ΠΊΠ°Π΄Ρ€ (ΠΌΠ°Ρ‚Ρ€ΠΈΡ†Π° прСобразования 2x3).

ΠŸΡ€ΠΈΠΌΠ΅Ρ€Ρ‹:

>>> gmc = GMC()
>>> gmc.applyFeatures(np.array([[1, 2, 3], [4, 5, 6]]))
array([[1, 2, 3],
       [4, 5, 6]])
Π˜ΡΡ…ΠΎΠ΄Π½Ρ‹ΠΉ ΠΊΠΎΠ΄ Π² ultralytics/trackers/utils/gmc.py
def applyFeatures(self, raw_frame: np.array, detections: list = None) -> np.array:
    """
    Apply feature-based methods like ORB or SIFT to a raw frame.

    Args:
        raw_frame (np.ndarray): The raw BGR frame of shape (H, W, 3) to be processed.
        detections (list): Optional detections; the first 4 values of each (tlbr box) are masked out of
            the keypoint search so moving objects do not contaminate the camera-motion estimate.

    Returns:
        (np.ndarray): 2x3 affine transformation matrix mapping the previous frame onto the current one
            (identity on the first frame or when too few matches are found).
    """
    height, width, _ = raw_frame.shape
    frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
    H = np.eye(2, 3)

    # Downscale image
    if self.downscale > 1.0:
        frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
        width = width // self.downscale
        height = height // self.downscale

    # Find the keypoints: search mask excludes a 2% border and all detection boxes.
    mask = np.zeros_like(frame)
    mask[int(0.02 * height) : int(0.98 * height), int(0.02 * width) : int(0.98 * width)] = 255
    if detections is not None:
        for det in detections:
            tlbr = (det[:4] / self.downscale).astype(np.int_)  # boxes are in full-resolution coords
            mask[tlbr[1] : tlbr[3], tlbr[0] : tlbr[2]] = 0

    keypoints = self.detector.detect(frame, mask)

    # Compute the descriptors
    keypoints, descriptors = self.extractor.compute(frame, keypoints)

    # Handle first frame: only seed state, no motion can be estimated yet.
    if not self.initializedFirstFrame:
        # Initialize data
        self.prevFrame = frame.copy()
        self.prevKeyPoints = copy.copy(keypoints)
        self.prevDescriptors = copy.copy(descriptors)

        # Initialization done
        self.initializedFirstFrame = True

        return H

    # Match descriptors: 2-NN matching between previous and current descriptors.
    # NOTE(review): the loop below assumes every entry has exactly 2 candidates; knnMatch may
    # return shorter lists for tiny descriptor sets — confirm this cannot occur here.
    knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)

    # Filter matches based on smallest spatial distance
    matches = []
    spatialDistances = []

    # A match is rejected if the keypoint moved more than 25% of the frame size on either axis.
    maxSpatialDistance = 0.25 * np.array([width, height])

    # Handle empty matches case
    if len(knnMatches) == 0:
        # Store to next iteration
        self.prevFrame = frame.copy()
        self.prevKeyPoints = copy.copy(keypoints)
        self.prevDescriptors = copy.copy(descriptors)

        return H

    for m, n in knnMatches:
        if m.distance < 0.9 * n.distance:  # Lowe's ratio test (best vs second-best distance)
            prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt
            currKeyPointLocation = keypoints[m.trainIdx].pt

            # Per-axis displacement of the matched keypoint between frames.
            spatialDistance = (
                prevKeyPointLocation[0] - currKeyPointLocation[0],
                prevKeyPointLocation[1] - currKeyPointLocation[1],
            )

            if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and (
                np.abs(spatialDistance[1]) < maxSpatialDistance[1]
            ):
                spatialDistances.append(spatialDistance)
                matches.append(m)

    # Per-axis 2.5-sigma clipping of the displacement distribution to drop outlier matches.
    meanSpatialDistances = np.mean(spatialDistances, 0)
    stdSpatialDistances = np.std(spatialDistances, 0)

    inliers = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances

    goodMatches = []
    prevPoints = []
    currPoints = []
    for i in range(len(matches)):
        if inliers[i, 0] and inliers[i, 1]:  # keep only matches that are inliers on both axes
            goodMatches.append(matches[i])
            prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt)
            currPoints.append(keypoints[matches[i].trainIdx].pt)

    prevPoints = np.array(prevPoints)
    currPoints = np.array(currPoints)

    # Draw the keypoint matches on the output image
    # if False:
    #     import matplotlib.pyplot as plt
    #     matches_img = np.hstack((self.prevFrame, frame))
    #     matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR)
    #     W = self.prevFrame.shape[1]
    #     for m in goodMatches:
    #         prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_)
    #         curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
    #         curr_pt[0] += W
    #         color = np.random.randint(0, 255, 3)
    #         color = (int(color[0]), int(color[1]), int(color[2]))
    #
    #         matches_img = cv2.line(matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)
    #         matches_img = cv2.circle(matches_img, prev_pt, 2, tuple(color), -1)
    #         matches_img = cv2.circle(matches_img, curr_pt, 2, tuple(color), -1)
    #
    #     plt.figure()
    #     plt.imshow(matches_img)
    #     plt.show()

    # Find rigid matrix (partial affine: rotation + uniform scale + translation) via RANSAC.
    if prevPoints.shape[0] > 4:
        H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)

        # Handle downscale: points live in the downscaled frame, so rescale the translation terms.
        if self.downscale > 1.0:
            H[0, 2] *= self.downscale
            H[1, 2] *= self.downscale
    else:
        LOGGER.warning("WARNING: not enough matching points")

    # Store to next iteration
    self.prevFrame = frame.copy()
    self.prevKeyPoints = copy.copy(keypoints)
    self.prevDescriptors = copy.copy(descriptors)

    return H

applySparseOptFlow(raw_frame)

ΠŸΡ€ΠΈΠΌΠ΅Π½ΠΈ ΠΌΠ΅Ρ‚ΠΎΠ΄ Sparse Optical Flow ΠΊ Π½Π΅ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚Π°Π½Π½ΠΎΠΌΡƒ ΠΊΠ°Π΄Ρ€Ρƒ.

ΠŸΠ°Ρ€Π°ΠΌΠ΅Ρ‚Ρ€Ρ‹:

Имя Π’ΠΈΠΏ ОписаниС По ΡƒΠΌΠΎΠ»Ρ‡Π°Π½ΠΈΡŽ
raw_frame ndarray

НСобработанный ΠΊΠ°Π΄Ρ€, ΠΊΠΎΡ‚ΠΎΡ€Ρ‹ΠΉ Π½ΡƒΠΆΠ½ΠΎ ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚Π°Ρ‚ΡŒ.

трСбуСтся

ВозвращаСтся:

Вип ОписаниС
ndarray

ΠžΠ±Ρ€Π°Π±ΠΎΡ‚Π°Π½Π½Ρ‹ΠΉ ΠΊΠ°Π΄Ρ€ (ΠΌΠ°Ρ‚Ρ€ΠΈΡ†Π° прСобразования 2x3).

ΠŸΡ€ΠΈΠΌΠ΅Ρ€Ρ‹:

>>> gmc = GMC()
>>> gmc.applySparseOptFlow(np.array([[1, 2, 3], [4, 5, 6]]))
array([[1, 2, 3],
       [4, 5, 6]])
Π˜ΡΡ…ΠΎΠ΄Π½Ρ‹ΠΉ ΠΊΠΎΠ΄ Π² ultralytics/trackers/utils/gmc.py
def applySparseOptFlow(self, raw_frame: np.array) -> np.array:
    """
    Apply the Sparse Optical Flow method to a raw frame.

    Args:
        raw_frame (np.ndarray): The raw BGR frame of shape (H, W, 3) to be processed.

    Returns:
        (np.ndarray): 2x3 affine transformation matrix mapping the previous frame onto the current one
            (identity on the first frame or when too few correspondences are found).
    """
    height, width, _ = raw_frame.shape
    frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
    H = np.eye(2, 3)

    # Downscale image
    if self.downscale > 1.0:
        frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))

    # Find the keypoints (Shi-Tomasi corners)
    keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)

    # Handle first frame (or a reset where no keypoints were cached)
    if not self.initializedFirstFrame or self.prevKeyPoints is None:
        self.prevFrame = frame.copy()
        self.prevKeyPoints = copy.copy(keypoints)
        self.initializedFirstFrame = True
        return H

    # Find correspondences with pyramidal Lucas-Kanade optical flow
    matchedKeypoints, status, _ = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, self.prevKeyPoints, None)

    # Leave good correspondences only (status == 1 means the point was tracked successfully)
    prevPoints = []
    currPoints = []

    for i in range(len(status)):
        if status[i]:
            prevPoints.append(self.prevKeyPoints[i])
            currPoints.append(matchedKeypoints[i])

    prevPoints = np.array(prevPoints)
    currPoints = np.array(currPoints)

    # Find rigid matrix. Fixed: the second condition previously compared prevPoints.shape[0]
    # with itself (always True); it must compare the prev and curr point counts.
    if (prevPoints.shape[0] > 4) and (prevPoints.shape[0] == currPoints.shape[0]):
        H, _ = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)

        # Points were tracked in the downscaled frame, so rescale the translation terms.
        if self.downscale > 1.0:
            H[0, 2] *= self.downscale
            H[1, 2] *= self.downscale
    else:
        LOGGER.warning("WARNING: not enough matching points")

    self.prevFrame = frame.copy()
    self.prevKeyPoints = copy.copy(keypoints)

    return H

reset_params()

Π‘Π±Ρ€ΠΎΡΡŒ ΠΏΠ°Ρ€Π°ΠΌΠ΅Ρ‚Ρ€Ρ‹.

Π˜ΡΡ…ΠΎΠ΄Π½Ρ‹ΠΉ ΠΊΠΎΠ΄ Π² ultralytics/trackers/utils/gmc.py
def reset_params(self) -> None:
    """Clear cached per-frame state so the next frame is treated as the first one."""
    # Drop all cached frame data from previous iterations.
    for attr in ("prevFrame", "prevKeyPoints", "prevDescriptors"):
        setattr(self, attr, None)
    self.initializedFirstFrame = False





Боздано 2023-11-12, ОбновлСно 2024-05-08
Авторы: Burhan-Q (1), glenn-jocher (3)