Yolov8-源码解析-三十九-

Yolov8 源码解析(三十九)

.\yolov8\ultralytics\trackers\bot_sort.py

# Ultralytics YOLO 🚀, AGPL-3.0 license

from collections import deque  # 导入 deque 数据结构,用于存储特征向量历史
import numpy as np  # 导入 NumPy 库,用于数值计算

from .basetrack import TrackState  # 导入 TrackState 类,用于跟踪状态
from .byte_tracker import BYTETracker, STrack  # 导入 BYTETracker 和 STrack 类
from .utils import matching  # 导入 matching 函数,用于匹配操作
from .utils.gmc import GMC  # 导入 GMC 类
from .utils.kalman_filter import KalmanFilterXYWH  # 导入 KalmanFilterXYWH 类


class BOTrack(STrack):
    """
    An extended version of the STrack class for YOLOv8, adding object tracking features.

    Attributes:
        shared_kalman (KalmanFilterXYWH): A shared Kalman filter for all instances of BOTrack.
        smooth_feat (np.ndarray): Smoothed feature vector.
        curr_feat (np.ndarray): Current feature vector.
        features (deque): A deque to store feature vectors with a maximum length defined by `feat_history`.
        alpha (float): Smoothing factor for the exponential moving average of features.
        mean (np.ndarray): The mean state of the Kalman filter.
        covariance (np.ndarray): The covariance matrix of the Kalman filter.

    Methods:
        update_features(feat): Update features vector and smooth it using exponential moving average.
        predict(): Predicts the mean and covariance using Kalman filter.
        re_activate(new_track, frame_id, new_id): Reactivates a track with updated features and optionally new ID.
        update(new_track, frame_id): Update the YOLOv8 instance with new track and frame ID.
        tlwh: Property that gets the current position in tlwh format `(top left x, top left y, width, height)`.
        multi_predict(stracks): Predicts the mean and covariance of multiple object tracks using shared Kalman filter.
        convert_coords(tlwh): Converts tlwh bounding box coordinates to xywh format.
        tlwh_to_xywh(tlwh): Convert bounding box to xywh format `(center x, center y, width, height)`.

    Usage:
        bo_track = BOTrack(tlwh, score, cls, feat)
        bo_track.predict()
        bo_track.update(new_track, frame_id)
    """

    shared_kalman = KalmanFilterXYWH()  # 创建一个共享的 KalmanFilterXYWH 对象

    def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
        """Initialize YOLOv8 object with temporal parameters, such as feature history, alpha and current features."""
        super().__init__(tlwh, score, cls)  # 调用父类 STrack 的初始化方法

        self.smooth_feat = None  # 初始化平滑后的特征向量
        self.curr_feat = None  # 初始化当前的特征向量
        if feat is not None:
            self.update_features(feat)  # 若提供了特征向量 feat,则更新特征向量

        self.features = deque([], maxlen=feat_history)  # 创建一个空的 deque,用于存储特征向量历史,最大长度为 feat_history
        self.alpha = 0.9  # 初始化指数移动平均的平滑因子

    def update_features(self, feat):
        """Update features vector and smooth it using exponential moving average."""
        feat /= np.linalg.norm(feat)  # 对特征向量进行归一化处理
        self.curr_feat = feat  # 更新当前特征向量

        if self.smooth_feat is None:
            self.smooth_feat = feat  # 若平滑后的特征向量还未初始化,则直接赋值为当前特征向量
        else:
            self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat  # 否则进行指数移动平均平滑处理

        self.features.append(feat)  # 将当前特征向量添加到 deque 中
        self.smooth_feat /= np.linalg.norm(self.smooth_feat)  # 对平滑后的特征向量再次进行归一化处理
    def predict(self):
        """Predicts the mean and covariance using Kalman filter."""
        # 复制当前的均值状态
        mean_state = self.mean.copy()
        # 如果跟踪状态不是已跟踪,则将速度置为零
        if self.state != TrackState.Tracked:
            mean_state[6] = 0
            mean_state[7] = 0

        # 使用卡尔曼滤波器进行预测,更新均值和协方差
        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)

    def re_activate(self, new_track, frame_id, new_id=False):
        """Reactivates a track with updated features and optionally assigns a new ID."""
        # 如果新的轨迹具有当前特征,则更新特征
        if new_track.curr_feat is not None:
            self.update_features(new_track.curr_feat)
        # 调用父类的重新激活方法,传递新的轨迹、帧ID和是否需要新ID的信息
        super().re_activate(new_track, frame_id, new_id)

    def update(self, new_track, frame_id):
        """Update the YOLOv8 instance with new track and frame ID."""
        # 如果新的轨迹具有当前特征,则更新特征
        if new_track.curr_feat is not None:
            self.update_features(new_track.curr_feat)
        # 调用父类的更新方法,传递新的轨迹和帧ID
        super().update(new_track, frame_id)

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y, width, height)`."""
        # 如果均值为空,则返回私有属性 `_tlwh` 的副本
        if self.mean is None:
            return self._tlwh.copy()
        # 否则,从均值中获取当前位置的副本,并计算其左上角坐标和大小
        ret = self.mean[:4].copy()
        ret[:2] -= ret[2:] / 2
        return ret

    @staticmethod
    def multi_predict(stracks):
        """Predicts the mean and covariance of multiple object tracks using shared Kalman filter."""
        # 如果输入的轨迹数小于等于0,则直接返回
        if len(stracks) <= 0:
            return
        # 获取所有轨迹的均值和协方差,并转换为数组
        multi_mean = np.asarray([st.mean.copy() for st in stracks])
        multi_covariance = np.asarray([st.covariance for st in stracks])
        # 对于每一个轨迹,如果其状态不是已跟踪,则将速度置为零
        for i, st in enumerate(stracks):
            if st.state != TrackState.Tracked:
                multi_mean[i][6] = 0
                multi_mean[i][7] = 0
        # 使用共享卡尔曼滤波器进行多目标预测,更新均值和协方差
        multi_mean, multi_covariance = BOTrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
        # 将更新后的均值和协方差分配回每一个轨迹
        for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
            stracks[i].mean = mean
            stracks[i].covariance = cov

    def convert_coords(self, tlwh):
        """Converts Top-Left-Width-Height bounding box coordinates to X-Y-Width-Height format."""
        # 调用静态方法 tlwh_to_xywh 进行坐标转换
        return self.tlwh_to_xywh(tlwh)

    @staticmethod
    def tlwh_to_xywh(tlwh):
        """Convert bounding box to format `(center x, center y, width, height)`."""
        # 复制传入的 tlwh 数组
        ret = np.asarray(tlwh).copy()
        # 计算中心点坐标并更新 ret 数组
        ret[:2] += ret[2:] / 2
        return ret
# BOTSORT 类是 BYTETracker 类的扩展版本,专为使用 ReID 和 GMC 算法进行对象跟踪的 YOLOv8 设计。

class BOTSORT(BYTETracker):
    """
    An extended version of the BYTETracker class for YOLOv8, designed for object tracking with ReID and GMC algorithm.

    Attributes:
        proximity_thresh (float): Threshold for spatial proximity (IoU) between tracks and detections.
        appearance_thresh (float): Threshold for appearance similarity (ReID embeddings) between tracks and detections.
        encoder (object): Object to handle ReID embeddings, set to None if ReID is not enabled.
        gmc (GMC): An instance of the GMC algorithm for data association.
        args (object): Parsed command-line arguments containing tracking parameters.

    Methods:
        get_kalmanfilter(): Returns an instance of KalmanFilterXYWH for object tracking.
        init_track(dets, scores, cls, img): Initialize track with detections, scores, and classes.
        get_dists(tracks, detections): Get distances between tracks and detections using IoU and (optionally) ReID.
        multi_predict(tracks): Predict and track multiple objects with YOLOv8 model.

    Usage:
        bot_sort = BOTSORT(args, frame_rate)
        bot_sort.init_track(dets, scores, cls, img)
        bot_sort.multi_predict(tracks)

    Note:
        The class is designed to work with the YOLOv8 object detection model and supports ReID only if enabled via args.
    """

    def __init__(self, args, frame_rate=30):
        """Initialize YOLOv8 object with ReID module and GMC algorithm."""
        # 调用父类的初始化方法
        super().__init__(args, frame_rate)
        # 设置空间接近度阈值和外观相似度阈值
        self.proximity_thresh = args.proximity_thresh
        self.appearance_thresh = args.appearance_thresh

        if args.with_reid:
            # 如果启用了 ReID,但尚未支持 BoT-SORT(reid)
            self.encoder = None
        # 初始化 GMC 算法实例
        self.gmc = GMC(method=args.gmc_method)

    def get_kalmanfilter(self):
        """Returns an instance of KalmanFilterXYWH for object tracking."""
        # 返回用于对象跟踪的 KalmanFilterXYWH 实例
        return KalmanFilterXYWH()

    def init_track(self, dets, scores, cls, img=None):
        """Initialize track with detections, scores, and classes."""
        if len(dets) == 0:
            return []
        if self.args.with_reid and self.encoder is not None:
            # 如果启用了 ReID 并且有编码器对象,则进行特征推断
            features_keep = self.encoder.inference(img, dets)
            # 返回包含位置、分数、类别和特征的 BOTrack 实例列表
            return [BOTrack(xyxy, s, c, f) for (xyxy, s, c, f) in zip(dets, scores, cls, features_keep)]
        else:
            # 返回包含位置、分数和类别的 BOTrack 实例列表
            return [BOTrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)]
    # 获取跟踪目标与检测目标之间的距离,使用IoU和(可选的)ReID嵌入特征。
    def get_dists(self, tracks, detections):
        dists = matching.iou_distance(tracks, detections)  # 计算使用IoU的距离
        dists_mask = dists > self.proximity_thresh  # 创建距离阈值掩码

        # TODO: mot20
        # 如果不是使用mot20数据集,则进行融合评分处理
        # if not self.args.mot20:
        dists = matching.fuse_score(dists, detections)

        # 如果启用了ReID并且有编码器
        if self.args.with_reid and self.encoder is not None:
            emb_dists = matching.embedding_distance(tracks, detections) / 2.0  # 计算嵌入距离
            emb_dists[emb_dists > self.appearance_thresh] = 1.0  # 应用外观阈值
            emb_dists[dists_mask] = 1.0  # 应用距离掩码
            dists = np.minimum(dists, emb_dists)  # 取最小距离

        return dists  # 返回距离矩阵

    # 使用YOLOv8模型进行多对象预测和跟踪
    def multi_predict(self, tracks):
        BOTrack.multi_predict(tracks)

    # 重置跟踪器状态
    def reset(self):
        super().reset()  # 调用父类的重置方法
        self.gmc.reset_params()  # 重置特定的参数

.\yolov8\ultralytics\trackers\byte_tracker.py

# 导入 NumPy 库,用于处理数值计算
import numpy as np

# 从上级目录的 utils 模块中导入 LOGGER 对象
from ..utils import LOGGER
# 从 utils.ops 模块中导入 xywh2ltwh 函数,用于坐标转换
from ..utils.ops import xywh2ltwh
# 从当前目录下的 basetrack 模块中导入 BaseTrack 类和 TrackState 枚举
from .basetrack import BaseTrack, TrackState
# 从当前目录下的 utils 模块中导入 matching 函数
from .utils import matching
# 从当前目录下的 utils.kalman_filter 模块中导入 KalmanFilterXYAH 类
from .utils.kalman_filter import KalmanFilterXYAH

# 定义 STrack 类,继承自 BaseTrack 类
class STrack(BaseTrack):
    """
    Single object tracking representation that uses Kalman filtering for state estimation.

    This class is responsible for storing all the information regarding individual tracklets and performs state updates
    and predictions based on Kalman filter.

    Attributes:
        shared_kalman (KalmanFilterXYAH): Shared Kalman filter that is used across all STrack instances for prediction.
        _tlwh (np.ndarray): Private attribute to store top-left corner coordinates and width and height of bounding box.
        kalman_filter (KalmanFilterXYAH): Instance of Kalman filter used for this particular object track.
        mean (np.ndarray): Mean state estimate vector.
        covariance (np.ndarray): Covariance of state estimate.
        is_activated (bool): Boolean flag indicating if the track has been activated.
        score (float): Confidence score of the track.
        tracklet_len (int): Length of the tracklet.
        cls (any): Class label for the object.
        idx (int): Index or identifier for the object.
        frame_id (int): Current frame ID.
        start_frame (int): Frame where the object was first detected.

    Methods:
        predict(): Predict the next state of the object using Kalman filter.
        multi_predict(stracks): Predict the next states for multiple tracks.
        multi_gmc(stracks, H): Update multiple track states using a homography matrix.
        activate(kalman_filter, frame_id): Activate a new tracklet.
        re_activate(new_track, frame_id, new_id): Reactivate a previously lost tracklet.
        update(new_track, frame_id): Update the state of a matched track.
        convert_coords(tlwh): Convert bounding box to x-y-aspect-height format.
        tlwh_to_xyah(tlwh): Convert tlwh bounding box to xyah format.
    """

    # 共享的 Kalman 过滤器实例,用于所有 STrack 实例的预测
    shared_kalman = KalmanFilterXYAH()

    def __init__(self, xywh, score, cls):
        """Initialize new STrack instance."""
        super().__init__()
        # 将 xywh 转换为 tlwh 格式并存储在私有属性 _tlwh 中
        # xywh 是包含位置和尺寸信息的数组,score 是置信度,cls 是类别标签
        assert len(xywh) in {5, 6}, f"expected 5 or 6 values but got {len(xywh)}"
        self._tlwh = np.asarray(xywh2ltwh(xywh[:4]), dtype=np.float32)
        # 初始化 Kalman 过滤器、状态估计向量和协方差矩阵
        self.kalman_filter = None
        self.mean, self.covariance = None, None
        # 标识是否已激活的布尔值
        self.is_activated = False

        # 分数(置信度)、轨迹长度、类别标签和对象标识符
        self.score = score
        self.tracklet_len = 0
        self.cls = cls
        self.idx = xywh[-1]  # 最后一个元素是对象的唯一标识符
        self.angle = xywh[4] if len(xywh) == 6 else None  # 如果有角度信息则存储角度

    def predict(self):
        """Predicts mean and covariance using Kalman filter."""
        # 复制当前均值状态
        mean_state = self.mean.copy()
        # 如果跟踪状态不是已跟踪状态,则将速度信息置零
        if self.state != TrackState.Tracked:
            mean_state[7] = 0
        # 使用 Kalman 过滤器预测下一个状态的均值和协方差
        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
    @staticmethod
    def multi_predict(stracks):
        """Perform multi-object predictive tracking using Kalman filter for given stracks."""
        # 如果输入的stracks为空,则直接返回,不进行处理
        if len(stracks) <= 0:
            return
        
        # 从每个STrack对象中提取mean属性的副本,形成numpy数组multi_mean
        multi_mean = np.asarray([st.mean.copy() for st in stracks])
        # 从每个STrack对象中提取covariance属性,形成numpy数组multi_covariance
        multi_covariance = np.asarray([st.covariance for st in stracks])
        
        # 遍历每个STrack对象,如果其状态不是Tracked,则将其mean属性的第8个元素设为0
        for i, st in enumerate(stracks):
            if st.state != TrackState.Tracked:
                multi_mean[i][7] = 0
        
        # 调用共享的Kalman滤波器进行多对象预测,更新multi_mean和multi_covariance
        multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
        
        # 将预测得到的mean和covariance更新回各个STrack对象中
        for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
            stracks[i].mean = mean
            stracks[i].covariance = cov

    @staticmethod
    def multi_gmc(stracks, H=np.eye(2, 3)):
        """Update state tracks positions and covariances using a homography matrix."""
        # 如果输入的stracks不为空
        if len(stracks) > 0:
            # 从每个STrack对象中提取mean属性的副本,形成numpy数组multi_mean
            multi_mean = np.asarray([st.mean.copy() for st in stracks])
            # 从每个STrack对象中提取covariance属性,形成numpy数组multi_covariance
            multi_covariance = np.asarray([st.covariance for st in stracks])

            # 从H矩阵中提取旋转部分R,并构造一个扩展到8x8维度的R矩阵R8x8
            R = H[:2, :2]
            R8x8 = np.kron(np.eye(4, dtype=float), R)
            # 提取H矩阵中的平移部分t
            t = H[:2, 2]

            # 遍历每个STrack对象,根据H矩阵更新其mean和covariance属性
            for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
                mean = R8x8.dot(mean)  # 通过R8x8变换更新mean
                mean[:2] += t  # 加上平移t
                cov = R8x8.dot(cov).dot(R8x8.transpose())  # 更新covariance

                # 将更新后的mean和covariance重新赋值回各个STrack对象中
                stracks[i].mean = mean
                stracks[i].covariance = cov

    def activate(self, kalman_filter, frame_id):
        """Start a new tracklet."""
        # 初始化新的跟踪对象,设置其kalman_filter、track_id、mean和covariance等属性
        self.kalman_filter = kalman_filter
        self.track_id = self.next_id()
        self.mean, self.covariance = self.kalman_filter.initiate(self.convert_coords(self._tlwh))

        self.tracklet_len = 0
        self.state = TrackState.Tracked
        if frame_id == 1:
            self.is_activated = True
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):
        """Reactivates a previously lost track with a new detection."""
        # 使用新的检测数据更新已丢失的跟踪对象的mean和covariance属性
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.convert_coords(new_track.tlwh)
        )
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()
        self.score = new_track.score
        self.cls = new_track.cls
        self.angle = new_track.angle
        self.idx = new_track.idx
    # 更新匹配跟踪的状态。
    def update(self, new_track, frame_id):
        """
        Update the state of a matched track.

        Args:
            new_track (STrack): The new track containing updated information.
            frame_id (int): The ID of the current frame.
        """
        # 更新帧 ID
        self.frame_id = frame_id
        # 增加跟踪段长度
        self.tracklet_len += 1

        # 获取新跟踪的位置信息
        new_tlwh = new_track.tlwh
        # 使用卡尔曼滤波器更新跟踪状态的均值和协方差
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.convert_coords(new_tlwh)
        )
        # 设置跟踪状态为已跟踪
        self.state = TrackState.Tracked
        # 激活跟踪器
        self.is_activated = True

        # 更新跟踪器的分数
        self.score = new_track.score
        # 更新跟踪器的类别
        self.cls = new_track.cls
        # 更新跟踪器的角度
        self.angle = new_track.angle
        # 更新跟踪器的索引
        self.idx = new_track.idx

    # 将边界框的 top-left-width-height 格式转换为 x-y-aspect-height 格式。
    def convert_coords(self, tlwh):
        """Convert a bounding box's top-left-width-height format to its x-y-aspect-height equivalent."""
        return self.tlwh_to_xyah(tlwh)

    # 返回当前位置的边界框格式 (top left x, top left y, width, height)。
    @property
    def tlwh(self):
        """Get current position in bounding box format (top left x, top left y, width, height)."""
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        ret[2] *= ret[3]
        ret[:2] -= ret[2:] / 2
        return ret

    # 将边界框转换为格式 (min x, min y, max x, max y),即 (top left, bottom right)。
    @property
    def xyxy(self):
        """Convert bounding box to format (min x, min y, max x, max y), i.e., (top left, bottom right)."""
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    # 将边界框转换为格式 (center x, center y, aspect ratio, height),其中 aspect ratio 是宽度 / 高度。
    @staticmethod
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format (center x, center y, aspect ratio, height), where the aspect ratio is width /
        height.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    # 返回当前位置的边界框格式 (center x, center y, width, height)。
    @property
    def xywh(self):
        """Get current position in bounding box format (center x, center y, width, height)."""
        ret = np.asarray(self.tlwh).copy()
        ret[:2] += ret[2:] / 2
        return ret

    # 返回当前位置的边界框格式 (center x, center y, width, height, angle)。
    @property
    def xywha(self):
        """Get current position in bounding box format (center x, center y, width, height, angle)."""
        if self.angle is None:
            LOGGER.warning("WARNING ⚠️ `angle` attr not found, returning `xywh` instead.")
            return self.xywh
        return np.concatenate([self.xywh, self.angle[None]])

    # 返回当前跟踪结果。
    @property
    def result(self):
        """Get current tracking results."""
        coords = self.xyxy if self.angle is None else self.xywha
        return coords.tolist() + [self.track_id, self.score, self.cls, self.idx]

    # 返回 BYTETracker 对象的字符串表示,包括开始和结束帧以及跟踪 ID。
    def __repr__(self):
        """Return a string representation of the BYTETracker object with start and end frames and track ID."""
        return f"OT_{self.track_id}_({self.start_frame}-{self.end_frame})"
class BYTETracker:
    """
    BYTETracker: A tracking algorithm built on top of YOLOv8 for object detection and tracking.

    The class is responsible for initializing, updating, and managing the tracks for detected objects in a video
    sequence. It maintains the state of tracked, lost, and removed tracks over frames, utilizes Kalman filtering for
    predicting the new object locations, and performs data association.

    Attributes:
        tracked_stracks (list[STrack]): List of successfully activated tracks.
        lost_stracks (list[STrack]): List of lost tracks.
        removed_stracks (list[STrack]): List of removed tracks.
        frame_id (int): The current frame ID.
        args (namespace): Command-line arguments.
        max_time_lost (int): The maximum frames for a track to be considered as 'lost'.
        kalman_filter (object): Kalman Filter object.

    Methods:
        update(results, img=None): Updates object tracker with new detections.
        get_kalmanfilter(): Returns a Kalman filter object for tracking bounding boxes.
        init_track(dets, scores, cls, img=None): Initialize object tracking with detections.
        get_dists(tracks, detections): Calculates the distance between tracks and detections.
        multi_predict(tracks): Predicts the location of tracks.
        reset_id(): Resets the ID counter of STrack.
        joint_stracks(tlista, tlistb): Combines two lists of stracks.
        sub_stracks(tlista, tlistb): Filters out the stracks present in the second list from the first list.
        remove_duplicate_stracks(stracksa, stracksb): Removes duplicate stracks based on IoU.
    """

    def __init__(self, args, frame_rate=30):
        """Initialize a YOLOv8 object to track objects with given arguments and frame rate."""
        # 初始化已跟踪的轨迹列表
        self.tracked_stracks = []  # type: list[STrack]
        # 初始化丢失的轨迹列表
        self.lost_stracks = []  # type: list[STrack]
        # 初始化移除的轨迹列表
        self.removed_stracks = []  # type: list[STrack]

        # 当前帧的 ID
        self.frame_id = 0
        # 命令行参数
        self.args = args
        # 计算最大丢失帧数,用于确定轨迹丢失
        self.max_time_lost = int(frame_rate / 30.0 * args.track_buffer)
        # 获取 Kalman 过滤器对象
        self.kalman_filter = self.get_kalmanfilter()
        # 重置 STrack 的 ID 计数器
        self.reset_id()

    def get_kalmanfilter(self):
        """Returns a Kalman filter object for tracking bounding boxes."""
        # 返回一个 Kalman 过滤器对象,用于跟踪边界框
        return KalmanFilterXYAH()

    def init_track(self, dets, scores, cls, img=None):
        """Initialize object tracking with detections and scores using STrack algorithm."""
        # 使用 STrack 算法,根据检测结果和分数初始化对象跟踪
        return [STrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] if len(dets) else []  # detections

    def get_dists(self, tracks, detections):
        """Calculates the distance between tracks and detections using IoU and fuses scores."""
        # 使用 IoU 计算轨迹和检测之间的距离,并融合分数
        dists = matching.iou_distance(tracks, detections)
        # 如果不是 MOT20 数据集,执行分数融合
        # TODO: mot20
        # if not self.args.mot20:
        dists = matching.fuse_score(dists, detections)
        return dists
    def multi_predict(self, tracks):
        """
        Returns the predicted tracks using the YOLOv8 network.
        """
        # 调用 STrack 类的 multi_predict 方法来进行多目标跟踪预测
        STrack.multi_predict(tracks)

    @staticmethod
    def reset_id():
        """
        Resets the ID counter of STrack.
        """
        # 调用 STrack 类的 reset_id 静态方法,重置 STrack 的 ID 计数器
        STrack.reset_id()

    def reset(self):
        """
        Reset tracker.
        """
        # 初始化跟踪器,重置所有跟踪状态
        self.tracked_stracks = []  # type: list[STrack]
        self.lost_stracks = []  # type: list[STrack]
        self.removed_stracks = []  # type: list[STrack]
        self.frame_id = 0
        self.kalman_filter = self.get_kalmanfilter()
        self.reset_id()  # 调用重置 ID 计数器的方法

    @staticmethod
    def joint_stracks(tlista, tlistb):
        """
        Combine two lists of stracks into a single one.
        """
        # 使用字典记录已经存在的 track_id
        exists = {}
        res = []
        # 将 tlista 中的每个元素添加到 res 中,并记录其 track_id 到 exists 字典中
        for t in tlista:
            exists[t.track_id] = 1
            res.append(t)
        # 遍历 tlistb,如果其 track_id 不存在于 exists 字典中,则添加到 res 中
        for t in tlistb:
            tid = t.track_id
            if not exists.get(tid, 0):
                exists[tid] = 1
                res.append(t)
        return res

    @staticmethod
    def sub_stracks(tlista, tlistb):
        """
        DEPRECATED CODE in https://github.com/ultralytics/ultralytics/pull/1890/
        """
        # 使用集合推导式创建 tlista 中每个元素的 track_id 集合
        track_ids_b = {t.track_id for t in tlistb}
        # 返回 tlista 中所有 track_id 不在 track_ids_b 集合中的元素列表
        return [t for t in tlista if t.track_id not in track_ids_b]

    @staticmethod
    def remove_duplicate_stracks(stracksa, stracksb):
        """
        Remove duplicate stracks with non-maximum IoU distance.
        """
        # 计算 stracksa 和 stracksb 之间的 IoU 距离矩阵
        pdist = matching.iou_distance(stracksa, stracksb)
        # 找到 IoU 距离小于 0.15 的索引对
        pairs = np.where(pdist < 0.15)
        dupa, dupb = [], []
        # 根据跟踪器的时间长度,判断哪些是重复的跟踪器
        for p, q in zip(*pairs):
            timep = stracksa[p].frame_id - stracksa[p].start_frame
            timeq = stracksb[q].frame_id - stracksb[q].start_frame
            if timep > timeq:
                dupb.append(q)
            else:
                dupa.append(p)
        # 返回去除重复跟踪器后的结果列表
        resa = [t for i, t in enumerate(stracksa) if i not in dupa]
        resb = [t for i, t in enumerate(stracksb) if i not in dupb]
        return resa, resb

Multi-Object Tracking with Ultralytics YOLO

YOLOv8 trackers visualization

Object tracking in the realm of video analytics is a critical task that not only identifies the location and class of objects within the frame but also maintains a unique ID for each detected object as the video progresses. The applications are limitless—ranging from surveillance and security to real-time sports analytics.

Why Choose Ultralytics YOLO for Object Tracking?

The output from Ultralytics trackers is consistent with standard object detection but has the added value of object IDs. This makes it easy to track objects in video streams and perform subsequent analytics. Here's why you should consider using Ultralytics YOLO for your object tracking needs:

  • Efficiency: Process video streams in real-time without compromising accuracy.
  • Flexibility: Supports multiple tracking algorithms and configurations.
  • Ease of Use: Simple Python API and CLI options for quick integration and deployment.
  • Customizability: Easy to use with custom trained YOLO models, allowing integration into domain-specific applications.

Video Tutorial: Object Detection and Tracking with Ultralytics YOLOv8.

Features at a Glance

Ultralytics YOLO extends its object detection features to provide robust and versatile object tracking:

  • Real-Time Tracking: Seamlessly track objects in high-frame-rate videos.
  • Multiple Tracker Support: Choose from a variety of established tracking algorithms.
  • Customizable Tracker Configurations: Tailor the tracking algorithm to meet specific requirements by adjusting various parameters.

Available Trackers

Ultralytics YOLO supports the following tracking algorithms. They can be enabled by passing the relevant YAML configuration file such as tracker=tracker_type.yaml:

  • BoT-SORT - Use botsort.yaml to enable this tracker.
  • ByteTrack - Use bytetrack.yaml to enable this tracker.

The default tracker is BoT-SORT.

Tracking

To run the tracker on video streams, use a trained Detect, Segment or Pose model such as YOLOv8n, YOLOv8n-seg and YOLOv8n-pose.

Python

from ultralytics import YOLO

# Load an official or custom model
model = YOLO("yolov8n.pt")  # Load an official Detect model
model = YOLO("yolov8n-seg.pt")  # Load an official Segment model
model = YOLO("yolov8n-pose.pt")  # Load an official Pose model
model = YOLO("path/to/best.pt")  # Load a custom trained model

# Perform tracking with the model
results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True)  # Tracking with default tracker
results = model.track(
    source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml"
)  # Tracking with ByteTrack tracker

CLI

# Perform tracking with various models using the command line interface
yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4"  # Official Detect model
yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4"  # Official Segment model
yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4"  # Official Pose model
yolo track model=path/to/best.pt source="https://youtu.be/LNwODJXcvt4"  # Custom trained model

# Track using ByteTrack tracker
yolo track model=path/to/best.pt tracker="bytetrack.yaml"

As can be seen in the above usage, tracking is available for all Detect, Segment and Pose models run on videos or streaming sources.

Configuration

Tracking Arguments

Tracking configuration shares properties with Predict mode, such as conf, iou, and show. For further configurations, refer to the Predict model page.

Python

from ultralytics import YOLO

# Configure the tracking parameters and run the tracker
model = YOLO("yolov8n.pt")
results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True)

CLI

# Configure tracking parameters and run the tracker using the command line interface
yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3, iou=0.5 show

Tracker Selection

Ultralytics also allows you to use a modified tracker configuration file. To do this, simply make a copy of a tracker config file (for example, custom_tracker.yaml) from ultralytics/cfg/trackers and modify any configurations (except the tracker_type) as per your needs.

Python

from ultralytics import YOLO

# Load the model and run the tracker with a custom configuration file
model = YOLO("yolov8n.pt")
results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker="custom_tracker.yaml")

CLI

# Load the model and run the tracker with a custom configuration file using the command line interface
yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml'

For a comprehensive list of tracking arguments, refer to the ultralytics/cfg/trackers page.

Python Examples

Persisting Tracks Loop

Here is a Python script using OpenCV (cv2) and YOLOv8 to run object tracking on video frames. This script still assumes you have already installed the necessary packages (opencv-python and ultralytics). The persist=True argument tells the tracker than the current image or frame is the next in a sequence and to expect tracks from the previous image in the current image.

Python

import cv2

from ultralytics import YOLO

# Load the YOLOv8 model
model = YOLO("yolov8n.pt")

# Open the video file
video_path = "path/to/video.mp4"
cap = cv2.VideoCapture(video_path)

# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        # Run YOLOv8 tracking on the frame, persisting tracks between frames
        results = model.track(frame, persist=True)

        # Visualize the results on the frame
        annotated_frame = results[0].plot()

        # Display the annotated frame
        cv2.imshow("YOLOv8 Tracking", annotated_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        # Break the loop if the end of the video is reached
        break

# Release the video capture object and close the display window
cap.release()
cv2.destroyAllWindows()

Please note the change from model(frame) to model.track(frame), which enables object tracking instead of simple detection. This modified script will run the tracker on each frame of the video, visualize the results, and display them in a window. The loop can be exited by pressing 'q'.

Plotting Tracks Over Time

Visualizing object tracks over consecutive frames can provide valuable insights into the movement patterns and behavior of detected objects within a video. With Ultralytics YOLOv8, plotting these tracks is a seamless and efficient process.

In the following example, we demonstrate how to utilize YOLOv8's tracking capabilities to plot the movement of detected objects across multiple video frames. This script involves opening a video file, reading it frame by frame, and utilizing the YOLO model to identify and track various objects. By retaining the center points of the detected bounding boxes and connecting them, we can draw lines that represent the paths followed by the tracked objects.

Python

from collections import defaultdict

import cv2
import numpy as np

from ultralytics import YOLO

# Load the YOLOv8 model
model = YOLO("yolov8n.pt")

# Open the video file
video_path = "path/to/video.mp4"
cap = cv2.VideoCapture(video_path)

# Store the track history
track_history = defaultdict(lambda: [])

# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        # Run YOLOv8 tracking on the frame, persisting tracks between frames
        results = model.track(frame, persist=True)

        # Get the boxes and track IDs
        boxes = results[0].boxes.xywh.cpu()
        track_ids = results[0].boxes.id.int().cpu().tolist()

        # Visualize the results on the frame
        annotated_frame = results[0].plot()

        # Plot the tracks
        for box, track_id in zip(boxes, track_ids):
            x, y, w, h = box
            track = track_history[track_id]
            track.append((float(x), float(y)))  # x, y center point
            if len(track) > 30:  # retain 90 tracks for 90 frames
                track.pop(0)

            # Draw the tracking lines
            points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2))
            cv2.polylines(
                annotated_frame,
                [points],
                isClosed=False,
                color=(230, 230, 230),
                thickness=10,
            )

        # Display the annotated frame
        cv2.imshow("YOLOv8 Tracking", annotated_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        # Break the loop if the end of the video is reached
        break

# Release the video capture object and close the display window
cap.release()
cv2.destroyAllWindows()

Multithreaded Tracking

Multithreaded tracking provides the capability to run object tracking on multiple video streams simultaneously. This is particularly useful when handling multiple video inputs, such as from multiple surveillance cameras, where concurrent processing can greatly enhance efficiency and performance.

In the provided Python script, we make use of Python's threading module to run multiple instances of the tracker concurrently. Each thread is responsible for running the tracker on one video file, and all the threads run simultaneously in the background.

To ensure that each thread receives the correct parameters (the video file and the model to use), we define a function run_tracker_in_thread that accepts these parameters and contains the main tracking loop. This function reads the video frame by frame, runs the tracker, and displays the results.

Two different models are used in this example: yolov8n.pt and yolov8n-seg.pt, each tracking objects in a different video file. The video files are specified in video_file1 and video_file2.

The daemon=True parameter in threading.Thread means that these threads will be closed as soon as the main program finishes. We then start the threads with start() and use join() to make the main thread wait until both tracker threads have finished.

Finally, after all threads have completed their task, the windows displaying the results are closed using cv2.destroyAllWindows().

Python

import threading

import cv2

from ultralytics import YOLO


def run_tracker_in_thread(filename, model):
    """Starts multi-thread tracking on video from `filename` using `model` and displays results frame by frame."""
    video = cv2.VideoCapture(filename)
    frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    for _ in range(frames):
        ret, frame = video.read()
        if ret:
            results = model.track(source=frame, persist=True)
            res_plotted = results[0].plot()
            cv2.imshow("p", res_plotted)
            if cv2.waitKey(1) == ord("q"):
                break


# Load the models
model1 = YOLO("yolov8n.pt")
model2 = YOLO("yolov8n-seg.pt")

# Define the video files for the trackers
video_file1 = "path/to/video1.mp4"
video_file2 = "path/to/video2.mp4"

# Create the tracker threads
tracker_thread1 = threading.Thread(target=run_tracker_in_thread, args=(video_file1, model1), daemon=True)
tracker_thread2 = threading.Thread(target=run_tracker_in_thread, args=(video_file2, model2), daemon=True)

# Start the tracker threads
tracker_thread1.start()
tracker_thread2.start()

# Wait for the tracker threads to finish
tracker_thread1.join()
tracker_thread2.join()

# Clean up and close windows
cv2.destroyAllWindows()

This example can easily be extended to handle more video files and models by creating more threads and applying the same methodology.

Contribute New Trackers

Are you proficient in multi-object tracking and have successfully implemented or adapted a tracking algorithm with Ultralytics YOLO? We invite you to contribute to our Trackers section in ultralytics/cfg/trackers! Your real-world applications and solutions could be invaluable for users working on tracking tasks.

By contributing to this section, you help expand the scope of tracking solutions available within the Ultralytics YOLO framework, adding another layer of functionality and utility for the community.

To initiate your contribution, please refer to our Contributing Guide for comprehensive instructions on submitting a Pull Request (PR) 🛠️. We are excited to see what you bring to the table!

Together, let's enhance the tracking capabilities of the Ultralytics YOLO ecosystem 🙏!

.\yolov8\ultralytics\trackers\track.py

# 导入必要的模块和库
from functools import partial
from pathlib import Path
import torch

# 导入自定义工具函数和类
from ultralytics.utils import IterableSimpleNamespace, yaml_load
from ultralytics.utils.checks import check_yaml

# 导入自定义的追踪器类
from .bot_sort import BOTSORT
from .byte_tracker import BYTETracker

# 将追踪器类型映射到相应的追踪器类的字典
TRACKER_MAP = {"bytetrack": BYTETracker, "botsort": BOTSORT}


def on_predict_start(predictor: object, persist: bool = False) -> None:
    """
    初始化对象追踪器,用于预测过程中的目标追踪。

    Args:
        predictor (object): 需要初始化追踪器的预测器对象。
        persist (bool, optional): 是否在追踪器已存在时持久化。默认为 False。

    Raises:
        AssertionError: 如果追踪器类型不是 'bytetrack' 或 'botsort'。
    """
    # 如果预测器对象已经有 'trackers' 属性且 persist=True,则直接返回
    if hasattr(predictor, "trackers") and persist:
        return

    # 检查并加载追踪器配置信息
    tracker = check_yaml(predictor.args.tracker)
    cfg = IterableSimpleNamespace(**yaml_load(tracker))

    # 如果配置中的追踪器类型不是支持的 'bytetrack' 或 'botsort',抛出异常
    if cfg.tracker_type not in {"bytetrack", "botsort"}:
        raise AssertionError(f"Only 'bytetrack' and 'botsort' are supported for now, but got '{cfg.tracker_type}'")

    trackers = []
    # 根据数据集的批量大小初始化追踪器对象列表
    for _ in range(predictor.dataset.bs):
        # 根据配置中的追踪器类型创建对应的追踪器对象
        tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
        trackers.append(tracker)
        # 如果数据集模式不是 'stream',只需要一个追踪器对象
        if predictor.dataset.mode != "stream":
            break
    # 将创建的追踪器对象列表赋值给预测器对象的 'trackers' 属性
    predictor.trackers = trackers
    # 初始化一个列表用于存储每个批次视频路径,用于在新视频时重置追踪器
    predictor.vid_path = [None] * predictor.dataset.bs


def on_predict_postprocess_end(predictor: object, persist: bool = False) -> None:
    """
    对检测到的框进行后处理,并更新对象追踪信息。

    Args:
        predictor (object): 包含预测结果的预测器对象。
        persist (bool, optional): 是否在追踪器已存在时持久化。默认为 False。
    """
    # 获取批次中的路径和图像数据
    path, im0s = predictor.batch[:2]

    # 是否为旋转矩形目标检测任务
    is_obb = predictor.args.task == "obb"
    # 是否为数据流模式
    is_stream = predictor.dataset.mode == "stream"

    # 遍历每张图像
    for i in range(len(im0s)):
        # 获取当前图像对应的追踪器对象
        tracker = predictor.trackers[i if is_stream else 0]
        # 获取当前图像对应的视频路径
        vid_path = predictor.save_dir / Path(path[i]).name

        # 如果不需要持久化且当前视频路径与上一次不同,则重置追踪器
        if not persist and predictor.vid_path[i if is_stream else 0] != vid_path:
            tracker.reset()
            predictor.vid_path[i if is_stream else 0] = vid_path

        # 获取检测到的目标框信息
        det = (predictor.results[i].obb if is_obb else predictor.results[i].boxes).cpu().numpy()

        # 如果没有检测到目标,则继续处理下一张图像
        if len(det) == 0:
            continue

        # 更新追踪器并获取更新后的轨迹信息
        tracks = tracker.update(det, im0s[i])

        # 如果没有有效的轨迹信息,则继续处理下一张图像
        if len(tracks) == 0:
            continue

        # 根据轨迹信息对预测结果进行重新排序
        idx = tracks[:, -1].astype(int)
        predictor.results[i] = predictor.results[i][idx]

        # 更新预测结果对象中的目标框或旋转矩形信息
        update_args = {"obb" if is_obb else "boxes": torch.as_tensor(tracks[:, :-1])}
        predictor.results[i].update(**update_args)


def register_tracker(model: object, persist: bool) -> None:
    """
    空函数,用于注册模型的追踪器。

    Args:
        model (object): 需要注册追踪器的模型对象。
        persist (bool): 是否持久化追踪器。
    """
    pass
    # 为模型注册回调函数,用于在预测期间进行对象跟踪

    # 在预测开始时添加回调函数,使用 partial 函数将 persist 参数绑定到 on_predict_start 函数
    model.add_callback("on_predict_start", partial(on_predict_start, persist=persist))

    # 在预测后处理结束时添加回调函数,使用 partial 函数将 persist 参数绑定到 on_predict_postprocess_end 函数
    model.add_callback("on_predict_postprocess_end", partial(on_predict_postprocess_end, persist=persist))

.\yolov8\ultralytics\trackers\utils\gmc.py

# 导入必要的模块和库

import copy  # 导入 copy 模块,用于对象的深拷贝操作

import cv2  # 导入 OpenCV 库,用于图像处理和计算机视觉任务
import numpy as np  # 导入 NumPy 库,用于处理数值数据

from ultralytics.utils import LOGGER  # 从 ultralytics.utils 模块中导入 LOGGER 对象


class GMC:
    """
    Generalized Motion Compensation (GMC) class for tracking and object detection in video frames.

    This class provides methods for tracking and detecting objects based on several tracking algorithms including ORB,
    SIFT, ECC, and Sparse Optical Flow. It also supports downscaling of frames for computational efficiency.

    Attributes:
        method (str): The method used for tracking. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
        downscale (int): Factor by which to downscale the frames for processing.
        prevFrame (np.ndarray): Stores the previous frame for tracking.
        prevKeyPoints (list): Stores the keypoints from the previous frame.
        prevDescriptors (np.ndarray): Stores the descriptors from the previous frame.
        initializedFirstFrame (bool): Flag to indicate if the first frame has been processed.

    Methods:
        __init__(self, method='sparseOptFlow', downscale=2): Initializes a GMC object with the specified method
                                                              and downscale factor.
        apply(self, raw_frame, detections=None): Applies the chosen method to a raw frame and optionally uses
                                                 provided detections.
        applyEcc(self, raw_frame, detections=None): Applies the ECC algorithm to a raw frame.
        applyFeatures(self, raw_frame, detections=None): Applies feature-based methods like ORB or SIFT to a raw frame.
        applySparseOptFlow(self, raw_frame, detections=None): Applies the Sparse Optical Flow method to a raw frame.
    """
    # 初始化视频跟踪器,并设置指定的跟踪方法和图像缩放因子
    def __init__(self, method: str = "sparseOptFlow", downscale: int = 2) -> None:
        """
        Initialize a video tracker with specified parameters.

        Args:
            method (str): The method used for tracking. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
            downscale (int): Downscale factor for processing frames.
        """
        # 调用父类的初始化方法
        super().__init__()

        # 设置跟踪方法和图像缩放因子
        self.method = method
        self.downscale = max(1, downscale)

        # 根据不同的跟踪方法进行初始化
        if self.method == "orb":
            # 使用ORB算法进行特征点检测和描述符提取
            self.detector = cv2.FastFeatureDetector_create(20)
            self.extractor = cv2.ORB_create()
            self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)

        elif self.method == "sift":
            # 使用SIFT算法进行特征点检测和描述符提取
            self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
            self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
            self.matcher = cv2.BFMatcher(cv2.NORM_L2)

        elif self.method == "ecc":
            # 设置ECC算法的参数
            number_of_iterations = 5000
            termination_eps = 1e-6
            self.warp_mode = cv2.MOTION_EUCLIDEAN
            self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)

        elif self.method == "sparseOptFlow":
            # 设置稀疏光流算法的参数
            self.feature_params = dict(
                maxCorners=1000, qualityLevel=0.01, minDistance=1, blockSize=3, useHarrisDetector=False, k=0.04
            )

        elif self.method in {"none", "None", None}:
            # 如果方法为'none',则将self.method设置为None
            self.method = None
        else:
            # 抛出异常,说明提供了未知的跟踪方法
            raise ValueError(f"Error: Unknown GMC method:{method}")

        # 初始化帧的前一帧,前一帧的关键点和描述符以及初始化状态标志
        self.prevFrame = None
        self.prevKeyPoints = None
        self.prevDescriptors = None
        self.initializedFirstFrame = False

    # 对输入的原始帧应用指定的方法进行对象检测
    def apply(self, raw_frame: np.array, detections: list = None) -> np.array:
        """
        Apply object detection on a raw frame using specified method.

        Args:
            raw_frame (np.ndarray): The raw frame to be processed.
            detections (list): List of detections to be used in the processing.

        Returns:
            (np.ndarray): Processed frame.

        Examples:
            >>> gmc = GMC()
            >>> gmc.apply(np.array([[1, 2, 3], [4, 5, 6]]))
            array([[1, 2, 3],
                   [4, 5, 6]])
        """
        # 根据设定的方法调用相应的处理函数进行处理
        if self.method in {"orb", "sift"}:
            return self.applyFeatures(raw_frame, detections)
        elif self.method == "ecc":
            return self.applyEcc(raw_frame)
        elif self.method == "sparseOptFlow":
            return self.applySparseOptFlow(raw_frame)
        else:
            # 如果方法未知或未设置,则返回一个2x3的单位矩阵
            return np.eye(2, 3)
    def applyEcc(self, raw_frame: np.array) -> np.array:
        """
        Apply ECC algorithm to a raw frame.

        Args:
            raw_frame (np.ndarray): The raw frame to be processed.

        Returns:
            (np.ndarray): Processed frame.

        Examples:
            >>> gmc = GMC()
            >>> gmc.applyEcc(np.array([[1, 2, 3], [4, 5, 6]]))
            array([[1, 2, 3],
                   [4, 5, 6]])
        """
        # 获取原始帧的高度、宽度和通道数
        height, width, _ = raw_frame.shape
        # 将彩色帧转换为灰度图像
        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
        # 创建一个2x3的单位矩阵作为初始变换矩阵H
        H = np.eye(2, 3, dtype=np.float32)

        # 如果需要对图像进行下采样
        if self.downscale > 1.0:
            # 对帧进行高斯模糊
            frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
            # 缩放帧的尺寸
            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
            # 更新宽度和高度为缩放后的尺寸
            width = width // self.downscale
            height = height // self.downscale

        # 处理第一帧的情况
        if not self.initializedFirstFrame:
            # 复制当前帧作为前一帧
            self.prevFrame = frame.copy()
            # 标记第一帧已初始化完成
            self.initializedFirstFrame = True
            # 返回初始变换矩阵H
            return H

        # 运行ECC算法,将结果存储在变换矩阵H中
        try:
            (_, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
        except Exception as e:
            # 若算法执行失败,记录警告信息并将变换矩阵H设置为单位矩阵
            LOGGER.warning(f"WARNING: find transform failed. Set warp as identity {e}")

        # 返回更新后的变换矩阵H
        return H
    # 应用稀疏光流方法到一个原始帧上

    # 获取原始帧的高度、宽度和通道数
    height, width, _ = raw_frame.shape

    # 将原始帧转换为灰度图像
    frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)

    # 初始化单位矩阵作为初始变换矩阵
    H = np.eye(2, 3)

    # 如果设置了下采样因子,则缩小图像尺寸
    if self.downscale > 1.0:
        frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))

    # 使用cv2.goodFeaturesToTrack函数找到关键点
    keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)

    # 处理第一帧的特殊情况
    if not self.initializedFirstFrame or self.prevKeyPoints is None:
        # 复制当前帧作为前一帧
        self.prevFrame = frame.copy()
        # 复制当前关键点作为前一帧的关键点
        self.prevKeyPoints = copy.copy(keypoints)
        # 标记已初始化第一帧
        self.initializedFirstFrame = True
        # 直接返回单位矩阵作为变换矩阵
        return H

    # 计算光流法中的点对应关系
    matchedKeypoints, status, _ = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, self.prevKeyPoints, None)

    # 筛选出有效的对应点
    prevPoints = []
    currPoints = []

    for i in range(len(status)):
        if status[i]:
            prevPoints.append(self.prevKeyPoints[i])
            currPoints.append(matchedKeypoints[i])

    # 将筛选后的点转换为numpy数组
    prevPoints = np.array(prevPoints)
    currPoints = np.array(currPoints)

    # 使用RANSAC算法估计刚体变换矩阵
    if (prevPoints.shape[0] > 4) and (prevPoints.shape[0] == prevPoints.shape[0]):
        H, _ = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)

        # 如果设置了下采样因子,则根据比例调整变换矩阵的平移部分
        if self.downscale > 1.0:
            H[0, 2] *= self.downscale
            H[1, 2] *= self.downscale
    else:
        # 如果匹配点不足以进行估计,则记录警告信息
        LOGGER.warning("WARNING: not enough matching points")

    # 更新前一帧和前一帧的关键点
    self.prevFrame = frame.copy()
    self.prevKeyPoints = copy.copy(keypoints)

    # 返回估计得到的变换矩阵
    return H


def reset_params(self) -> None:
    """重置参数方法,将所有跟踪状态重置为初始状态。"""
    self.prevFrame = None
    self.prevKeyPoints = None
    self.prevDescriptors = None
    self.initializedFirstFrame = False

.\yolov8\ultralytics\trackers\utils\kalman_filter.py

# Ultralytics YOLO 🚀, AGPL-3.0 license

import numpy as np
import scipy.linalg


class KalmanFilterXYAH:
    """
    For bytetrack. A simple Kalman filter for tracking bounding boxes in image space.

    The 8-dimensional state space (x, y, a, h, vx, vy, va, vh) contains the bounding box center position (x, y), aspect
    ratio a, height h, and their respective velocities.

    Object motion follows a constant velocity model. The bounding box location (x, y, a, h) is taken as direct
    observation of the state space (linear observation model).
    """

    def __init__(self):
        """Initialize Kalman filter model matrices with motion and observation uncertainty weights."""
        ndim, dt = 4, 1.0

        # Create Kalman filter model matrices
        # 初始化 Kalman 滤波器的运动模型矩阵
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        for i in range(ndim):
            self._motion_mat[i, ndim + i] = dt
        
        # 观测矩阵,将状态空间映射到观测空间
        self._update_mat = np.eye(ndim, 2 * ndim)

        # Motion and observation uncertainty are chosen relative to the current state estimate. These weights control
        # the amount of uncertainty in the model.
        # 运动和观测的不确定性权重,相对于当前状态估计来选择
        self._std_weight_position = 1.0 / 20
        self._std_weight_velocity = 1.0 / 160

    def initiate(self, measurement: np.ndarray) -> tuple:
        """
        Create track from unassociated measurement.

        Args:
            measurement (ndarray): Bounding box coordinates (x, y, a, h) with center position (x, y), aspect ratio a,
                and height h.

        Returns:
            (tuple[ndarray, ndarray]): Returns the mean vector (8 dimensional) and covariance matrix (8x8 dimensional)
                of the new track. Unobserved velocities are initialized to 0 mean.
        """
        # 使用未关联的测量创建跟踪轨迹

        # 初始位置为测量值
        mean_pos = measurement
        # 初始速度为零向量
        mean_vel = np.zeros_like(mean_pos)
        # 组合成完整的状态向量(位置和速度)
        mean = np.r_[mean_pos, mean_vel]

        # 根据测量的不确定性,设置协方差矩阵的标准差
        std = [
            2 * self._std_weight_position * measurement[3],
            2 * self._std_weight_position * measurement[3],
            1e-2,
            2 * self._std_weight_position * measurement[3],
            10 * self._std_weight_velocity * measurement[3],
            10 * self._std_weight_velocity * measurement[3],
            1e-5,
            10 * self._std_weight_velocity * measurement[3],
        ]
        # 根据标准差创建对角协方差矩阵
        covariance = np.diag(np.square(std))
        # 返回初始化的状态向量和协方差矩阵
        return mean, covariance
    def predict(self, mean: np.ndarray, covariance: np.ndarray) -> tuple:
        """
        Run Kalman filter prediction step.

        Args:
            mean (ndarray): The 8 dimensional mean vector of the object state at the previous time step.
            covariance (ndarray): The 8x8 dimensional covariance matrix of the object state at the previous time step.

        Returns:
            (tuple[ndarray, ndarray]): Returns the mean vector and covariance matrix of the predicted state. Unobserved
                velocities are initialized to 0 mean.
        """
        # 根据位置权重和对象速度初始化预测步骤中的位置标准偏差
        std_pos = [
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[3],
            1e-2,
            self._std_weight_position * mean[3],
        ]
        # 根据速度权重和对象速度初始化预测步骤中的速度标准偏差
        std_vel = [
            self._std_weight_velocity * mean[3],
            self._std_weight_velocity * mean[3],
            1e-5,
            self._std_weight_velocity * mean[3],
        ]
        # 计算运动噪声协方差矩阵
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))

        # 预测对象状态的均值向量,使用运动矩阵进行状态预测
        mean = np.dot(mean, self._motion_mat.T)
        # 预测对象状态的协方差矩阵,考虑运动和运动噪声的影响
        covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov

        return mean, covariance

    def project(self, mean: np.ndarray, covariance: np.ndarray) -> tuple:
        """
        Project state distribution to measurement space.

        Args:
            mean (ndarray): The state's mean vector (8 dimensional array).
            covariance (ndarray): The state's covariance matrix (8x8 dimensional).

        Returns:
            (tuple[ndarray, ndarray]): Returns the projected mean and covariance matrix of the given state estimate.
        """
        # 根据位置权重和对象速度初始化测量空间中的标准偏差
        std = [
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[3],
            1e-1,
            self._std_weight_position * mean[3],
        ]
        # 计算创新协方差矩阵,用于测量预测和实际测量之间的差异
        innovation_cov = np.diag(np.square(std))

        # 将状态分布投影到测量空间中的均值向量,使用更新矩阵进行投影
        mean = np.dot(self._update_mat, mean)
        # 将状态分布投影到测量空间中的协方差矩阵,考虑创新噪声的影响
        covariance = np.linalg.multi_dot((self._update_mat, covariance, self._update_mat.T))
        return mean, covariance + innovation_cov
    def multi_predict(self, mean: np.ndarray, covariance: np.ndarray) -> tuple:
        """
        Run Kalman filter prediction step (Vectorized version).

        Args:
            mean (ndarray): The Nx8 dimensional mean matrix of the object states at the previous time step.
            covariance (ndarray): The Nx8x8 covariance matrix of the object states at the previous time step.

        Returns:
            (tuple[ndarray, ndarray]): Returns the mean vector and covariance matrix of the predicted state. Unobserved
                velocities are initialized to 0 mean.
        """
        # Calculate standard deviations for position and velocity components
        std_pos = [
            self._std_weight_position * mean[:, 3],     # Standard deviation for x position
            self._std_weight_position * mean[:, 3],     # Standard deviation for y position
            1e-2 * np.ones_like(mean[:, 3]),            # Small value for aspect ratio 'a'
            self._std_weight_position * mean[:, 3],     # Standard deviation for height 'h'
        ]
        std_vel = [
            self._std_weight_velocity * mean[:, 3],     # Standard deviation for x velocity
            self._std_weight_velocity * mean[:, 3],     # Standard deviation for y velocity
            1e-5 * np.ones_like(mean[:, 3]),           # Small value for velocity in 'a'
            self._std_weight_velocity * mean[:, 3],     # Standard deviation for velocity in 'h'
        ]

        # Stack standard deviations and square them
        sqr = np.square(np.r_[std_pos, std_vel]).T

        # Create motion covariance matrices
        motion_cov = [np.diag(sqr[i]) for i in range(len(mean))]
        motion_cov = np.asarray(motion_cov)

        # Predict new mean using motion model
        mean = np.dot(mean, self._motion_mat.T)

        # Calculate left multiplication for covariance update
        left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))

        # Update covariance matrix incorporating motion and motion_cov
        covariance = np.dot(left, self._motion_mat.T) + motion_cov

        return mean, covariance

    def update(self, mean: np.ndarray, covariance: np.ndarray, measurement: np.ndarray) -> tuple:
        """
        Run Kalman filter correction step.

        Args:
            mean (ndarray): The predicted state's mean vector (8 dimensional).
            covariance (ndarray): The state's covariance matrix (8x8 dimensional).
            measurement (ndarray): The 4 dimensional measurement vector (x, y, a, h), where (x, y) is the center
                position, a the aspect ratio, and h the height of the bounding box.

        Returns:
            (tuple[ndarray, ndarray]): Returns the measurement-corrected state distribution.
        """
        # Project predicted mean and covariance based on measurement model
        projected_mean, projected_cov = self.project(mean, covariance)

        # Compute Cholesky factorization of projected_cov
        chol_factor, lower = scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False)

        # Compute Kalman gain using Cholesky factor and update matrix
        kalman_gain = scipy.linalg.cho_solve(
            (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, check_finite=False
        ).T

        # Compute innovation (difference between measurement and projected mean)
        innovation = measurement - projected_mean

        # Update mean and covariance using Kalman gain and innovation
        new_mean = mean + np.dot(innovation, kalman_gain.T)
        new_covariance = covariance - np.linalg.multi_dot((kalman_gain, projected_cov, kalman_gain.T))

        return new_mean, new_covariance

    def gating_distance(
        self,
        mean: np.ndarray,
        covariance: np.ndarray,
        measurements: np.ndarray,
        only_position: bool = False,
        metric: str = "maha",
    ) -> np.ndarray:
        """
        计算状态分布与测量之间的门控距离。适当的距离阈值可以从 `chi2inv95` 获得。如果 `only_position` 是 False,
        则卡方分布的自由度为4,否则为2。

        Args:
            mean (ndarray): 状态分布的均值向量(8维)。
            covariance (ndarray): 状态分布的协方差(8x8维)。
            measurements (ndarray): 包含N个测量值的矩阵,每个测量值格式为 (x, y, a, h),其中 (x, y) 是边界框的中心位置,
                a 是长宽比,h 是高度。
            only_position (bool, optional): 如果为 True,则仅基于边界框中心位置计算距离。默认为 False。
            metric (str, optional): 用于计算距离的度量标准。选项有 'gaussian' 表示平方欧氏距离,'maha' 表示平方马氏距离。
                默认为 'maha'。

        Returns:
            (np.ndarray): 返回长度为N的数组,其中第i个元素包含 (mean, covariance) 与 `measurements[i]` 之间的平方距离。
        """
        # 将均值和协方差投影到合适的维度
        mean, covariance = self.project(mean, covariance)
        
        if only_position:
            # 如果只计算位置,则仅保留均值和协方差的前两个维度
            mean, covariance = mean[:2], covariance[:2, :2]
            measurements = measurements[:, :2]

        # 计算测量值与均值之间的差异
        d = measurements - mean
        
        if metric == "gaussian":
            # 计算平方欧氏距离
            return np.sum(d * d, axis=1)
        elif metric == "maha":
            # 计算平方马氏距离
            cholesky_factor = np.linalg.cholesky(covariance)
            z = scipy.linalg.solve_triangular(cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True)
            return np.sum(z * z, axis=0)  # 平方马氏距离
        else:
            raise ValueError("无效的距离度量标准")
class KalmanFilterXYWH(KalmanFilterXYAH):
    """
    For BoT-SORT. A simple Kalman filter for tracking bounding boxes in image space.

    The 8-dimensional state space (x, y, w, h, vx, vy, vw, vh) contains the bounding box center position (x, y), width
    w, height h, and their respective velocities.

    Object motion follows a constant velocity model. The bounding box location (x, y, w, h) is taken as direct
    observation of the state space (linear observation model).
    """

    def initiate(self, measurement: np.ndarray) -> tuple:
        """
        Create track from unassociated measurement.

        Args:
            measurement (ndarray): Bounding box coordinates (x, y, w, h) with center position (x, y), width, and height.

        Returns:
            (tuple[ndarray, ndarray]): Returns the mean vector (8 dimensional) and covariance matrix (8x8 dimensional)
                of the new track. Unobserved velocities are initialized to 0 mean.
        """
        # 使用测量值初始化均值向量,包括位置和速度
        mean_pos = measurement
        mean_vel = np.zeros_like(mean_pos)
        mean = np.r_[mean_pos, mean_vel]

        # 计算测量值的标准差,用于构建协方差矩阵
        std = [
            2 * self._std_weight_position * measurement[2],
            2 * self._std_weight_position * measurement[3],
            2 * self._std_weight_position * measurement[2],
            2 * self._std_weight_position * measurement[3],
            10 * self._std_weight_velocity * measurement[2],
            10 * self._std_weight_velocity * measurement[3],
            10 * self._std_weight_velocity * measurement[2],
            10 * self._std_weight_velocity * measurement[3],
        ]
        covariance = np.diag(np.square(std))
        return mean, covariance

    def predict(self, mean, covariance) -> tuple:
        """
        Run Kalman filter prediction step.

        Args:
            mean (ndarray): The 8 dimensional mean vector of the object state at the previous time step.
            covariance (ndarray): The 8x8 dimensional covariance matrix of the object state at the previous time step.

        Returns:
            (tuple[ndarray, ndarray]): Returns the mean vector and covariance matrix of the predicted state. Unobserved
                velocities are initialized to 0 mean.
        """
        # 计算位置和速度的预测标准差
        std_pos = [
            self._std_weight_position * mean[2],
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[2],
            self._std_weight_position * mean[3],
        ]
        std_vel = [
            self._std_weight_velocity * mean[2],
            self._std_weight_velocity * mean[3],
            self._std_weight_velocity * mean[2],
            self._std_weight_velocity * mean[3],
        ]
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))

        # 预测下一个时刻的均值和协方差矩阵
        mean = np.dot(mean, self._motion_mat.T)
        covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov

        return mean, covariance
    def project(self, mean, covariance) -> tuple:
        """
        Project state distribution to measurement space.

        Args:
            mean (ndarray): The state's mean vector (8 dimensional array).
            covariance (ndarray): The state's covariance matrix (8x8 dimensional).

        Returns:
            (tuple[ndarray, ndarray]): Returns the projected mean and covariance matrix of the given state estimate.
        """
        # Calculate standard deviations for position components based on mean values
        std = [
            self._std_weight_position * mean[2],
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[2],
            self._std_weight_position * mean[3],
        ]
        # Create innovation covariance matrix from computed standard deviations
        innovation_cov = np.diag(np.square(std))

        # Project the mean vector using the update matrix
        mean = np.dot(self._update_mat, mean)
        # Project the covariance matrix using the update matrix
        covariance = np.linalg.multi_dot((self._update_mat, covariance, self._update_mat.T))
        # Return the projected mean and adjusted covariance matrix
        return mean, covariance + innovation_cov

    def multi_predict(self, mean, covariance) -> tuple:
        """
        Run Kalman filter prediction step (Vectorized version).

        Args:
            mean (ndarray): The Nx8 dimensional mean matrix of the object states at the previous time step.
            covariance (ndarray): The Nx8x8 covariance matrix of the object states at the previous time step.

        Returns:
            (tuple[ndarray, ndarray]): Returns the mean vector and covariance matrix of the predicted state. Unobserved
                velocities are initialized to 0 mean.
        """
        # Compute standard deviations for position and velocity components
        std_pos = [
            self._std_weight_position * mean[:, 2],
            self._std_weight_position * mean[:, 3],
            self._std_weight_position * mean[:, 2],
            self._std_weight_position * mean[:, 3],
        ]
        std_vel = [
            self._std_weight_velocity * mean[:, 2],
            self._std_weight_velocity * mean[:, 3],
            self._std_weight_velocity * mean[:, 2],
            self._std_weight_velocity * mean[:, 3],
        ]
        # Stack position and velocity standard deviations and compute their squares
        sqr = np.square(np.r_[std_pos, std_vel]).T

        # Construct motion covariance matrices for each object in the prediction
        motion_cov = [np.diag(sqr[i]) for i in range(len(mean))]
        motion_cov = np.asarray(motion_cov)

        # Predict mean vector using motion matrix
        mean = np.dot(mean, self._motion_mat.T)
        # Predict covariance matrix incorporating motion covariance
        left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
        covariance = np.dot(left, self._motion_mat.T) + motion_cov

        # Return predicted mean and covariance matrix
        return mean, covariance

    def update(self, mean, covariance, measurement) -> tuple:
        """
        Run Kalman filter correction step.

        Args:
            mean (ndarray): The predicted state's mean vector (8 dimensional).
            covariance (ndarray): The state's covariance matrix (8x8 dimensional).
            measurement (ndarray): The 4 dimensional measurement vector (x, y, w, h), where (x, y) is the center
                position, w the width, and h the height of the bounding box.

        Returns:
            (tuple[ndarray, ndarray]): Returns the measurement-corrected state distribution.
        """
        # Delegate the update step to the superclass, passing mean, covariance, and measurement
        return super().update(mean, covariance, measurement)

.\yolov8\ultralytics\trackers\utils\matching.py

# Ultralytics YOLO 🚀, AGPL-3.0 license

import numpy as np  # 导入 NumPy 库,用于数值计算
import scipy  # 导入 SciPy 库,用于科学计算
from scipy.spatial.distance import cdist  # 从 SciPy 库的 spatial.distance 模块导入 cdist 函数

from ultralytics.utils.metrics import batch_probiou, bbox_ioa  # 从 ultralytics.utils.metrics 模块导入 batch_probiou 和 bbox_ioa 函数

try:
    import lap  # 尝试导入 lap 模块,用于 linear_assignment

    assert lap.__version__  # 验证 lap 模块不是一个目录
except (ImportError, AssertionError, AttributeError):
    from ultralytics.utils.checks import check_requirements

    check_requirements("lapx>=0.5.2")  # 检查 lapx 版本是否符合要求,推荐从 https://github.com/rathaROG/lapx 更新到 lap 包
    import lap  # 导入 lap 模块

def linear_assignment(cost_matrix: np.ndarray, thresh: float, use_lap: bool = True) -> tuple:
    """
    Perform linear assignment using scipy or lap.lapjv.

    Args:
        cost_matrix (np.ndarray): The matrix containing cost values for assignments.
        thresh (float): Threshold for considering an assignment valid.
        use_lap (bool, optional): Whether to use lap.lapjv. Defaults to True.

    Returns:
        Tuple with:
            - matched indices
            - unmatched indices from 'a'
            - unmatched indices from 'b'
    """

    if cost_matrix.size == 0:
        return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))

    if use_lap:
        # Use lap.lapjv
        # https://github.com/gatagat/lap
        _, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
        matches = [[ix, mx] for ix, mx in enumerate(x) if mx >= 0]  # 构建匹配对列表,包含匹配的索引对
        unmatched_a = np.where(x < 0)[0]  # 找出未匹配的 'a' 索引
        unmatched_b = np.where(y < 0)[0]  # 找出未匹配的 'b' 索引
    else:
        # Use scipy.optimize.linear_sum_assignment
        # https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html
        x, y = scipy.optimize.linear_sum_assignment(cost_matrix)  # 使用 SciPy 中的 linear_sum_assignment 函数进行线性分配
        matches = np.asarray([[x[i], y[i]] for i in range(len(x)) if cost_matrix[x[i], y[i]] <= thresh])  # 根据阈值筛选出有效匹配
        if len(matches) == 0:
            unmatched_a = list(np.arange(cost_matrix.shape[0]))  # 如果没有有效匹配,则所有 'a' 都为未匹配
            unmatched_b = list(np.arange(cost_matrix.shape[1]))  # 如果没有有效匹配,则所有 'b' 都为未匹配
        else:
            unmatched_a = list(set(np.arange(cost_matrix.shape[0])) - set(matches[:, 0]))  # 找出未匹配的 'a' 索引
            unmatched_b = list(set(np.arange(cost_matrix.shape[1])) - set(matches[:, 1]))  # 找出未匹配的 'b' 索引

    return matches, unmatched_a, unmatched_b


def iou_distance(atracks: list, btracks: list) -> np.ndarray:
    """
    Compute cost based on Intersection over Union (IoU) between tracks.

    Args:
        atracks (list[STrack] | list[np.ndarray]): List of tracks 'a' or bounding boxes.
        btracks (list[STrack] | list[np.ndarray]): List of tracks 'b' or bounding boxes.

    Returns:
        (np.ndarray): Cost matrix computed based on IoU.
    """

    if atracks and isinstance(atracks[0], np.ndarray) or btracks and isinstance(btracks[0], np.ndarray):
        atlbrs = atracks  # 将 atracks 赋值给 atlbrs
        btlbrs = btracks  # 将 btracks 赋值给 btlbrs
    # 如果条件不满足,执行以下操作:
    else:
        # 生成包含所有 A 跟踪目标边界框坐标的列表,若目标存在角度信息,则使用 xywha 格式,否则使用 xyxy 格式
        atlbrs = [track.xywha if track.angle is not None else track.xyxy for track in atracks]
        # 生成包含所有 B 跟踪目标边界框坐标的列表,若目标存在角度信息,则使用 xywha 格式,否则使用 xyxy 格式
        btlbrs = [track.xywha if track.angle is not None else track.xyxy for track in btracks]

    # 创建一个形状为 (len(atlbrs), len(btlbrs)) 的全零数组,用于存储计算的 IoU 值
    ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float32)
    
    # 如果 atlbrs 和 btlbrs 列表都不为空
    if len(atlbrs) and len(btlbrs):
        # 如果列表中第一个元素是长度为 5 的数组,则调用 batch_probiou 计算 IoU
        if len(atlbrs[0]) == 5 and len(btlbrs[0]) == 5:
            ious = batch_probiou(
                np.ascontiguousarray(atlbrs, dtype=np.float32),  # 转换为连续内存的 numpy 数组
                np.ascontiguousarray(btlbrs, dtype=np.float32),  # 转换为连续内存的 numpy 数组
            ).numpy()  # 将结果转换为 numpy 数组
        else:
            # 否则,调用 bbox_ioa 计算 IoU,设定参数 iou=True
            ious = bbox_ioa(
                np.ascontiguousarray(atlbrs, dtype=np.float32),  # 转换为连续内存的 numpy 数组
                np.ascontiguousarray(btlbrs, dtype=np.float32),  # 转换为连续内存的 numpy 数组
                iou=True,  # 设置计算 IoU
            )
    
    # 返回 1 减去计算得到的 IoU 矩阵,即得到的 cost matrix
    return 1 - ious
# 计算跟踪目标与检测目标之间基于嵌入向量的距离矩阵

def embedding_distance(tracks: list, detections: list, metric: str = "cosine") -> np.ndarray:
    """
    Compute distance between tracks and detections based on embeddings.

    Args:
        tracks (list[STrack]): List of tracks.
            跟踪目标的列表,每个元素是一个STrack对象。
        detections (list[BaseTrack]): List of detections.
            检测目标的列表,每个元素是一个BaseTrack对象。
        metric (str, optional): Metric for distance computation. Defaults to 'cosine'.
            距离计算所用的度量标准,默认为余弦相似度。

    Returns:
        (np.ndarray): Cost matrix computed based on embeddings.
            基于嵌入向量计算的成本矩阵。
    """

    cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float32)
    if cost_matrix.size == 0:
        return cost_matrix

    # 提取检测目标的特征向量
    det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float32)

    # 提取跟踪目标的平滑特征向量
    track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float32)

    # 计算距离矩阵,使用指定的度量标准(metric)
    cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))  # Normalized features

    return cost_matrix


def fuse_score(cost_matrix: np.ndarray, detections: list) -> np.ndarray:
    """
    Fuses cost matrix with detection scores to produce a single similarity matrix.

    Args:
        cost_matrix (np.ndarray): The matrix containing cost values for assignments.
            包含分配成本值的矩阵。
        detections (list[BaseTrack]): List of detections with scores.
            带有分数的检测目标列表。

    Returns:
        (np.ndarray): Fused similarity matrix.
            融合后的相似性矩阵。
    """

    if cost_matrix.size == 0:
        return cost_matrix

    # 根据成本矩阵计算IoU相似度
    iou_sim = 1 - cost_matrix

    # 提取检测目标的分数
    det_scores = np.array([det.score for det in detections])

    # 将检测目标的分数扩展为与成本矩阵相同的形状
    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)

    # 将IoU相似度与检测目标分数相乘,进行融合
    fuse_sim = iou_sim * det_scores

    # 返回融合后的相似性矩阵,即融合成本
    return 1 - fuse_sim

.\yolov8\ultralytics\trackers\utils\__init__.py

# Ultralytics YOLO 🚀, AGPL-3.0 license
# 这行注释简要描述了代码的项目名称和许可证信息。

.\yolov8\ultralytics\trackers\__init__.py

# 导入依赖模块以支持Ultralytics YOLO,这里使用AGPL-3.0许可证

# 从当前包中导入BOTSORT类
from .bot_sort import BOTSORT
# 从当前包中导入BYTETracker类
from .byte_tracker import BYTETracker
# 从当前包中导入register_tracker函数
from .track import register_tracker

# 将register_tracker, BOTSORT, BYTETracker三个名称加入到模块的__all__列表中,
# 这样在使用from package import *时,只会导入这三个对象,使导入更加简洁
__all__ = "register_tracker", "BOTSORT", "BYTETracker"  # 允许更简单的导入方式

.\yolov8\ultralytics\utils\autobatch.py

# 导入深度复制函数
from copy import deepcopy
# 导入必要的库
import numpy as np
import torch

# 导入自定义模块
from ultralytics.utils import DEFAULT_CFG, LOGGER, colorstr
from ultralytics.utils.torch_utils import autocast, profile

# 定义检查训练批大小的函数
def check_train_batch_size(model, imgsz=640, amp=True, batch=-1):
    """
    使用 autobatch() 函数计算最佳 YOLO 训练批大小。

    Args:
        model (torch.nn.Module): 要检查批大小的 YOLO 模型。
        imgsz (int): 用于训练的图像尺寸。
        amp (bool): 如果为 True,使用自动混合精度 (AMP) 进行训练。

    Returns:
        (int): 使用 autobatch() 函数计算的最佳批大小。
    """

    with autocast(enabled=amp):
        return autobatch(deepcopy(model).train(), imgsz, fraction=batch if 0.0 < batch < 1.0 else 0.6)


# 定义自动估算最佳批大小的函数
def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch):
    """
    自动估算最佳 YOLO 批大小,以利用可用 CUDA 内存的一部分。

    Args:
        model (torch.nn.module): 要计算批大小的 YOLO 模型。
        imgsz (int, optional): YOLO 模型输入的图像大小。默认为 640。
        fraction (float, optional): 要使用的可用 CUDA 内存的分数。默认为 0.60。
        batch_size (int, optional): 如果检测到错误,则使用的默认批大小。默认为 16。

    Returns:
        (int): 最佳批大小。
    """

    # 检查设备
    prefix = colorstr("AutoBatch: ")
    LOGGER.info(f"{prefix}正在计算 imgsz={imgsz} 时,{fraction * 100}% CUDA 内存利用率下的最佳批大小。")
    device = next(model.parameters()).device  # 获取模型设备
    if device.type in {"cpu", "mps"}:
        LOGGER.info(f"{prefix} ⚠️ 仅适用于 CUDA 设备,使用默认批大小 {batch_size}")
        return batch_size
    if torch.backends.cudnn.benchmark:
        LOGGER.info(f"{prefix} ⚠️ 需要设置 torch.backends.cudnn.benchmark=False,使用默认批大小 {batch_size}")
        return batch_size

    # 检查 CUDA 内存
    gb = 1 << 30  # 字节转换为 GiB (1024 ** 3)
    d = str(device).upper()  # 'CUDA:0'
    properties = torch.cuda.get_device_properties(device)  # 设备属性
    t = properties.total_memory / gb  # 总 GiB
    r = torch.cuda.memory_reserved(device) / gb  # 已预留 GiB
    a = torch.cuda.memory_allocated(device) / gb  # 已分配 GiB
    f = t - (r + a)  # 可用 GiB
    LOGGER.info(f"{prefix}{d} ({properties.name}) {t:.2f}G 总共, {r:.2f}G 已预留, {a:.2f}G 已分配, {f:.2f}G 可用")

    # 分析不同批大小
    batch_sizes = [1, 2, 4, 8, 16]
    try:
        # 创建一个空的 PyTorch 张量列表,每个张量的大小为 (b, 3, imgsz, imgsz),其中 b 从 batch_sizes 中取值
        img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes]
        # 使用 profile 函数对 img、model 进行分析,n=3,使用指定设备 device
        results = profile(img, model, n=3, device=device)

        # 对结果进行拟合
        y = [x[2] for x in results if x]  # 获取结果中的内存使用情况 [2]
        # 对 batch_sizes 中的数据和 y 进行一次度为 1 的多项式拟合
        p = np.polyfit(batch_sizes[: len(y)], y, deg=1)  # 第一次多项式拟合
        # 计算出最佳的 batch size,通过拟合得到的 y 截距 (optimal batch size)
        b = int((f * fraction - p[1]) / p[0])

        # 如果结果中有 None,表示某些尺寸的计算失败
        if None in results:  # 一些尺寸计算失败
            i = results.index(None)  # 第一个失败的索引
            # 如果 b 大于等于失败尺寸的 batch size,选择前一个安全点的 batch size
            if b >= batch_sizes[i]:  # y 截距超过失败点
                b = batch_sizes[max(i - 1, 0)]  # 选择前一个安全点

        # 如果 b 小于 1 或大于 1024,超出安全范围,使用默认的 batch_size
        if b < 1 or b > 1024:  # b 超出安全范围
            b = batch_size
            LOGGER.info(f"{prefix}WARNING ⚠️ CUDA anomaly detected, using default batch-size {batch_size}.")

        # 计算实际预测的分数 fraction
        fraction = (np.polyval(p, b) + r + a) / t  # 实际预测的分数
        # 记录使用的 batch size 以及相关的内存信息
        LOGGER.info(f"{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅")
        # 返回计算得到的 batch size
        return b
    except Exception as e:
        # 捕获异常情况,记录警告信息,使用默认的 batch_size
        LOGGER.warning(f"{prefix}WARNING ⚠️ error detected: {e},  using default batch-size {batch_size}.")
        # 返回默认的 batch_size
        return batch_size
posted @ 2024-09-05 11:58  绝不原创的飞龙  阅读(275)  评论(0)    收藏  举报