class RandomAffine(DualTransform):
    """Random affine augmentation: rotate, scale and translate into a fixed-size output.

    A rotation angle, a scale factor and a rotation centre are sampled per call,
    the image is warped with ``cv2.warpAffine`` into a ``(width, height)``
    canvas, and the bounding boxes are moved accordingly.

    Boxes are handled approximately: each box centre is projected through the
    affine matrix and an axis-aligned box with the original extent multiplied
    by ``scale`` is rebuilt around it. The rotation does not change the box
    extent.

    Args:
        width (int): output image width.
        height (int): output image height.
        angle_limit (float, tuple): rotation range in degrees. A scalar ``a``
            means ``[-a, +a]``.
        scale_limit (float, tuple): scale range. A scalar ``s`` means
            ``[1 - s, 1 + s]``.
        offset_limit (float, tuple): range of the rotation centre, expressed as
            a fraction of the input size. A scalar ``o`` means
            ``[0.5 - o, 0.5 + o]``.
        rotate_angle_threshold (float): when ``abs(angle)`` exceeds this value,
            the first extra field of every surviving box (``box[4]``) is set to
            True; otherwise the extra fields are returned as received.
        min_size (float, tuple): minimum ``(width, height)`` in output pixels.
            A scalar ``m`` means ``(m, m)``. Smaller boxes are dropped.
        always_apply (bool): forwarded to the base transform.
        p (float): forwarded to the base transform.
    """

    def __init__(self,
                 width,
                 height,
                 angle_limit=(-45, +45),
                 scale_limit=(0.8, 1.2),
                 offset_limit=(0.4, 0.6),
                 rotate_angle_threshold=30,
                 min_size=10,
                 always_apply=False,
                 p=1.0):
        super().__init__(always_apply=always_apply, p=p)
        self.width = width
        self.height = height
        self.rotate_angle_threshold = rotate_angle_threshold
        self.scale_limit = self.to_tuple(scale_limit, 1)
        self.offset_limit = self.to_tuple(offset_limit, 0.5)
        self.angle_limit = self.to_tuple(angle_limit, 0)
        self.min_size = self.to_tuple(min_size, None)

    def apply(self, img, M, **params):
        """Warp ``img`` with the 2x3 affine matrix ``M`` into the output canvas."""
        return cv2.warpAffine(img, M, (self.width, self.height))

    def get_params_dependent_on_targets(self, params):
        """Sample the affine parameters for one call.

        Returns a dict with the 2x3 matrix ``M``, the sampled ``scale`` and
        ``angle``, and the input image size (needed to de-normalise boxes).
        """
        image = params["image"]
        height, width = image.shape[:2]

        angle = random.uniform(*self.angle_limit)
        scale = random.uniform(*self.scale_limit)
        cx = random.uniform(*self.offset_limit) * width
        cy = random.uniform(*self.offset_limit) * height

        M = cv2.getRotationMatrix2D((cx, cy), angle, scale)
        # The rotation keeps (cx, cy) fixed; shift the translation so that this
        # point lands on the centre of the output canvas instead.
        M[0, 2] -= cx - self.width * 0.5
        M[1, 2] -= cy - self.height * 0.5
        return {"M": M, "scale": scale, "angle": angle, "image_width": width, "image_height": height}

    def apply_to_bboxes(self, bboxes, M, scale, angle, image_width, image_height, **params):
        """Transform bounding boxes consistently with the image warp.

        Because this is a DualTransform, incoming boxes are normalised to 0-1
        by the input image size, and the returned boxes must be normalised to
        0-1 as well, this time by the output size ``(self.width, self.height)``.

        Args:
            bboxes: sequence of ``(x_min, y_min, x_max, y_max, *extra)`` items.
            M: 2x3 affine matrix used for the image.
            scale: sampled scale factor.
            angle: sampled rotation angle in degrees.
            image_width, image_height: input image size in pixels.

        Returns:
            list: one ``[x_min, y_min, x_max, y_max, *extra]`` list per box
            that is at least ``min_size`` after clipping to the output canvas.
        """
        # len() rather than truthiness: bboxes may be a numpy array.
        if len(bboxes) == 0:
            return []

        # 1x4 so that it broadcasts against the nx4 coordinate array.
        input_size = np.array([[image_width, image_height, image_width, image_height]])
        # Shape (4,): applied to one output row at a time.
        output_size = np.array([self.width, self.height, self.width, self.height])

        # Extra per-box fields (everything after the four coordinates).
        # NOTE: np.array coerces mixed-type extras to a common dtype.
        tail = np.array([item[4:] for item in bboxes])
        # Coordinates in input-image pixels, nx4.
        boxes_px = np.array([item[:4] for item in bboxes]) * input_size
        left, top, right, bottom = (boxes_px[:, i] for i in range(4))

        # Homogeneous box centres, 3xn (last row is all ones), projected by M.
        centers = np.vstack([
            (left + right) * 0.5,
            (top + bottom) * 0.5,
            np.ones((1, len(boxes_px))),
        ])
        center_x, center_y = M @ centers

        # Half of the scaled extent; the +1 treats coordinates as inclusive pixels.
        half_scale = scale * 0.5
        half_w = (right - left + 1) * half_scale
        half_h = (bottom - top + 1) * half_scale
        result = np.stack(
            [center_x - half_w, center_y - half_h, center_x + half_w, center_y + half_h],
            axis=1,
        )

        # Keep the boxes inside the output canvas.
        result[:, 0::2] = result[:, 0::2].clip(min=0, max=self.width - 1)
        result[:, 1::2] = result[:, 1::2].clip(min=0, max=self.height - 1)

        # Drop boxes that became smaller than the configured minimum size.
        box_w = (result[:, 2] - result[:, 0] + 1).clip(min=0)
        box_h = (result[:, 3] - result[:, 1] + 1).clip(min=0)
        min_w, min_h = self.min_size
        keep = (box_w >= min_w) & (box_h >= min_h)

        # The first extra field flags a strong rotation. Only write it when the
        # boxes actually carry extra fields; a (n, 0) tail has no column 0.
        has_tail_column = tail.ndim == 2 and tail.shape[1] > 0
        if abs(angle) > self.rotate_angle_threshold and has_tail_column:
            tail[keep, 0] = True

        return [
            list(coord / output_size) + list(tail_item)
            for coord, tail_item in zip(result[keep], tail[keep])
        ]

    @property
    def targets(self):
        """Map each supported target name to the method that transforms it."""
        return {
            "image": self.apply,
            "bboxes": self.apply_to_bboxes
        }

    def to_tuple(self, value, add_value=1):
        """Normalise a limit argument to a 2-tuple.

        A tuple or list is returned as a tuple unchanged. A scalar becomes
        ``(add_value - value, add_value + value)``, or ``(value, value)`` when
        ``add_value`` is None.
        """
        if isinstance(value, (tuple, list)):
            return tuple(value)
        if add_value is not None:
            return (add_value - value, add_value + value)
        return (value, value)

    @property
    def targets_as_params(self):
        """Targets whose values are passed to ``get_params_dependent_on_targets``."""
        return ["image", "bboxes"]

    def get_transform_init_args_names(self):
        """Names of the constructor arguments stored as same-named attributes."""
        return (
            "height",
            "width",
            "scale_limit",
            "angle_limit",
            "offset_limit",
            "rotate_angle_threshold",
            "min_size",
        )