
import random

import albumentations as A
import cv2
import numpy as np
from albumentations.core.transforms_interface import DualTransform
from albumentations.pytorch import ToTensor, ToTensorV2

# Random affine-transform augmentation.
class RandomAffine(DualTransform):
    """Random affine (rotate / scale / translate) augmentation for images and
    pascal_voc-style bounding boxes.

    The output canvas has a fixed size (width, height). A random rotation
    center is sampled inside the input image, and the affine matrix is shifted
    so that this center lands in the middle of the output canvas.

    Extra bbox columns (index 4 and beyond) are carried through unchanged,
    except that column 4 is set to True for surviving boxes when the sampled
    rotation angle exceeds ``rotate_angle_threshold``.
    """

    def __init__(self,
                 width,                      # output image width (int)
                 height,                     # output image height (int)
                 angle_limit=(-45, +45),     # rotation degrees; a float f means (-f, +f)
                 scale_limit=(0.8, 1.2),     # scale range; a float f means (1 - f, 1 + f)
                 offset_limit=(0.4, 0.6),    # rotation-center range as a fraction of the
                                             # input size; a float f means (0.5 - f, 0.5 + f)
                 rotate_angle_threshold=30,  # |angle| above this marks boxes[:, 4] = True
                 min_size=10,                # boxes smaller than this (pixels) are dropped;
                                             # a float f means (f, f) for (width, height)
                 always_apply=False,
                 p=1.0):
        super().__init__(always_apply=always_apply, p=p)
        self.width = width
        self.height = height
        self.rotate_angle_threshold = rotate_angle_threshold
        self.scale_limit = self.to_tuple(scale_limit, 1)
        self.offset_limit = self.to_tuple(offset_limit, 0.5)
        self.angle_limit = self.to_tuple(angle_limit, 0)
        self.min_size = self.to_tuple(min_size, None)

    def apply(self, img, M, **params):
        # Warp the input onto the fixed-size output canvas.
        return cv2.warpAffine(img, M, (self.width, self.height))

    def get_params_dependent_on_targets(self, params):
        image = params["image"]
        height, width = image.shape[:2]
        angle = random.uniform(*self.angle_limit)
        scale = random.uniform(*self.scale_limit)
        # Rotation center, sampled as a fraction of the *input* image size.
        cx = random.uniform(*self.offset_limit) * width
        cy = random.uniform(*self.offset_limit) * height
        M = cv2.getRotationMatrix2D((cx, cy), angle, scale)
        # Shift so the sampled center maps to the center of the output canvas.
        M[0, 2] -= cx - self.width * 0.5
        M[1, 2] -= cy - self.height * 0.5
        return {"M": M, "scale": scale, "angle": angle,
                "image_width": width, "image_height": height}

    def apply_to_bboxes(self, bboxes, M, scale, angle, image_width, image_height, **params):
        # Because this inherits DualTransform, incoming bboxes are normalized
        # to 0-1 (divided by the input image size), and the output must be
        # normalized as well — hence the final division by the output size.
        if len(bboxes) == 0:
            return []
        # (1, 4) so it broadcasts over the n x 4 coordinate array
        # ([x, y, r, b] scaled by [width, height, width, height]).
        np_image_size = np.array([[image_width, image_height,
                                   image_width, image_height]])
        # (4,) divisor used to re-normalize the output coordinates.
        np_output_size = np.array([self.width, self.height,
                                   self.width, self.height])
        # Extra columns (labels / flags) are carried through untouched.
        tail = np.array([item[4:] for item in bboxes])
        npbboxes = np.array([item[:4] for item in bboxes]) * np_image_size
        # Box centers, n x 2.
        np_bboxes_center = np.array([[(x + r) * 0.5, (y + b) * 0.5]
                                     for x, y, r, b in npbboxes])
        # Homogeneous coordinates (3 x n) so the 2x3 matrix M can be applied.
        np_bboxes_center_t_2row = np_bboxes_center.T
        one_row = np.ones((1, np_bboxes_center_t_2row.shape[-1]))
        np_bboxes_center_coordinate = np.vstack([np_bboxes_center_t_2row, one_row])
        project = M @ np_bboxes_center_coordinate
        list_project = project.T.tolist()
        # half_scale folds the 0.5 of "half the scaled box size" into scale:
        # cx - scale * 0.5 * w  ==  cx - w * half_scale.
        half_scale = scale * 0.5
        result = np.array([[cx - (r - x + 1) * half_scale,
                            cy - (b - y + 1) * half_scale,
                            cx + (r - x + 1) * half_scale,
                            cy + (b - y + 1) * half_scale]
                           for (x, y, r, b), (cx, cy) in zip(npbboxes, list_project)])
        # Clip boxes to the output canvas.
        x, y, r, b = result[:, 0], result[:, 1], result[:, 2], result[:, 3]
        x[...] = x.clip(min=0, max=self.width - 1)
        y[...] = y.clip(min=0, max=self.height - 1)
        r[...] = r.clip(min=0, max=self.width - 1)
        b[...] = b.clip(min=0, max=self.height - 1)
        # Drop boxes that became too small after the transform / clipping.
        w = (r - x + 1).clip(min=0)
        h = (b - y + 1).clip(min=0)
        w_threshold, h_threshold = self.min_size
        cond = (w >= w_threshold) & (h >= h_threshold)
        # tail[:, 0] flags "rotated beyond the threshold" (e.g. > 30 degrees).
        if abs(angle) > self.rotate_angle_threshold and len(tail) > 0:
            tail[cond, 0] = True
        return [list(coord / np_output_size) + list(tail_item)
                for coord, tail_item in zip(result[cond], tail[cond])]

    @property
    def targets(self):
        return {
            "image": self.apply,
            "bboxes": self.apply_to_bboxes,
        }

    def to_tuple(self, value, add_value=1):
        """Normalize a scalar limit into a (low, high) tuple.

        A tuple is returned unchanged; otherwise the range is
        (add_value - value, add_value + value), or (value, value) when
        add_value is None.
        """
        if isinstance(value, tuple):
            return value
        if add_value is not None:
            return (add_value - value, add_value + value)
        return (value, value)

    @property
    def targets_as_params(self):
        return ["image", "bboxes"]

    def get_transform_init_args_names(self):
        # BUGFIX: "min_size" was missing, so a serialized/replayed transform
        # silently lost its min-size filtering threshold.
        return ("height", "width", "scale_limit", "angle_limit",
                "offset_limit", "rotate_angle_threshold", "min_size")

# Augmentation pipeline: RandomAffine onto a fixed 800x800 canvas, then random
# photometric / noise / occlusion / blur perturbations.
transform = A.Compose([
    RandomAffine(800, 800),
    A.HorizontalFlip(),
    A.OneOf([
        A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=20, val_shift_limit=27),
        A.RandomContrast(limit=0.8),
        A.JpegCompression(quality_lower=5, quality_upper=100),
    ]),
    A.OneOf([
        A.ISONoise(),
        A.IAAAdditiveGaussianNoise(),
        A.IAASharpen(),
    ]),
    A.OneOf([
        A.Cutout(num_holes=32, max_h_size=24, max_w_size=24, p=0.5),
        A.RandomRain(p=0.2),
        A.RandomFog(fog_coef_lower=0.1, fog_coef_upper=0.3, p=0.2),
        A.IAAPerspective(p=0.5),
    ]),
    A.OneOf([
        A.Blur(blur_limit=9),
        A.MotionBlur(p=1, blur_limit=7),
        A.GaussianBlur(blur_limit=21),
        A.GlassBlur(),
        A.ToGray(),
        A.RandomGamma(gamma_limit=(0, 120), p=0.5),
    ]),
    # ToTensorV2(),  # direct normalization to tensor
    # ToTensor()     # can also apply normalize
], bbox_params=A.BboxParams("pascal_voc"))

cv_image = cv2.imread("cv_image.jpg")
# BUGFIX: cv2.imread returns None (no exception) when the file is missing or
# unreadable; fail loudly instead of crashing later with a cryptic error.
if cv_image is None:
    raise FileNotFoundError("failed to read cv_image.jpg")
# BUGFIX: ndarray.shape[:2] is (rows, cols) == (height, width); the original
# unpacked it as (w, h), swapping the two.
h, w = cv_image.shape[:2]

# When the bboxes carry more than 4 columns, no label_field is required.
# NOTE(review): `bboxes` and `show` are not defined in this chunk — they must
# come from earlier code outside this view; confirm before running standalone.
trans_out = transform(image=cv_image,
                      bboxes=[[x, y, r, b, False] for x, y, r, b in bboxes])
trans_out_image = trans_out["image"]
for x, y, r, b, rot in trans_out["bboxes"]:
    # Red when RandomAffine rotated past its threshold, green otherwise (BGR).
    color = (0, 0, 255) if rot else (0, 255, 0)
    cv2.rectangle(trans_out_image, (int(x), int(y)), (int(r), int(b)), color, 2)
show(trans_out_image)