Hand-written NMS
def cal_iou(bbox1, bbox2):
    # Boxes are [x1, y1, x2, y2] in inclusive pixel coordinates,
    # so widths/heights use the "+1" convention throughout.
    inter_x = min(bbox1[2], bbox2[2]) - max(bbox1[0], bbox2[0]) + 1
    inter_y = min(bbox1[3], bbox2[3]) - max(bbox1[1], bbox2[1]) + 1
    if inter_x <= 0 or inter_y <= 0:
        return 0
    inter_area = inter_x * inter_y
    area1 = (bbox1[2] - bbox1[0] + 1) * (bbox1[3] - bbox1[1] + 1)
    area2 = (bbox2[2] - bbox2[0] + 1) * (bbox2[3] - bbox2[1] + 1)
    # The denominator cannot be 0: the smallest possible area is 1.
    iou = inter_area / (area1 + area2 - inter_area)
    return iou
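As a quick sanity check under the inclusive-pixel convention above, here is the IoU of the two overlapping boxes used in the test further below, worked out by hand:

# [0,0,100,100] vs [10,10,100,100]
# intersection: 91 * 91 = 8281; areas: 101*101 = 10201 and 91*91 = 8281
# union: 10201 + 8281 - 8281 = 10201  ->  IoU = 8281 / 10201 ≈ 0.81
print(cal_iou([0, 0, 100, 100], [10, 10, 100, 100]))  # ~0.8118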
def nms(bboxes, scores, iou_thre=0.3):
    # Pair each score with its box so they can be sorted together.
    infos = list(map(list, zip(scores, bboxes)))
    # Sort by confidence, highest first.
    infos.sort(key=lambda x: x[0], reverse=True)
    for i in range(len(infos)):
        # Skip boxes that have already been suppressed.
        if infos[i][0] == -1:
            continue
        for j in range(i + 1, len(infos)):
            if infos[j][0] == -1:
                continue
            iou = cal_iou(infos[i][1], infos[j][1])
            if iou > iou_thre:
                # Mark the score as -1; these boxes are dropped below.
                infos[j][0] = -1
    # Remove the boxes whose score was set to -1.
    new_bboxes = []
    for i in range(len(infos)):
        if infos[i][0] != -1:
            new_bboxes.append(infos[i][1])
    return new_bboxes
test_bboxes = [[0,0,100,100],[10,10,100,100],[100,100,200,200]]
test_scores = [0.98,0.3,0.7]
res = nms(test_bboxes,test_scores)
print(res)
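With the 0.3 threshold, the second box (score 0.3) is suppressed by the first (IoU ≈ 0.81), while the third box barely touches the first (IoU ≈ 5e-5) and is kept, so the expected output is [[0, 0, 100, 100], [100, 100, 200, 200]], ordered by descending score.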
Hand-written MHA (multi-head self-attention)
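The class below implements standard multi-head scaled dot-product attention, softmax(Q K^T / sqrt(d_head)) V, computed per head and followed by a linear output projection. Note that when qkv_bias=True only Q and V get learnable biases: a bias on K would only add a per-query constant to the attention logits, which the softmax cancels, so it is kept at zero.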
import torch
import torch.nn as nn
import torch.nn.functional as F


class Attention(nn.Module):
    def __init__(
            self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
            proj_drop=0., window_size=None, attn_head_dim=None):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        if attn_head_dim is not None:
            head_dim = attn_head_dim
        all_head_dim = head_dim * self.num_heads
        self.scale = qk_scale or head_dim ** -0.5

        self.qkv = nn.Linear(dim, all_head_dim * 3, bias=False)
        if qkv_bias:
            # Learnable biases for Q and V only; the K bias stays zero.
            self.q_bias = nn.Parameter(torch.zeros(all_head_dim))
            self.v_bias = nn.Parameter(torch.zeros(all_head_dim))
        else:
            self.q_bias = None
            self.v_bias = None

        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(all_head_dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x, bool_masked_pos=None):
        B, N, C1 = x.shape
        qkv_bias = None
        if self.q_bias is not None:
            qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias))
        # B N C1 -> B N C*3, applying the packed QKV projection with the assembled bias
        qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias)
        # B N C*3 -> B N 3 num_heads C/num_heads -> 3 B num_heads N C/num_heads
        qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        # split into Q, K, V, each (B, num_heads, N, C/num_heads)
        q, k, v = qkv[0], qkv[1], qkv[2]

        q = q * self.scale
        # (N, C) @ (C, N) -> (N, N) per head
        attn = (q @ k.transpose(-2, -1))  # (B, num_heads, N, N)
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)
        # B, N_head, N, N @ B, N_head, N, C -> B, N_head, N, C
        # B, N_head, N, C -> B, N, N_head, C -> B, N, N_head*C
        x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
        # B N N_head*C -> B N C1
        x = self.proj(x)
        x = self.proj_drop(x)
        return x
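A minimal smoke test of the module; the dimensions and shapes below are arbitrary choices for illustration:

attn = Attention(dim=256, num_heads=8, qkv_bias=True)
x = torch.randn(2, 50, 256)   # (batch, tokens, dim)
out = attn(x)
print(out.shape)              # torch.Size([2, 50, 256])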
Hand-written cross-attention
'''
Modified from Attention()
'''
class CrossAttention(nn.Module):
    def __init__(
            self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
            proj_drop=0., window_size=None, attn_head_dim=None):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        if attn_head_dim is not None:
            head_dim = attn_head_dim
        all_head_dim = head_dim * self.num_heads
        self.scale = qk_scale or head_dim ** -0.5

        # Separate projections so Q can come from one sequence and K/V from another.
        self.q = nn.Linear(dim, all_head_dim, bias=False)
        self.k = nn.Linear(dim, all_head_dim, bias=False)
        self.v = nn.Linear(dim, all_head_dim, bias=False)

        if qkv_bias:
            # As in Attention: learnable biases for Q and V, K bias fixed at zero.
            self.q_bias = nn.Parameter(torch.zeros(all_head_dim))
            self.v_bias = nn.Parameter(torch.zeros(all_head_dim))
        else:
            self.q_bias = None
            self.k_bias = None
            self.v_bias = None

        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(all_head_dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x, bool_masked_pos=None, k=None, v=None):
        # x is the query sequence; k and v (required) are the key/value sequences.
        B, N, C = x.shape
        N_k = k.shape[1]
        N_v = v.shape[1]

        q_bias, k_bias, v_bias = None, None, None
        if self.q_bias is not None:
            q_bias = self.q_bias
            k_bias = torch.zeros_like(self.v_bias, requires_grad=False)
            v_bias = self.v_bias

        q = F.linear(input=x, weight=self.q.weight, bias=q_bias)
        q = q.reshape(B, N, 1, self.num_heads, -1).permute(2, 0, 3, 1, 4).squeeze(0)  # (B, N_head, N_q, dim)
        k = F.linear(input=k, weight=self.k.weight, bias=k_bias)
        k = k.reshape(B, N_k, 1, self.num_heads, -1).permute(2, 0, 3, 1, 4).squeeze(0)
        v = F.linear(input=v, weight=self.v.weight, bias=v_bias)
        v = v.reshape(B, N_v, 1, self.num_heads, -1).permute(2, 0, 3, 1, 4).squeeze(0)

        q = q * self.scale
        attn = (q @ k.transpose(-2, -1))  # (B, N_head, N_q, N_k)
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x
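A minimal usage sketch with queries and context of different lengths; the dimensions and shapes below are arbitrary choices for illustration:

cross_attn = CrossAttention(dim=256, num_heads=8, qkv_bias=True)
queries = torch.randn(2, 10, 256)   # query sequence
context = torch.randn(2, 50, 256)   # key/value sequence (e.g. encoder output)
out = cross_attn(queries, k=context, v=context)
print(out.shape)                    # torch.Size([2, 10, 256])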