Hand-written NMS
def cal_iou(bbox1, bbox2):
    # Boxes are [x1, y1, x2, y2] in inclusive pixel coordinates,
    # so widths/heights use the "+1" convention throughout.
    inter_x = min(bbox1[2], bbox2[2]) - max(bbox1[0], bbox2[0]) + 1
    inter_y = min(bbox1[3], bbox2[3]) - max(bbox1[1], bbox2[1]) + 1
    if inter_x <= 0 or inter_y <= 0:
        return 0
    inter_area = inter_x * inter_y
    area1 = (bbox1[2] - bbox1[0] + 1) * (bbox1[3] - bbox1[1] + 1)
    area2 = (bbox2[2] - bbox2[0] + 1) * (bbox2[3] - bbox2[1] + 1)
    # The denominator cannot be 0: the smallest possible area is 1.
    iou = inter_area / (area1 + area2 - inter_area)
    return iou
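As a quick sanity check under the inclusive-pixel convention above, here is the IoU of the two overlapping boxes used in the test further below, worked out by hand:

# [0,0,100,100] vs [10,10,100,100]
# intersection: 91 * 91 = 8281; areas: 101*101 = 10201 and 91*91 = 8281
# union: 10201 + 8281 - 8281 = 10201  ->  IoU = 8281 / 10201 ≈ 0.81
print(cal_iou([0, 0, 100, 100], [10, 10, 100, 100]))  # ~0.8118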
def nms(bboxes, scores, iou_thre=0.3):
    # Pair each score with its box so they can be sorted together.
    infos = list(map(list, zip(scores, bboxes)))
    # Sort by confidence, highest first.
    infos.sort(key=lambda x: x[0], reverse=True)
    for i in range(len(infos)):
        # Skip boxes that have already been suppressed.
        if infos[i][0] == -1:
            continue
        for j in range(i + 1, len(infos)):
            if infos[j][0] == -1:
                continue
            iou = cal_iou(infos[i][1], infos[j][1])
            if iou > iou_thre:
                # Mark the score as -1; these boxes are dropped below.
                infos[j][0] = -1
    # Remove the boxes whose score was set to -1.
    new_bboxes = []
    for i in range(len(infos)):
        if infos[i][0] != -1:
            new_bboxes.append(infos[i][1])
    return new_bboxes
test_bboxes = [[0,0,100,100],[10,10,100,100],[100,100,200,200]]
test_scores = [0.98,0.3,0.7]
res = nms(test_bboxes,test_scores)
print(res)
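With the 0.3 threshold, the second box (score 0.3) is suppressed by the first (IoU ≈ 0.81), while the third box barely touches the first (IoU ≈ 5e-5) and is kept, so the expected output is [[0, 0, 100, 100], [100, 100, 200, 200]], ordered by descending score.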
Hand-written MHA (multi-head self-attention)
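The class below implements standard multi-head scaled dot-product attention, softmax(Q K^T / sqrt(d_head)) V, computed per head and followed by a linear output projection. Note that when qkv_bias=True only Q and V get learnable biases: a bias on K would only add a per-query constant to the attention logits, which the softmax cancels, so it is kept at zero.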
import torch
import torch.nn as nn
import torch.nn.functional as F


class Attention(nn.Module):
    def __init__(
            self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
            proj_drop=0., window_size=None, attn_head_dim=None):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        if attn_head_dim is not None:
            head_dim = attn_head_dim
        all_head_dim = head_dim * self.num_heads
        self.scale = qk_scale or head_dim ** -0.5

        self.qkv = nn.Linear(dim, all_head_dim * 3, bias=False)
        if qkv_bias:
            # Learnable biases for Q and V only; the K bias stays zero.
            self.q_bias = nn.Parameter(torch.zeros(all_head_dim))
            self.v_bias = nn.Parameter(torch.zeros(all_head_dim))
        else:
            self.q_bias = None
            self.v_bias = None

        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(all_head_dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x, bool_masked_pos=None):
        B, N, C1 = x.shape
        qkv_bias = None
        if self.q_bias is not None:
            qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias))
        # B N C1 -> B N C*3, applying the packed QKV projection with the assembled bias
        qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias)
        # B N C*3 -> B N 3 num_heads C/num_heads -> 3 B num_heads N C/num_heads
        qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        # split into Q, K, V, each (B, num_heads, N, C/num_heads)
        q, k, v = qkv[0], qkv[1], qkv[2]

        q = q * self.scale
        # (N, C) @ (C, N) -> (N, N) per head
        attn = (q @ k.transpose(-2, -1))  # (B, num_heads, N, N)
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)
        # B, N_head, N, N @ B, N_head, N, C -> B, N_head, N, C
        # B, N_head, N, C -> B, N, N_head, C -> B, N, N_head*C
        x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
        # B N N_head*C -> B N C1
        x = self.proj(x)
        x = self.proj_drop(x)
        return x
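A minimal smoke test of the module; the dimensions and shapes below are arbitrary choices for illustration:

attn = Attention(dim=256, num_heads=8, qkv_bias=True)
x = torch.randn(2, 50, 256)   # (batch, tokens, dim)
out = attn(x)
print(out.shape)              # torch.Size([2, 50, 256])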
Hand-written cross-attention
'''
Modified from Attention()
'''
class CrossAttention(nn.Module):
    def __init__(
            self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
            proj_drop=0., window_size=None, attn_head_dim=None):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        if attn_head_dim is not None:
            head_dim = attn_head_dim
        all_head_dim = head_dim * self.num_heads
        self.scale = qk_scale or head_dim ** -0.5

        # Separate projections so Q can come from one sequence and K/V from another.
        self.q = nn.Linear(dim, all_head_dim, bias=False)
        self.k = nn.Linear(dim, all_head_dim, bias=False)
        self.v = nn.Linear(dim, all_head_dim, bias=False)

        if qkv_bias:
            # As in Attention: learnable biases for Q and V, K bias fixed at zero.
            self.q_bias = nn.Parameter(torch.zeros(all_head_dim))
            self.v_bias = nn.Parameter(torch.zeros(all_head_dim))
        else:
            self.q_bias = None
            self.k_bias = None
            self.v_bias = None

        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(all_head_dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x, bool_masked_pos=None, k=None, v=None):
        # x is the query sequence; k and v (required) are the key/value sequences.
        B, N, C = x.shape
        N_k = k.shape[1]
        N_v = v.shape[1]

        q_bias, k_bias, v_bias = None, None, None
        if self.q_bias is not None:
            q_bias = self.q_bias
            k_bias = torch.zeros_like(self.v_bias, requires_grad=False)
            v_bias = self.v_bias

        q = F.linear(input=x, weight=self.q.weight, bias=q_bias)
        q = q.reshape(B, N, 1, self.num_heads, -1).permute(2, 0, 3, 1, 4).squeeze(0)  # (B, N_head, N_q, dim)
        k = F.linear(input=k, weight=self.k.weight, bias=k_bias)
        k = k.reshape(B, N_k, 1, self.num_heads, -1).permute(2, 0, 3, 1, 4).squeeze(0)
        v = F.linear(input=v, weight=self.v.weight, bias=v_bias)
        v = v.reshape(B, N_v, 1, self.num_heads, -1).permute(2, 0, 3, 1, 4).squeeze(0)

        q = q * self.scale
        attn = (q @ k.transpose(-2, -1))  # (B, N_head, N_q, N_k)
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x
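A minimal usage sketch with queries and context of different lengths; the dimensions and shapes below are arbitrary choices for illustration:

cross_attn = CrossAttention(dim=256, num_heads=8, qkv_bias=True)
queries = torch.randn(2, 10, 256)   # query sequence
context = torch.randn(2, 50, 256)   # key/value sequence (e.g. encoder output)
out = cross_attn(queries, k=context, v=context)
print(out.shape)                    # torch.Size([2, 10, 256])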