摘要: # Multi-head Attention 机制的实现 from math import sqrt import torch import torch.nn class Self_Attention(nn.Module): # input : batch_size * seq_len * input 阅读全文
posted @ 2024-09-23 14:31 15375357604 阅读(80) 评论(0) 推荐(0)