A Collection of Commonly Used Code Snippets

Retrieving the top-K retrieval results
import numpy as np


def i2t(images, sims, npts=None, return_ranks=False):
    """
    Images->Text (Image Annotation)
    Images: (N, n_region, d) matrix of images
    Captions: (5N, max_n_word, d) matrix of captions
    CapLens: (5N) array of caption lengths
    sims: (N, 5N) matrix of similarity im-cap
    """
    npts = images.shape[0]
    ranks = np.zeros(npts)
    top1 = np.zeros(npts)
    results = np.zeros((5000, 10), dtype='int')  # assumes the 5k-caption test split
    for index in range(npts):
        inds = np.argsort(sims[index])[::-1]
        # Score: best rank among the 5 ground-truth captions
        rank = 1e20
        for i in range(5 * index, 5 * index + 5, 1):
            tmp = np.where(inds == i)[0][0]
            if tmp < rank:
                rank = tmp
        ranks[index] = rank
        top1[index] = inds[0]
        # top-10 ranked captions for this image
        results[index] = inds[0:10]

    # ranks holds the rank of the correct match; 0 means top-1
    np.savetxt("/mnt/data10t/bakuphome20210617/zhangkun/vis_cosine/i2t_results.csv", results)
    np.savetxt("/mnt/data10t/bakuphome20210617/zhangkun/vis_cosine/i2t_right_or_wrong.csv", ranks)

    # Compute metrics
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)
    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, medr, meanr), (ranks, top1)
    else:
        return (r1, r5, r10, medr, meanr)


def t2i(images, sims, npts=None, return_ranks=False):
    """
    Text->Images (Image Search)
    Images: (N, n_region, d) matrix of images
    Captions: (5N, max_n_word, d) matrix of captions
    CapLens: (5N) array of caption lengths
    sims: (N, 5N) matrix of similarity im-cap
    """
    npts = images.shape[0]
    ranks = np.zeros(5 * npts)
    top1 = np.zeros(5 * npts)
    # --> (5N(caption), N(image))
    sims = sims.T
    results = np.zeros((5000, 10), dtype='int')  # assumes the 5k-caption test split
    for index in range(npts):
        for i in range(5):
            inds = np.argsort(sims[5 * index + i])[::-1]
            ranks[5 * index + i] = np.where(inds == index)[0][0]
            top1[5 * index + i] = inds[0]
            results[5 * index + i] = inds[0:10]

    np.savetxt("/mnt/data10t/bakuphome20210617/zhangkun/vis_cosine/t2i_results.csv", results)
    np.savetxt("/mnt/data10t/bakuphome20210617/zhangkun/vis_cosine/t2i_right_or_wrong.csv", ranks)

    # Compute metrics
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)
    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, medr, meanr), (ranks, top1)
    else:
        return (r1, r5, r10, medr, meanr)
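A minimal usage sketch (the file name sims.npy is a placeholder; i2t/t2i only read images.shape[0], so a dummy array of the right length suffices, and note that both functions also write CSVs to the hard-coded paths above):

sims = np.load('sims.npy')                     # placeholder path, shape (N, 5N)
images_stub = np.zeros((sims.shape[0], 1, 1))  # only shape[0] is used
(r1, r5, r10, medr, meanr), (ranks, top1) = i2t(images_stub, sims, return_ranks=True)
print("i2t R@1/R@5/R@10: %.1f %.1f %.1f" % (r1, r5, r10))
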
Extracting attention maps and bounding boxes


import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image


# Inputs: image path, bounding boxes, processed attention values
# (36 values, one weight per region), and the index i used to name the output image.
def region_attention_visualization(img_path, boxes, box_attentions, i):
    # print("load image from: ", img_path)
    # load the image
    img = Image.open(img_path, mode='r')
    # draw each bounding box, brightened in proportion to its attention weight
    img = np.ascontiguousarray(img)
    for box, attention in zip(boxes, box_attentions):
        zeros1 = np.zeros((img.shape), dtype=np.uint8)
        box = cv2.rectangle(zeros1, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 255, 255), -1)
        img = cv2.addWeighted(img, 1, box, attention, 0)
    name = "/mnt/data2/zk/vis/%d.jpg" % i
    cv2.imwrite(name, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))


# Inputs: image path and bounding box info
def region_visualization(img_path, bboxes):
    print("load image from: ", img_path)
    for i in range(36):
        # load the image
        img = Image.open(img_path, mode='r')
        # draw the i-th bounding box, labeled with its region index
        img = np.ascontiguousarray(img)
        p1 = (int(bboxes[i, 0]), int(bboxes[i, 1]))
        p2 = (int(bboxes[i, 2]), int(bboxes[i, 3]))
        print(p1, p2)
        cv2.rectangle(img, p1, p2, (0, 0, 255), 2)
        font = cv2.FONT_HERSHEY_SIMPLEX  # font for the region index
        imgzi = cv2.putText(img, '{}'.format(i), p1, font, 0.5, (255, 0, 0), 1)
        name = "/mnt/data2/zk/vis/bbx/%d.jpg" % i
        cv2.imwrite(name, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))


if __name__ == "__main__":
    flickrid_list = []
    f = open("/mnt/data10t/bakuphome20210617/lz/data/I-T/Flickr30K/f30k_precomp/test_ids.txt", "r")
    lines = f.readlines()
    for line in lines:
        flickrid_list.append(line.split()[0])

    df = pd.read_csv("/mnt/data10t/bakuphome20210617/lz/data/I-T/filename2flickrid.csv")
    total_step = len(flickrid_list)

    ##############-----------------------------------------------------------------------
    # # print all image-text pairs
    # for i, imgid in tqdm(enumerate(flickrid_list), total=total_step, ncols=80):
    #     a = df.loc[df['imgid'] == int(imgid)]
    #     b = a.iloc[0, 0]
    #     img_dir = "/mnt/data10t/bakuphome20210617/lz/data/I-T/flickr30k-images/" + str(b)
    #     bbox_info = np.load("/mnt/data10t/bakuphome20210617/lz/data/I-T/Flickr30K/f30k_precomp/test_ims_bbx.npy")
    #     bboxes = bbox_info[i]
    #     conf_path = "/mnt/data10t/bakuphome20210617/lz/Dim_mask_5/attn_conf/finally/" + str(i) + ".npy"
    #     conf = np.load(conf_path)
    #     conf = (conf - np.min(conf)) / (np.max(conf) - np.min(conf))  # min-max scale
    #     idxs = conf.argsort()[::-1][0:15]  # keep the top-15 regions by confidence
    #     for j in range(36):
    #         if j not in idxs:
    #             conf[j] = 0.
    #     conf_sum = np.sum(conf)
    #     conf = (conf / conf_sum) * 5
    #     region_attention_visualization(img_dir, bboxes, conf, i)
    ##############-----------------------------------------------------------------------

    # print selected image-text pairs
    # index of the caption, i.e. which image-text pair to visualize
    i = 53
    imgid = '219'
    a = df.loc[df['imgid'] == int(imgid)]
    b = a.iloc[0, 0]
    img_dir = "/mnt/data10t/bakuphome20210617/lz/data/I-T/flickr30k-images/" + str(b)
    bbox_info = np.load("/mnt/data10t/bakuphome20210617/lz/data/I-T/Flickr30K/f30k_precomp/test_ims_bbx.npy")
    bboxes = bbox_info[i]
    # conf_path = "/mnt/data10t/bakuphome20210617/lz/Dim_mask_5/attn_conf/finally/" + str(i) + ".npy"
    conf_path = "/mnt/data10t/bakuphome20210617/lz/neg_2021_9_11/attn_conf/" + str(i) + ".npy"
    conf = np.load(conf_path)
    conf = (conf - np.min(conf)) / (np.max(conf) - np.min(conf))  # min-max scale
    idxs = conf.argsort()[::-1][0:15]  # keep the top-15 regions by confidence
    for j in range(36):
        if j not in idxs:
            conf[j] = 0.
    conf_sum = np.sum(conf)
    conf = (conf / conf_sum) * 1.5
    region_attention_visualization(img_dir, bboxes, conf, i)

    ##############-----------------------------------------------------------------------
    # # index of the caption, i.e. which image-text pair to visualize
    # i = 53
    # imgid = '219'
    # a = df.loc[df['imgid'] == int(imgid)]
    # b = a.iloc[0, 0]
    # img_dir = "/mnt/data10t/bakuphome20210617/lz/data/I-T/flickr30k-images/" + str(b)
    # bbox_info = np.load("/mnt/data10t/bakuphome20210617/lz/data/I-T/Flickr30K/f30k_precomp/test_ims_bbx.npy")
    # bboxes = bbox_info[i]
    # region_visualization(img_dir, bboxes)
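The overlay trick in region_attention_visualization works as follows: each region is drawn as a filled white rectangle on a black canvas, and cv2.addWeighted(img, 1, box, attention, 0) adds that canvas back scaled by the attention weight, so higher-attention regions appear brighter. This is also why the script renormalizes the kept top-15 weights (dividing by their sum and multiplying by a constant): it keeps the total brightening roughly comparable across images.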

Dataset JSON processing: given the split IDs of a dataset, reorganize the image file names and their corresponding captions

import numpy as np
import pandas as pd
import ujson as json
from modules.basic_utils import load_json


caption_train_2014 = '/mnt/data10t/bakuphome20210617/lz/data/I-T/MS-COCO/annotations/captions_train2014.json'
caption_val_2014 = '/mnt/data10t/bakuphome20210617/lz/data/I-T/MS-COCO/annotations/captions_val2014.json'
caption_train = load_json(caption_train_2014)
caption_val = load_json(caption_val_2014)

testall_image_id = np.loadtxt('/mnt/data10t/bakuphome20210617/lz/data/I-T/MS-COCO/coco_precomp/testall_ids.txt', dtype=int)
train_image_id = np.loadtxt('/mnt/data10t/bakuphome20210617/lz/data/I-T/MS-COCO/coco_precomp/train_ids.txt', dtype=int)
dev_image_id = np.loadtxt('/mnt/data10t/bakuphome20210617/lz/data/I-T/MS-COCO/coco_precomp/dev_ids.txt', dtype=int)

print(len(testall_image_id))
print(len(train_image_id))
print(len(dev_image_id))

######################################################################################################################
train_json = {'images':[]}

for i in range(len(train_image_id)):
    image_id = train_image_id[i]
    split = 'train'
    file_name = ''
    caption = []
    for j in range(len(caption_train['images'])):
        if image_id == caption_train['images'][j]['id']:
            file_name = caption_train['images'][j]['file_name']

            for k in range(len(caption_train['annotations'])):
                if image_id == caption_train['annotations'][k]['image_id']:
                    caption.append(caption_train['annotations'][k]['caption'])
            break


    
    if file_name == '':  # not found in train2014; fall back to val2014
        for j in range(len(caption_val['images'])):
            if image_id == caption_val['images'][j]['id']:
                file_name = caption_val['images'][j]['file_name']

                for k in range(len(caption_val['annotations'])):
                    if image_id == caption_val['annotations'][k]['image_id']:
                        caption.append(caption_val['annotations'][k]['caption'])
                break

    if len(caption) != 5:
        print('error!')  # every id here is expected to have exactly 5 captions
    data = {'imageid': str(image_id), 'split': split, 'file_name': file_name,
            'sentences': [{'raw': caption[0]}, {'raw': caption[1]}, {'raw': caption[2]}, {'raw': caption[3]}, {'raw': caption[4]}]}

    train_json['images'].append(data)

train_json = json.dumps(train_json)
f = open('/mnt/data2/zk/train_coco.json', 'w')
f.write(train_json)
f.close()
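The nested scans above are O(N×M) per id. A dict-based index over the same annotation files is equivalent and much faster; a sketch reusing the variables already loaded above (assumption: it keeps the first five captions per id, matching the 5-captions check):

# Build id -> file_name and id -> [captions] indexes once, then look up in O(1).
file_by_id = {img['id']: img['file_name'] for img in caption_train['images']}
file_by_id.update({img['id']: img['file_name'] for img in caption_val['images']})

caps_by_id = {}
for ann in caption_train['annotations'] + caption_val['annotations']:
    caps_by_id.setdefault(ann['image_id'], []).append(ann['caption'])

train_json_fast = {'images': []}
for image_id in train_image_id:
    image_id = int(image_id)
    caption = caps_by_id.get(image_id, [])
    if len(caption) < 5:
        print('error!')
        continue
    train_json_fast['images'].append({
        'imageid': str(image_id), 'split': 'train',
        'file_name': file_by_id.get(image_id, ''),
        'sentences': [{'raw': c} for c in caption[:5]]})
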


######################################################################################################################
# dev_json = {'images':[]}

# for i in range(len(dev_image_id)//5):
#     image_id = dev_image_id[i*5]
#     split = 'dev'
#     file_name = ''
#     caption = []


#     for j in range(len(caption_val['images'])):
#         if image_id == caption_val['images'][j]['id']:
#             file_name = caption_val['images'][j]['file_name']

#             for k in range(len(caption_val['annotations'])):
#                 if image_id == caption_val['annotations'][k]['image_id']:
#                     caption.append(caption_val['annotations'][k]['caption'])
#             break

#     if len(caption) >= 5:
#         data = {'imageid':str(image_id), 'split':split, 'file_name':file_name, 
#         'sentences':[{'raw':caption[0]}, {'raw':caption[1]}, {'raw':caption[2]}, {'raw':caption[3]}, {'raw':caption[4]}]}
#         dev_json['images'].append(data)
#     else:
#         print("error!")

# train_json = json.dumps(dev_json)
# f = open('/mnt/data2/zk/dev_coco.json', 'w')
# f.write(train_json)
# f.close()




######################################################################################################################
# testall_json = {'images':[]}

# for i in range(len(testall_image_id)//5):
#     image_id = testall_image_id[i*5]
#     split = 'testall'
#     file_name = ''
#     caption = []


#     for j in range(len(caption_val['images'])):
#         if image_id == caption_val['images'][j]['id']:
#             file_name = caption_val['images'][j]['file_name']

#             for k in range(len(caption_val['annotations'])):
#                 if image_id == caption_val['annotations'][k]['image_id']:
#                     caption.append(caption_val['annotations'][k]['caption'])
#             break

#     if len(caption) >= 5:
#         data = {'imageid':str(image_id), 'split':split, 'file_name':file_name, 
#         'sentences':[{'raw':caption[0]}, {'raw':caption[1]}, {'raw':caption[2]}, {'raw':caption[3]}, {'raw':caption[4]}]}
#         testall_json['images'].append(data)
#     else:
#         print("error!")

# testall_json = json.dumps(testall_json)
# f = open('/mnt/data2/zk/testall_coco.json', 'w')
# f.write(testall_json)
# f.close()

# # for i in range(len(testall_json['images'])):
# #     testall_json['images'][i]['split'] = 'testall'

Ensemble code: the inputs are the corresponding similarity matrices

# -------------------------------------------------------------------------------------
# Negative-Aware Attention Framework for Image-Text Matching, implementation based on SCAN
# https:.
# "Negative-Aware Attention Framework for Image-Text Matching"
# Kun Zhang, Zhendong Mao, Quan Wang, Yongdong Zhang
#
# Written by Kun Zhang, 2022
# -------------------------------------------------------------------------------------
# from vocab import Vocabulary
# import evaluation
import numpy as np
import os


def i2t(im_len, sims, npts=None, return_ranks=False):
    """
    Images->Text (Image Annotation)
    Images: (N, n_region, d) matrix of images
    Captions: (5N, max_n_word, d) matrix of captions
    CapLens: (5N) array of caption lengths
    sims: (N, 5N) matrix of similarity im-cap
    """
    npts = im_len
    ranks = np.zeros(npts)
    top1 = np.zeros(npts)
    for index in range(npts):
        inds = np.argsort(sims[index])[::-1]
        # Score
        rank = 1e20
        for i in range(5 * index, 5 * index + 5, 1):
            tmp = np.where(inds == i)[0][0]
            if tmp < rank:
                rank = tmp
        ranks[index] = rank
        top1[index] = inds[0]

    # Compute metrics
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)
    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, medr, meanr), (ranks, top1)
    else:
        return (r1, r5, r10, medr, meanr)


def t2i(im_len, sims, npts=None, return_ranks=False):
    """
    Text->Images (Image Search)
    Images: (N, n_region, d) matrix of images
    Captions: (5N, max_n_word, d) matrix of captions
    CapLens: (5N) array of caption lengths
    sims: (N, 5N) matrix of similarity im-cap
    """
    npts = im_len
    ranks = np.zeros(5 * npts)
    top1 = np.zeros(5 * npts)

    # --> (5N(caption), N(image))
    sims = sims.T

    for index in range(npts):
        for i in range(5):
            inds = np.argsort(sims[5 * index + i])[::-1]
            ranks[5 * index + i] = np.where(inds == index)[0][0]
            top1[5 * index + i] = inds[0]

    # Compute metrics
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)
    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, medr, meanr), (ranks, top1)
    else:
        return (r1, r5, r10, medr, meanr)



if __name__ == '__main__':
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    isfold5 = True

    if not isfold5:

        # ## Flickr30K
        # Path_of_Model_1 = '/mnt/data2/zk/ESL_bert/checkpoint2/Flickr30K_ESL_MODELS/sim_best_flickr_521.7_learnable.txt'
        # Path_of_Model_2 = '/mnt/data2/zk/ESL_bert/checkpoint2/Flickr30K_ESL_MODELS/sim_best_flickr_522.2.txt'

        ## MS-COCO
        Path_of_Model_1 = '/mnt/data2/zk/ESL_bert/checkpoint2/COCO-LEARNABLE/sim_best_447.0_coco_5k.txt'
        Path_of_Model_2 = '/mnt/data2/zk/ESL_bert/checkpoint2/COCO-NON-LEARNABLE/sim_best_coco_446.9_non_learnable.txt'

        sims1 = np.loadtxt(Path_of_Model_1)
        sims2 = np.loadtxt(Path_of_Model_2)

        sims = (sims1 + sims2)  # summing or averaging gives identical rankings, hence identical metrics
        im_len = len(sims)
        print('im length:', im_len)
        r, rt = i2t(im_len, sims, return_ranks=True)
        ri, rti = t2i(im_len, sims, return_ranks=True)
        ar = (r[0] + r[1] + r[2]) / 3
        ari = (ri[0] + ri[1] + ri[2]) / 3
        rsum = r[0] + r[1] + r[2] + ri[0] + ri[1] + ri[2]
        print("rsum: %.1f" % rsum)
        print("Average i2t Recall: %.1f" % ar)
        print("Image to text: %.1f %.1f %.1f %.1f %.1f" % r)
        print("Average t2i Recall: %.1f" % ari)
        print("Text to image: %.1f %.1f %.1f %.1f %.1f" % ri)
    else:
        results = []
        for i in range(5):

            Path_of_Model_1 = '/mnt/data2/zk/ESL_bert/checkpoint2/COCO-LEARNABLE/'
            Path_of_Model_2 = '/mnt/data2/zk/ESL_bert/checkpoint2/COCO-NON-LEARNABLE/'

            sims1 = np.loadtxt(Path_of_Model_1 + str(i) + 'sim_best.txt')
            sims2 = np.loadtxt(Path_of_Model_2 + str(i) + 'sim_best.txt')

            sim_shard = (sims1 + sims2) / 2
            im_len = len(sim_shard)
            print('im length:', im_len)
            r, rt0 = i2t(im_len, sim_shard, return_ranks=True)
            print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % r)
            ri, rti0 = t2i(im_len, sim_shard, return_ranks=True)
            print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % ri)

            if i == 0:
                rt, rti = rt0, rti0
            ar = (r[0] + r[1] + r[2]) / 3
            ari = (ri[0] + ri[1] + ri[2]) / 3
            rsum = r[0] + r[1] + r[2] + ri[0] + ri[1] + ri[2]
            print("rsum: %.1f ar: %.1f ari: %.1f" % (rsum, ar, ari))
            results += [list(r) + list(ri) + [ar, ari, rsum]]

        print("-----------------------------------")
        print("Mean metrics: ")
        mean_metrics = tuple(np.array(results).mean(axis=0).flatten())
        print("rsum: %.1f" % ( mean_metrics[12]))
        print("Average i2t Recall: %.1f" % mean_metrics[11])
        print("Image to text: %.1f %.1f %.1f %.1f %.1f" %
              mean_metrics[:5])
        print("Average t2i Recall: %.1f" % mean_metrics[12])
        print("Text to image: %.1f %.1f %.1f %.1f %.1f" %
              mean_metrics[5:10])
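(Each result row is list(r) + list(ri) + [ar, ari, rsum], so ar, ari and rsum live at indices 10, 11 and 12; the original printed indices 11 and 12 for the two averages, which mislabeled ari and rsum.) Since i2t/t2i only argsort each row of sims, extending the ensemble to k models is just an elementwise mean over k saved matrices; a sketch with placeholder paths:

import numpy as np

# hypothetical paths to k saved similarity matrices
paths = ['/path/to/model_%d_sims.txt' % k for k in range(3)]
sims = np.mean([np.loadtxt(p) for p in paths], axis=0)
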

Printing argparse parameters

import argparse

parser = argparse.ArgumentParser()
# parser.add_argument(...)
# ... add more arguments here
args = parser.parse_args()

# 1. Print with print()
for arg in vars(args):
    print(format(arg, '<20'), format(str(getattr(args, arg)), '<'))  # name, value

# 2. Print with logging
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)
message = '\n'.join([f'{k:<20}: {v}' for k, v in vars(args).items()])
logger.info(message)
# or call logging directly:
logging.info(message)
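A self-contained demo of the same pattern (the two arguments are invented for illustration; parse_args([]) parses an empty argument list so the snippet runs anywhere):

import argparse
import logging

parser = argparse.ArgumentParser()
parser.add_argument('--lr', type=float, default=0.0005)     # hypothetical argument
parser.add_argument('--batch_size', type=int, default=128)  # hypothetical argument
args = parser.parse_args([])  # empty list: fall back to the defaults

logging.basicConfig(level=logging.INFO)
logging.info('\n' + '\n'.join(f'{k:<20}: {v}' for k, v in vars(args).items()))
# prints (with logging's default prefix) something like:
# INFO:root:
# lr                  : 0.0005
# batch_size          : 128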

Counting caption lengths in a dataset

import numpy as np

captions_length = []
captions_length_bool = []
## Flickr30K: /mnt/data10t/bakuphome20210617/I-T/Flickr30K/f30k_precomp/train_precaps.txt
## MS-COCO:   /mnt/data10t/bakuphome20210617/data/coco_precomp/train_precaps_stan.txt
with open('/mnt/data10t/bakuphome20210617/data/coco_precomp/train_precaps_stan.txt', 'r') as f:
    for line in f:
        # number of tokens, minus 2 (presumably the <start>/<end> markers)
        length = len(line.strip().split(',')) - 2
        captions_length.append(length)
        # threshold; set to 10 or 15 to reproduce the stats below
        if length >= 12:
            captions_length_bool.append(1)
        else:
            captions_length_bool.append(0)

print(np.mean(captions_length))
vali_length = np.sum(captions_length_bool)
print(vali_length / len(captions_length_bool))  # fraction of captions at or above the threshold
## Flickr30K: mean 12.40635172413793; >=10 -> 0.6734965517241379; >=15 -> 0.27022068965517243
## MS-COCO:   mean 10.303602355080459; >=10 -> 0.5642359670571204; >=15 -> 0.05558625438046731
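
To see the whole length distribution rather than a single threshold, a matplotlib histogram sketch over the captions_length list computed above (bin width and output path are arbitrary choices):

import matplotlib.pyplot as plt

plt.figure()
plt.hist(captions_length, bins=range(0, 60, 2))
plt.xlabel('caption length (tokens)')
plt.ylabel('#captions')
plt.savefig('caption_length_hist.png', dpi=150)  # placeholder output path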

 

Visualizing vision-language alignment:

(1) Attention weights

First, save the attention weight matrices.

##################################################################################################
            # Fragment from inside the model's forward pass: query (cap_i_expand), img_emb,
            # Diagonal_Mask, l2norm and self.temp_scale are defined elsewhere in the model.
            query = cap_i_expand

            ### cross-attention
            smooth = torch.exp(self.temp_scale.weight)

            # (batch, queryL, d) @ (d, d) @ (batch, d, sourceL) --> (batch, queryL, sourceL)
            attn = torch.tanh(query @ Diagonal_Mask @ torch.transpose(img_emb, 1, 2))

            # --> (batch, sourceL, queryL)
            attnT = torch.transpose(attn, 1, 2).contiguous()
            attn = nn.LeakyReLU(0.1)(attnT)
            attn = l2norm(attn, 2)

            # --> (batch, queryL, sourceL)
            attn = torch.transpose(attn, 1, 2).contiguous()
            # temperature-scaled softmax over the source (region) axis
            attn = F.softmax(attn * smooth, dim=2)
            # --> (batch, queryL, d)

            # ##################################################################################################
            # # save the attention map of the ground-truth pair (i is the caption index,
            # # `space` indexes the embedding subspace)
            # truth = int(i / 5)
            # save_path = '/home1/kunzhang/vis-DH-Set-bert/attention'
            # np.save(save_path + '/{}_{}.npy'.format(i, space), attn[truth].cpu().numpy())
            # ##################################################################################################
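The fragment ends at the dangling "(batch, queryL, d)" comment, i.e. before the attended visual feature is actually formed. In SCAN-style cross-attention the omitted next step would typically be a batched matrix product (a hedged guess, not necessarily this model's exact code):

# attn: (batch, queryL, sourceL), img_emb: (batch, sourceL, d)
weighted_context = torch.bmm(attn, img_emb)  # --> (batch, queryL, d)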

Next, convert the saved attention into one weight per box and visualize it:

import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm

flickrid_list = []
f = open("/home1/kunzhang/vis-LAPS/test_ids.txt", "r")
lines = f.readlines()
for line in lines:
    flickrid_list.append(line.split()[0])

df = pd.read_csv("/home1/kunzhang/vis-DH-Set-bert/filename2flickrid.csv")

# the region boxes live in one file for all captions, so load them once
bbox_info = np.load("/home1/kunzhang/vis-DH-Set-bert/test_ims_bbx.npy")

total_step = len(flickrid_list)
for i, imgid in tqdm(enumerate(flickrid_list), total=total_step, ncols=80):

    imgi = i // 5
    if i == imgi * 5:  # each image id repeats 5 times (once per caption); handle it once
        a = df.loc[df['imgid'] == int(imgid)]
        b = a.iloc[0, 0]

        for cap_j in range(5):

            image_index = i + cap_j

            img_dir = "/home1/kunzhang/datasets/flickr30k-images/" + str(b)
            img = cv2.imread(img_dir)
            bboxes = bbox_info[image_index]

            n_reg = 9

            # pn_0 = "/home1/kunzhang/vis-DH-Set-bert/attention/" + str(image_index) + "_" + str(0) + ".npy"
            # pn_1 = "/home1/kunzhang/vis-DH-Set-bert/attention/" + str(image_index) + "_" + str(1) + ".npy"
            pn_2 = "/home1/kunzhang/vis-DH-Set-bert/attention/" + str(image_index) + "_" + str(2) + ".npy"
            attn_map = np.load(pn_2)  # (n_word, n_region)
            pn = attn_map.mean(0)     # average attention over the query words
            # (the original divided by len(pn_2) -- the length of the *path string*;
            #  the min-max scaling below makes any constant divisor irrelevant)
            # to average all three subspaces instead, load pn_0/pn_1/pn_2 and take their mean

            pn = (pn - np.min(pn)) / (np.max(pn) - np.min(pn))

            idxs = pn.argsort()[::-1][0:n_reg]  # keep the top-n_reg regions by attention

            target = np.zeros((img.shape[0], img.shape[1]))  # heatmap canvas

            for reg_idx in idxs:
                x_1 = bboxes[reg_idx, 0]
                y_1 = bboxes[reg_idx, 1]
                x_2 = bboxes[reg_idx, 2]
                y_2 = bboxes[reg_idx, 3]

                # drop each region's weight at its box center
                x_mean = int((x_1 + x_2) / 2.)
                y_mean = int((y_1 + y_2) / 2.)
                target[y_mean][x_mean] = pn[reg_idx]

            heatmap = cv2.GaussianBlur(target, (251, 251), 0)

            am = np.max(heatmap)
            heatmap /= am / 200  # rescale so the peak value maps to 200
            heatmap = np.uint8(heatmap)
            heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)

            img_add = cv2.addWeighted(src1=img, alpha=1.0, src2=heatmap, beta=0.6, gamma=0)

            name = "/home1/kunzhang/vis-DH-Set-bert/images-subspaces/" + str(image_index) + "_" + str(cap_j) + "_" + str(2) + ".jpg"
            cv2.imwrite(name, img_add)
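Design note: rather than shading whole boxes as in the earlier overlay script, this one drops each region's weight at its box center and Gaussian-blurs the sparse canvas into a smooth heatmap, which avoids hard rectangle edges before blending it over the image with addWeighted.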
    
    

 

(2) t-SNE of feature embeddings (the files are kept under D:\cvpr2025可视化\TSNE\code)

First, save the corresponding feature embeddings: given the textual semantic (word) IDs to analyze, extract the matching feature vectors.

        f = open("/home1/kunzhang/vis-LAPS/id40.txt","r") 
        lines = f.readlines()
        id_list = []
        for line in lines:
            line = line.strip('\n')
            id_list.append(int(line))




            ##################################################################################################

            cap_i_num = caps_all_ids[i][:n_word]

            for nn_id in id_list:
                exist_traget = False
                if nn_id in cap_i_num:
                    index = cap_i_num.tolist().index(nn_id)
                    # print(index)
                    exist_traget = True
    

                if exist_traget == True:
                    cross_modal_feature = sim_loc_1[int(i/5), index]
                    np.save('./feat/cross_40551/1/'+str(i)+'_'+str(nn_id)+'.npy', cross_modal_feature.data.cpu().numpy())

                    cross_modal_feature = sim_loc_2[int(i/5), index]
                    np.save('./feat/cross_40551/2/'+str(i)+'_'+str(nn_id)+'.npy', cross_modal_feature.data.cpu().numpy())

                    cross_modal_feature = sim_loc_3[int(i/5), index]
                    np.save('./feat/cross_40551/3/'+str(i)+'_'+str(nn_id)+'.npy', cross_modal_feature.data.cpu().numpy())
            ##################################################################################################

 

Then read the files in the order of the file names associated with each semantic word, stack them into a feature matrix (one row per feature), and feed it into t-SNE for analysis:

from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np


f = open("./id40.txt", "r")
lines = f.readlines()
wid_list = []
for line in lines:
  idx_i = line.strip()
  wid_list.append(idx_i)

for wwid,wid in enumerate(wid_list):
  npy_wid_path = "./id40/"+wid+".txt"
  f = open(npy_wid_path,"r")
  lines = f.readlines()
  
  ## collect the file names belonging to the same semantic word
  npy_list = []
  for line in lines:
    npy_i = line.strip()
    npy_list.append(npy_i)

  arr_all = []
  arr_len = []

  ## load the features
  # /mnt/data10t/bakuphome20210617/zhangkun/vis_DimA/feat2/cross_40551
  for j in range(3):
    arr_list = []
    for npy_p in npy_list:
        # cross_i = np.load("./feat2/cross_40551/"+str(j+1)+"/"+npy_p)
        # cross_i = np.load("./feat2/cross_40537/"+str(j+1)+"/"+npy_p)
        # cross_i = np.load("./feat2/cross_40544/"+str(j+1)+"/"+npy_p)
        cross_i = np.load("./cross_whole/"+str(j+1)+"/"+npy_p)
        arr_list.append(cross_i)

    ## record the number of features per subspace
    arr_len_i = len(arr_list)
    arr_len.append(arr_len_i)
    
    arr_stack = np.vstack(arr_list)
    arr_all.append(arr_stack)


  arr_lenacc = [0]
  acc = 0
  for lenv in arr_len:
    acc = acc+lenv
    arr_lenacc.append(acc)
  print(arr_lenacc)

  concat_emb = np.concatenate(arr_all, 0)
  tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=1000)
  embeddings_tsne = tsne.fit_transform(concat_emb)  # t-SNE output: (n_samples, 2)


  # color_list = ["rosybrown","yellowgreen","olive","indianred","blueviolet","brown","burlywood","cadetblue",\
  #             "chocolate","cornflowerblue","cyan","darkcyan","darkgreen","darkkhaki","darkmagenta","darkolivegreen","darkorange",\
  #             "darkorchid","darkred","darksalmon","darkseagreen","darkslateblue","darkslategray","deeppink","deepskyblue","dodgerblue","fuchsia",\
  #             "gold","goldenrod","green","mediumvioletred","indianred","indigo","teal","crimson",\
  #             "olivedrab","orange","orangered","orchid","steelblue","slateblue","salmon",\
  #             "limegreen","maroon","mediumaquamarine","mediumblue","mediumorchid","mediumspringgreen","navy","oldlace","lightcoral"]


  # color_list = ["darkgreen","darkorange","darkred"]
  color_list = ["thistle","mediumorchid","indigo"]

  plt.figure(figsize=(6, 6))

  for i in range(3):
    plt.scatter(embeddings_tsne[arr_lenacc[i]:arr_lenacc[i+1], 0], embeddings_tsne[arr_lenacc[i]:arr_lenacc[i+1], 1], s=20, c=color_list[i])

    ## Add labels to the data points
    #for label in seleted_class:
    #    i = class_name.index(label)
    #    plt.annotate(label, (embeddings_tsne[i,0], embeddings_tsne[i,1]))
    #    plt.scatter(embeddings_tsne[i, 0], embeddings_tsne[i, 1], s=21, c='red', marker='^')
    ## plt.legend()

  # plt.savefig('./figs_551/cross_one' + str(wwid) + '.png', dpi=300)
  # plt.savefig('./figs_537/cross_base' + str(wwid) + '.png', dpi=300)
  plt.savefig('./figs_whole/cross_whole' + str(wwid) + '.png', dpi=300)
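
A variant of the same script that overlays the first nums semantic words in a single figure instead of one figure per word:
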
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np


f = open("./id40.txt","r") 
lines = f.readlines()
wid_list = []
for line in lines:
  idx_i = line.strip()
  wid_list.append(idx_i)
  
arr_all = []
arr_len = []
nums = 20
for wwid,wid in enumerate(wid_list):
  if wwid<nums:
    npy_wid_path = "./id40/"+wid+".txt"
    f = open(npy_wid_path,"r")
    lines = f.readlines()
  
    ## collect the file names belonging to the same semantic word
    npy_list = []
    for line in lines:
      npy_i = line.strip()
      npy_list.append(npy_i)

    ## load the features
    arr_list = []
    for j in range(3):
      for npy_p in npy_list:
          cross_i = np.load("./feat2/cross_40551/"+str(j+1)+"/"+npy_p)
          # cross_i = np.load("./feat2/cross_40537/"+str(j+1)+"/"+npy_p)
          # cross_i = np.load("./feat2/cross_40544/"+str(j+1)+"/"+npy_p)
          # cross_i = np.load("./feat2/cross_whole/"+str(j+1)+"/"+npy_p)
          arr_list.append(cross_i)

    ## record the number of features per semantic word
    arr_len_i = len(arr_list)
    arr_len.append(arr_len_i)
    
    arr_stack = np.vstack(arr_list)
    arr_all.append(arr_stack)


arr_lenacc = [0]
acc = 0
for lenv in arr_len:
  acc = acc+lenv
  arr_lenacc.append(acc)
print(arr_lenacc)

concat_emb = np.concatenate(arr_all, 0)
tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=1000)
embeddings_tsne = tsne.fit_transform(concat_emb)  # t-SNE output: (n_samples, 2)


color_list = ["rosybrown","yellowgreen","olive","indianred","blueviolet","brown","burlywood","cadetblue",\
                "chocolate","cornflowerblue","cyan","darkcyan","darkgreen","darkkhaki","darkmagenta","darkolivegreen","darkorange",\
                "darkorchid","darkred","darksalmon","darkseagreen","darkslateblue","darkslategray","deeppink","deepskyblue","dodgerblue","fuchsia",\
                "gold","goldenrod","green","mediumvioletred","indianred","indigo","teal","crimson",\
                "olivedrab","orange","orangered","orchid","steelblue","slateblue","salmon",\
                "limegreen","maroon","mediumaquamarine","mediumblue","mediumorchid","mediumspringgreen","navy","oldlace","lightcoral"]


# color_list = ["darkgreen","darkorange","darkred"]
# color_list = ["thistle","mediumorchid","indigo"]

plt.figure(figsize=(10, 10))

for i in range(nums):
  plt.scatter(embeddings_tsne[arr_lenacc[i]:arr_lenacc[i+1], 0], embeddings_tsne[arr_lenacc[i]:arr_lenacc[i+1], 1], s=10, c=color_list[i])

  ## Add labels to the data points
  # for label in seleted_class:
  #     i = class_name.index(label)
  #     plt.annotate(label, (embeddings_tsne[i,0], embeddings_tsne[i,1]))
  #     plt.scatter(embeddings_tsne[i, 0], embeddings_tsne[i, 1], s=21, c='red', marker='^')
  ## plt.legend()

plt.savefig('cross_all-' + str(nums) + '.png', dpi=300)

 

posted @ 2023-03-27 11:45  kkzhang