A Collection of Commonly Used Code Snippets
Retrieving the top-K retrieval results
import numpy as np

def i2t(images, sims, npts=None, return_ranks=False):
    """
    Images->Text (Image Annotation)
    Images: (N, n_region, d) matrix of images
    Captions: (5N, max_n_word, d) matrix of captions
    CapLens: (5N) array of caption lengths
    sims: (N, 5N) matrix of image-caption similarities
    """
    npts = images.shape[0]
    ranks = np.zeros(npts)
    top1 = np.zeros(npts)
    results = np.zeros((5000, 10), dtype='int')  # assumes 5N = 5000 captions (1K-image test split)
    for index in range(npts):
        inds = np.argsort(sims[index])[::-1]
        # Score: best rank among the 5 ground-truth captions
        rank = 1e20
        for i in range(5 * index, 5 * index + 5, 1):
            tmp = np.where(inds == i)[0][0]
            if tmp < rank:
                rank = tmp
        ranks[index] = rank
        top1[index] = inds[0]
        # top-10 ranked captions for this image
        results[index] = inds[0:10]
    # ranks holds the rank of the correct match; 0 means top-1
    np.savetxt("/mnt/data10t/bakuphome20210617/zhangkun/vis_cosine/i2t_results.csv", results)
    np.savetxt("/mnt/data10t/bakuphome20210617/zhangkun/vis_cosine/i2t_right_or_wrong.csv", ranks)
    # Compute metrics
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)
    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, medr, meanr), (ranks, top1)
    else:
        return (r1, r5, r10, medr, meanr)

def t2i(images, sims, npts=None, return_ranks=False):
    """
    Text->Images (Image Search)
    Images: (N, n_region, d) matrix of images
    Captions: (5N, max_n_word, d) matrix of captions
    CapLens: (5N) array of caption lengths
    sims: (N, 5N) matrix of image-caption similarities
    """
    npts = images.shape[0]
    ranks = np.zeros(5 * npts)
    top1 = np.zeros(5 * npts)
    # --> (5N(caption), N(image))
    sims = sims.T
    results = np.zeros((5000, 10), dtype='int')
    for index in range(npts):
        for i in range(5):
            inds = np.argsort(sims[5 * index + i])[::-1]
            ranks[5 * index + i] = np.where(inds == index)[0][0]
            top1[5 * index + i] = inds[0]
            results[5 * index + i] = inds[0:10]
    np.savetxt("/mnt/data10t/bakuphome20210617/zhangkun/vis_cosine/t2i_results.csv", results)
    np.savetxt("/mnt/data10t/bakuphome20210617/zhangkun/vis_cosine/t2i_right_or_wrong.csv", ranks)
    # Compute metrics
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)
    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, medr, meanr), (ranks, top1)
    else:
        return (r1, r5, r10, medr, meanr)
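A minimal usage sketch (ours, not from the original notes): the similarity-matrix path is a placeholder, and only images.shape[0] is actually read from images. Note that both functions also write CSVs to the hard-coded paths above.

# Usage sketch; "sims.txt" is a hypothetical path to a saved (N, 5N) matrix.
import numpy as np

sims = np.loadtxt("sims.txt")             # (N, 5N)
images = np.zeros((sims.shape[0], 1, 1))  # placeholder: only shape[0] is used
(r1, r5, r10, medr, meanr), (ranks, top1) = i2t(images, sims, return_ranks=True)
print("i2t R@1/R@5/R@10: %.1f %.1f %.1f" % (r1, r5, r10))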
Drawing attention maps and bounding boxes
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image

# Inputs: image path, bounding boxes, post-processed attention values
# (36 values, one per region), and the index used to name the output image.
def region_attention_visualization(img_path, boxes, box_attentions, i):
    # print("load image from: ", img_path)
    # load the image
    img = Image.open(img_path, mode='r')
    # overlay each bounding box, weighted by its attention value
    img = np.ascontiguousarray(img)
    for box, attention in zip(boxes, box_attentions):
        zeros1 = np.zeros(img.shape, dtype=np.uint8)
        box = cv2.rectangle(zeros1, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 255, 255), -1)
        img = cv2.addWeighted(img, 1, box, attention, 0)
    name = "/mnt/data2/zk/vis/%d.jpg" % i
    cv2.imwrite(name, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))

# Inputs: image path and bounding boxes.
def region_visualization(img_path, bboxes):
    print("load image from: ", img_path)
    for i in range(36):
        # load the image and draw one numbered bounding box per output file
        img = Image.open(img_path, mode='r')
        img = np.ascontiguousarray(img)
        p1 = (int(bboxes[i, 0]), int(bboxes[i, 1]))
        p2 = (int(bboxes[i, 2]), int(bboxes[i, 3]))
        print(p1, p2)
        cv2.rectangle(img, p1, p2, (0, 0, 255), 2)
        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(img, '{}'.format(i), p1, font, 0.5, (255, 0, 0), 1)
        name = "/mnt/data2/zk/vis/bbx/%d.jpg" % i
        cv2.imwrite(name, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))

if __name__ == "__main__":
    flickrid_list = []
    f = open("/mnt/data10t/bakuphome20210617/lz/data/I-T/Flickr30K/f30k_precomp/test_ids.txt", "r")
    lines = f.readlines()
    for line in lines:
        flickrid_list.append(line.split()[0])
    df = pd.read_csv("/mnt/data10t/bakuphome20210617/lz/data/I-T/filename2flickrid.csv")
    total_step = len(flickrid_list)

    # ---------------------------------------------------------------------
    # # print all image-text pairs
    # for i, imgid in tqdm(enumerate(flickrid_list), total=total_step, ncols=80):
    #     a = df.loc[df['imgid'] == int(imgid)]
    #     b = a.iloc[0, 0]
    #     img_dir = "/mnt/data10t/bakuphome20210617/lz/data/I-T/flickr30k-images/" + str(b)
    #     bbox_info = np.load("/mnt/data10t/bakuphome20210617/lz/data/I-T/Flickr30K/f30k_precomp/test_ims_bbx.npy")
    #     bboxes = bbox_info[i]
    #     conf_path = "/mnt/data10t/bakuphome20210617/lz/Dim_mask_5/attn_conf/finally/" + str(i) + ".npy"
    #     conf = np.load(conf_path)
    #     conf = (conf - np.min(conf)) / (np.max(conf) - np.min(conf))  # min-max scale
    #     idxs = conf.argsort()[::-1][0:15]  # keep the top-15 most confident regions
    #     for j in range(36):
    #         if j not in idxs:
    #             conf[j] = 0.
    #     conf_sum = np.sum(conf)
    #     conf = (conf / conf_sum) * 5
    #     region_attention_visualization(img_dir, bboxes, conf, i)
    # ---------------------------------------------------------------------

    # print a selected image-text pair
    # i: index of the caption (i.e., which image-text pair)
    i = 53
    imgid = '219'
    a = df.loc[df['imgid'] == int(imgid)]
    b = a.iloc[0, 0]
    img_dir = "/mnt/data10t/bakuphome20210617/lz/data/I-T/flickr30k-images/" + str(b)
    bbox_info = np.load("/mnt/data10t/bakuphome20210617/lz/data/I-T/Flickr30K/f30k_precomp/test_ims_bbx.npy")
    bboxes = bbox_info[i]
    # conf_path = "/mnt/data10t/bakuphome20210617/lz/Dim_mask_5/attn_conf/finally/" + str(i) + ".npy"
    conf_path = "/mnt/data10t/bakuphome20210617/lz/neg_2021_9_11/attn_conf/" + str(i) + ".npy"
    conf = np.load(conf_path)
    conf = (conf - np.min(conf)) / (np.max(conf) - np.min(conf))  # min-max scale
    idxs = conf.argsort()[::-1][0:15]  # keep the top-15 most confident regions
    for j in range(36):
        if j not in idxs:
            conf[j] = 0.
    conf_sum = np.sum(conf)
    conf = (conf / conf_sum) * 1.5
    region_attention_visualization(img_dir, bboxes, conf, i)

    # ---------------------------------------------------------------------
    # # draw the raw bounding boxes for a selected image-text pair
    # i = 53
    # imgid = '219'
    # a = df.loc[df['imgid'] == int(imgid)]
    # b = a.iloc[0, 0]
    # img_dir = "/mnt/data10t/bakuphome20210617/lz/data/I-T/flickr30k-images/" + str(b)
    # bbox_info = np.load("/mnt/data10t/bakuphome20210617/lz/data/I-T/Flickr30K/f30k_precomp/test_ims_bbx.npy")
    # bboxes = bbox_info[i]
    # region_visualization(img_dir, bboxes)
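The confidence post-processing above (min-max scale, keep the top-k regions, renormalize, scale the overlay strength) can be factored into a small helper. A sketch of ours, with k and scale as the per-figure knobs; the function name is hypothetical:

# Sketch of the confidence post-processing used above, factored out.
import numpy as np

def postprocess_conf(conf, k=15, scale=1.5):
    conf = (conf - conf.min()) / (conf.max() - conf.min() + 1e-8)  # min-max scale
    keep = conf.argsort()[::-1][:k]                                # top-k regions
    out = np.zeros_like(conf)
    out[keep] = conf[keep]
    return out / out.sum() * scale                                 # overlay alphas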
Dataset JSON processing: rebuild the image file names and their corresponding captions from the dataset-split ID lists
import numpy as np
import pandas as pd
import ujson as json
from modules.basic_utils import load_json

caption_train_2014 = '/mnt/data10t/bakuphome20210617/lz/data/I-T/MS-COCO/annotations/captions_train2014.json'
caption_val_2014 = '/mnt/data10t/bakuphome20210617/lz/data/I-T/MS-COCO/annotations/captions_val2014.json'
caption_train = load_json(caption_train_2014)
caption_val = load_json(caption_val_2014)

testall_image_id = np.loadtxt('/mnt/data10t/bakuphome20210617/lz/data/I-T/MS-COCO/coco_precomp/testall_ids.txt', dtype=int)
train_image_id = np.loadtxt('/mnt/data10t/bakuphome20210617/lz/data/I-T/MS-COCO/coco_precomp/train_ids.txt', dtype=int)
dev_image_id = np.loadtxt('/mnt/data10t/bakuphome20210617/lz/data/I-T/MS-COCO/coco_precomp/dev_ids.txt', dtype=int)
print(len(testall_image_id))
print(len(train_image_id))
print(len(dev_image_id))

######################################################################
# train split: look up each image id in train2014 first, then fall back to val2014
train_json = {'images': []}
for i in range(len(train_image_id)):
    image_id = train_image_id[i]
    split = 'train'
    file_name = ''
    caption = []
    for j in range(len(caption_train['images'])):
        if image_id == caption_train['images'][j]['id']:
            file_name = caption_train['images'][j]['file_name']
            for k in range(len(caption_train['annotations'])):
                if image_id == caption_train['annotations'][k]['image_id']:
                    caption.append(caption_train['annotations'][k]['caption'])
            break
    if file_name == '':
        for j in range(len(caption_val['images'])):
            if image_id == caption_val['images'][j]['id']:
                file_name = caption_val['images'][j]['file_name']
                for k in range(len(caption_val['annotations'])):
                    if image_id == caption_val['annotations'][k]['image_id']:
                        caption.append(caption_val['annotations'][k]['caption'])
                break
    if len(caption) != 5:
        print('error!')
    data = {'imageid': str(image_id), 'split': split, 'file_name': file_name,
            'sentences': [{'raw': caption[0]}, {'raw': caption[1]}, {'raw': caption[2]},
                          {'raw': caption[3]}, {'raw': caption[4]}]}
    train_json['images'].append(data)
train_json = json.dumps(train_json)
f = open('/mnt/data2/zk/train_coco.json', 'w')
f.write(train_json)
f.close()

######################################################################
# dev split (the id list repeats each image id 5x, hence the //5 and i*5)
# dev_json = {'images': []}
# for i in range(len(dev_image_id) // 5):
#     image_id = dev_image_id[i * 5]
#     split = 'dev'
#     file_name = ''
#     caption = []
#     for j in range(len(caption_val['images'])):
#         if image_id == caption_val['images'][j]['id']:
#             file_name = caption_val['images'][j]['file_name']
#             for k in range(len(caption_val['annotations'])):
#                 if image_id == caption_val['annotations'][k]['image_id']:
#                     caption.append(caption_val['annotations'][k]['caption'])
#             break
#     if len(caption) >= 5:
#         data = {'imageid': str(image_id), 'split': split, 'file_name': file_name,
#                 'sentences': [{'raw': caption[0]}, {'raw': caption[1]}, {'raw': caption[2]},
#                               {'raw': caption[3]}, {'raw': caption[4]}]}
#         dev_json['images'].append(data)
#     else:
#         print("error!")
# dev_json = json.dumps(dev_json)
# f = open('/mnt/data2/zk/dev_coco.json', 'w')
# f.write(dev_json)
# f.close()

######################################################################
# testall split
# testall_json = {'images': []}
# for i in range(len(testall_image_id) // 5):
#     image_id = testall_image_id[i * 5]
#     split = 'testall'
#     file_name = ''
#     caption = []
#     for j in range(len(caption_val['images'])):
#         if image_id == caption_val['images'][j]['id']:
#             file_name = caption_val['images'][j]['file_name']
#             for k in range(len(caption_val['annotations'])):
#                 if image_id == caption_val['annotations'][k]['image_id']:
#                     caption.append(caption_val['annotations'][k]['caption'])
#             break
#     if len(caption) >= 5:
#         data = {'imageid': str(image_id), 'split': split, 'file_name': file_name,
#                 'sentences': [{'raw': caption[0]}, {'raw': caption[1]}, {'raw': caption[2]},
#                               {'raw': caption[3]}, {'raw': caption[4]}]}
#         testall_json['images'].append(data)
#     else:
#         print("error!")
# testall_json = json.dumps(testall_json)
# f = open('/mnt/data2/zk/testall_coco.json', 'w')
# f.write(testall_json)
# f.close()
# for i in range(len(testall_json['images'])):
#     testall_json['images'][i]['split'] = 'testall'
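The nested scans over 'images' and 'annotations' are quadratic; tolerable for a one-off script, but a dictionary index makes the lookups O(1). A sketch of ours under the same JSON layout; the helper name is hypothetical:

# Sketch: index the COCO annotations once, then look ids up directly.
from collections import defaultdict

def build_index(coco):
    id2file = {im['id']: im['file_name'] for im in coco['images']}
    id2caps = defaultdict(list)
    for ann in coco['annotations']:
        id2caps[ann['image_id']].append(ann['caption'])
    return id2file, id2caps

id2file, id2caps = build_index(caption_train)
val_id2file, val_id2caps = build_index(caption_val)
file_name = id2file.get(image_id) or val_id2file.get(image_id, '')
caption = id2caps.get(image_id) or val_id2caps.get(image_id, [])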
Ensemble code: the inputs are the models' similarity matrices
# -------------------------------------------------------------------------------------
# Negative-Aware Attention Framework for Image-Text Matching, implementation based on SCAN
# https:.
# "Negative-Aware Attention Framework for Image-Text Matching"
# Kun Zhang, Zhendong Mao, Quan Wang, Yongdong Zhang
#
# Written by Kun Zhang, 2022
# -------------------------------------------------------------------------------------
# from vocab import Vocabulary
# import evaluation
import numpy as np
import os

def i2t(im_len, sims, npts=None, return_ranks=False):
    """
    Images->Text (Image Annotation)
    Images: (N, n_region, d) matrix of images
    Captions: (5N, max_n_word, d) matrix of captions
    CapLens: (5N) array of caption lengths
    sims: (N, 5N) matrix of image-caption similarities
    """
    npts = im_len
    ranks = np.zeros(npts)
    top1 = np.zeros(npts)
    for index in range(npts):
        inds = np.argsort(sims[index])[::-1]
        # Score: best rank among the 5 ground-truth captions
        rank = 1e20
        for i in range(5 * index, 5 * index + 5, 1):
            tmp = np.where(inds == i)[0][0]
            if tmp < rank:
                rank = tmp
        ranks[index] = rank
        top1[index] = inds[0]
    # Compute metrics
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)
    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, medr, meanr), (ranks, top1)
    else:
        return (r1, r5, r10, medr, meanr)

def t2i(im_len, sims, npts=None, return_ranks=False):
    """
    Text->Images (Image Search)
    Images: (N, n_region, d) matrix of images
    Captions: (5N, max_n_word, d) matrix of captions
    CapLens: (5N) array of caption lengths
    sims: (N, 5N) matrix of image-caption similarities
    """
    npts = im_len
    ranks = np.zeros(5 * npts)
    top1 = np.zeros(5 * npts)
    # --> (5N(caption), N(image))
    sims = sims.T
    for index in range(npts):
        for i in range(5):
            inds = np.argsort(sims[5 * index + i])[::-1]
            ranks[5 * index + i] = np.where(inds == index)[0][0]
            top1[5 * index + i] = inds[0]
    # Compute metrics
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)
    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, medr, meanr), (ranks, top1)
    else:
        return (r1, r5, r10, medr, meanr)

if __name__ == '__main__':
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    isfold5 = True
    if not isfold5:
        # ## Flickr30K
        # Path_of_Model_1 = '/mnt/data2/zk/ESL_bert/checkpoint2/Flickr30K_ESL_MODELS/sim_best_flickr_521.7_learnable.txt'
        # Path_of_Model_2 = '/mnt/data2/zk/ESL_bert/checkpoint2/Flickr30K_ESL_MODELS/sim_best_flickr_522.2.txt'
        ## MS-COCO
        Path_of_Model_1 = '/mnt/data2/zk/ESL_bert/checkpoint2/COCO-LEARNABLE/sim_best_447.0_coco_5k.txt'
        Path_of_Model_2 = '/mnt/data2/zk/ESL_bert/checkpoint2/COCO-NON-LEARNABLE/sim_best_coco_446.9_non_learnable.txt'
        sims1 = np.loadtxt(Path_of_Model_1)
        sims2 = np.loadtxt(Path_of_Model_2)
        sims = (sims1 + sims2)  # summing and averaging give the same ranking
        im_len = len(sims)
        print('im length:', im_len)
        r, rt = i2t(im_len, sims, return_ranks=True)
        ri, rti = t2i(im_len, sims, return_ranks=True)
        ar = (r[0] + r[1] + r[2]) / 3
        ari = (ri[0] + ri[1] + ri[2]) / 3
        rsum = r[0] + r[1] + r[2] + ri[0] + ri[1] + ri[2]
        print("rsum: %.1f" % rsum)
        print("Average i2t Recall: %.1f" % ar)
        print("Image to text: %.1f %.1f %.1f %.1f %.1f" % r)
        print("Average t2i Recall: %.1f" % ari)
        print("Text to image: %.1f %.1f %.1f %.1f %.1f" % ri)
    else:
        results = []
        for i in range(5):
            Path_of_Model_1 = '/mnt/data2/zk/ESL_bert/checkpoint2/COCO-LEARNABLE/'
            Path_of_Model_2 = '/mnt/data2/zk/ESL_bert/checkpoint2/COCO-NON-LEARNABLE/'
            sims1 = np.loadtxt(Path_of_Model_1 + str(i) + 'sim_best.txt')
            sims2 = np.loadtxt(Path_of_Model_2 + str(i) + 'sim_best.txt')
            sim_shard = (sims1 + sims2) / 2
            im_len = len(sim_shard)
            print('im length:', im_len)
            r, rt0 = i2t(im_len, sim_shard, return_ranks=True)
            print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % r)
            ri, rti0 = t2i(im_len, sim_shard, return_ranks=True)
            print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % ri)
            if i == 0:
                rt, rti = rt0, rti0
            ar = (r[0] + r[1] + r[2]) / 3
            ari = (ri[0] + ri[1] + ri[2]) / 3
            rsum = r[0] + r[1] + r[2] + ri[0] + ri[1] + ri[2]
            print("rsum: %.1f ar: %.1f ari: %.1f" % (rsum, ar, ari))
            results += [list(r) + list(ri) + [ar, ari, rsum]]
        print("-----------------------------------")
        print("Mean metrics: ")
        mean_metrics = tuple(np.array(results).mean(axis=0).flatten())
        # each results row is [r1, r5, r10, medr, meanr, r1i, r5i, r10i, medri, meanri, ar, ari, rsum]
        print("rsum: %.1f" % mean_metrics[12])
        print("Average i2t Recall: %.1f" % mean_metrics[10])
        print("Image to text: %.1f %.1f %.1f %.1f %.1f" % mean_metrics[:5])
        print("Average t2i Recall: %.1f" % mean_metrics[11])
        print("Text to image: %.1f %.1f %.1f %.1f %.1f" % mean_metrics[5:10])
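A small generalization of ours, not from the original script: ensembling an arbitrary list of similarity matrices, optionally weighted. The final ranking is invariant to any global scale factor, so summing and averaging are equivalent.

# Sketch (ours): ensemble K similarity matrices with optional weights.
import numpy as np

def ensemble_sims(paths, weights=None):
    mats = [np.loadtxt(p) for p in paths]
    if weights is None:
        weights = [1.0 / len(mats)] * len(mats)
    return sum(w * m for w, m in zip(weights, mats))

# sims = ensemble_sims(['model_a_sims.txt', 'model_b_sims.txt'])  # placeholder paths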
Printing argparse arguments
import argparse
import logging

parser = argparse.ArgumentParser()
# parser.add_argument(...)
# ... add more arguments here
args = parser.parse_args()

# 1. print with print()
for arg in vars(args):
    print(format(arg, '<20'), format(str(getattr(args, arg)), '<'))  # name, value (cast to str)

# 2. print with logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)
message = '\n'.join([f'{k:<20}: {v}' for k, v in vars(args).items()])
logger.info(message)  # or call logging.info(message) directly
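A related convenience (our addition, not in the original notes): dump the parsed arguments to JSON next to the checkpoints so each run's configuration is reproducible.

# Our addition: persist the parsed args as JSON; the output path is a placeholder.
import json

with open('args.json', 'w') as f:
    json.dump(vars(args), f, indent=2, default=str)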
Computing caption-length statistics for a dataset
import numpy as np

captions_length = []
captions_length_bool = []
# Flickr30K: /mnt/data10t/bakuphome20210617/I-T/Flickr30K/f30k_precomp/train_precaps.txt
# MS-COCO:   /mnt/data10t/bakuphome20210617/data/coco_precomp/train_precaps_stan.txt
with open('/mnt/data10t/bakuphome20210617/data/coco_precomp/train_precaps_stan.txt', 'r') as f:
    for line in f:
        length = len(line.strip().split(',')) - 2  # minus 2, presumably for the start/end tokens
        captions_length.append(length)
        if length >= 12:
            captions_length_bool.append(1)
        else:
            captions_length_bool.append(0)

print(np.mean(captions_length))
vali_length = np.sum(captions_length_bool)
print(vali_length / len(captions_length_bool))

# Recorded results:
# Flickr30K: mean length 12.406; fraction with length >= 10: 0.6735, >= 15: 0.2702
# MS-COCO:   mean length 10.304; fraction with length >= 10: 0.5642, >= 15: 0.0556
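To pick a maximum caption length it helps to see the whole distribution rather than just the mean; a small follow-up sketch of ours (output path is a placeholder):

# Our follow-up sketch: histogram of the caption lengths collected above.
import matplotlib.pyplot as plt

plt.hist(captions_length, bins=range(0, 60), edgecolor='black')
plt.xlabel('caption length (tokens)')
plt.ylabel('number of captions')
plt.savefig('caption_length_hist.png', dpi=200)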
Visualizing vision-language alignment:
(1) Attention weights
First, save the attention weight matrices:
##################################################################################################
# Snippet from inside the model's forward pass: `cap_i_expand`, `Diagonal_Mask`,
# `img_emb`, and `self.temp_scale` come from the surrounding code.
query = cap_i_expand  # cross-attention query
smooth = torch.exp(self.temp_scale.weight)
# (batch, queryL, d) x (batch, d, sourceL) --> (batch, queryL, sourceL)
attn = torch.tanh(query @ Diagonal_Mask @ torch.transpose(img_emb, 1, 2))
# --> (batch, sourceL, queryL)
attnT = torch.transpose(attn, 1, 2).contiguous()
attn = nn.LeakyReLU(0.1)(attnT)
attn = l2norm(attn, 2)
# --> (batch, queryL, sourceL)
attn = torch.transpose(attn, 1, 2).contiguous()
attn = F.softmax(attn * smooth, dim=2)
##################################################################################################
# save the attention matrix of the ground-truth pair for later visualization
# truth = int(i / 5)
# save_path = '/home1/kunzhang/vis-DH-Set-bert/attention'
# np.save(save_path + '/{}_{}.npy'.format(i, space), attn[truth].cpu().numpy())
##################################################################################################
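The fragment assumes an l2norm helper (plus the usual torch, torch.nn as nn, torch.nn.functional as F imports). A common SCAN-style definition, shown here as our assumption rather than code from the original notes:

# Assumed SCAN-style helper; not shown in the original snippet.
import torch

def l2norm(X, dim, eps=1e-8):
    """L2-normalize X along the given dimension."""
    norm = torch.pow(X, 2).sum(dim=dim, keepdim=True).sqrt() + eps
    return torch.div(X, norm)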
Next, convert the saved matrices into one weight per box and visualize them:
import torch
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm

flickrid_list = []
f = open("/home1/kunzhang/vis-LAPS/test_ids.txt", "r")
lines = f.readlines()
for line in lines:
    flickrid_list.append(line.split()[0])
df = pd.read_csv("/home1/kunzhang/vis-DH-Set-bert/filename2flickrid.csv")
total_step = len(flickrid_list)

k = 0
for i, imgid in tqdm(enumerate(flickrid_list), total=total_step, ncols=80):
    imgi = i // 5
    if i == imgi * 5:  # one image per group of 5 captions
        a = df.loc[df['imgid'] == int(imgid)]
        b = a.iloc[0, 0]
        for cap_j in range(5):
            image_index = i + cap_j
            img_dir = "/home1/kunzhang/datasets/flickr30k-images/" + str(b)
            # print(img_dir)
            img = cv2.imread(img_dir)
            bbox_info = np.load("/home1/kunzhang/vis-DH-Set-bert/test_ims_bbx.npy")
            # print(bbox_info.shape)
            bboxes = bbox_info[image_index]
            n_reg = 9

            # attention of subspace 2; subspaces 0/1 (or their average) can be used instead:
            # pn_0 = "/home1/kunzhang/vis-DH-Set-bert/attention/" + str(image_index) + "_" + str(0) + ".npy"
            # pn_1 = "/home1/kunzhang/vis-DH-Set-bert/attention/" + str(image_index) + "_" + str(1) + ".npy"
            pn_2 = "/home1/kunzhang/vis-DH-Set-bert/attention/" + str(image_index) + "_" + str(2) + ".npy"
            attn = np.load(pn_2)  # (queryL, sourceL)
            pn = attn.mean(0)     # average over words; the min-max scaling below makes the scale irrelevant
            pn = (pn - np.min(pn)) / (np.max(pn) - np.min(pn))
            idxs = pn.argsort()[::-1][0:n_reg]  # keep the top-n_reg regions

            # place each kept region's weight at its box center, then blur into a heatmap
            target = np.zeros((img.shape[0], img.shape[1]))
            for reg_idx in idxs:
                x_1 = bboxes[reg_idx, 0]
                y_1 = bboxes[reg_idx, 1]
                x_2 = bboxes[reg_idx, 2]
                y_2 = bboxes[reg_idx, 3]
                x_mean = int((x_1 + x_2) / 2.)
                y_mean = int((y_1 + y_2) / 2.)
                target[y_mean][x_mean] = pn[reg_idx]
            heatmap = cv2.GaussianBlur(target, (251, 251), 0)
            am = np.max(heatmap)
            heatmap /= am / 200
            heatmap = np.uint8(heatmap)
            heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
            img_add = cv2.addWeighted(src1=img, alpha=1.0, src2=heatmap, beta=0.6, gamma=0)
            name = "/home1/kunzhang/vis-DH-Set-bert/images-subspaces/" + str(image_index) + "_" + str(cap_j) + "_" + str(2) + ".jpg"
            cv2.imwrite(name, img_add)
(2) t-SNE of the feature embeddings (files kept under D:\cvpr2025可视化\TSNE\code)
First, save the feature embeddings: given the word (semantic) IDs to analyze, extract the corresponding feature vectors.
f = open("/home1/kunzhang/vis-LAPS/id40.txt", "r")
lines = f.readlines()
id_list = []
for line in lines:
    line = line.strip('\n')
    id_list.append(int(line))

##################################################################################################
# Snippet from inside the evaluation loop: `caps_all_ids`, `n_word`, `i`,
# and `sim_loc_1/2/3` come from the surrounding code.
cap_i_num = caps_all_ids[i][:n_word]
for nn_id in id_list:
    exist_target = False
    if nn_id in cap_i_num:
        index = cap_i_num.tolist().index(nn_id)
        # print(index)
        exist_target = True
    if exist_target:
        cross_modal_feature = sim_loc_1[int(i / 5), index]
        np.save('./feat/cross_40551/1/' + str(i) + '_' + str(nn_id) + '.npy', cross_modal_feature.data.cpu().numpy())
        cross_modal_feature = sim_loc_2[int(i / 5), index]
        np.save('./feat/cross_40551/2/' + str(i) + '_' + str(nn_id) + '.npy', cross_modal_feature.data.cpu().numpy())
        cross_modal_feature = sim_loc_3[int(i / 5), index]
        np.save('./feat/cross_40551/3/' + str(i) + '_' + str(nn_id) + '.npy', cross_modal_feature.data.cpu().numpy())
##################################################################################################
Then read the saved files in the order given by each semantic word's file list, stack them into a feature matrix (one group per subspace), and feed it to t-SNE:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE

f = open("./id40.txt", "r")
lines = f.readlines()
wid_list = []
for line in lines:
    wid_list.append(line.strip())

for wwid, wid in enumerate(wid_list):
    npy_wid_path = "./id40/" + wid + ".txt"
    f = open(npy_wid_path, "r")
    lines = f.readlines()
    # collect the file names belonging to this semantic word
    npy_list = []
    for line in lines:
        npy_list.append(line.strip())

    arr_all = []
    arr_len = []
    # load the features, one group per subspace
    # alternatives: ./feat2/cross_40551, ./feat2/cross_40537, ./feat2/cross_40544
    for j in range(3):
        arr_list = []
        for npy_p in npy_list:
            cross_i = np.load("./cross_whole/" + str(j + 1) + "/" + npy_p)
            arr_list.append(cross_i)
        # record the number of features in this group
        arr_len.append(len(arr_list))
        arr_all.append(np.vstack(arr_list))

    # cumulative offsets of each group within the stacked matrix
    arr_lenacc = [0]
    acc = 0
    for lenv in arr_len:
        acc = acc + lenv
        arr_lenacc.append(acc)
    print(arr_lenacc)

    concat_emb = np.concatenate(arr_all, 0)
    tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=1000)  # newer scikit-learn renames n_iter to max_iter
    embeddings_tsne = tsne.fit_transform(concat_emb)  # (num_features, 2)

    color_list = ["thistle", "mediumorchid", "indigo"]  # one color per subspace
    # color_list = ["darkgreen", "darkorange", "darkred"]
    plt.figure(figsize=(6, 6))
    for i in range(3):
        plt.scatter(embeddings_tsne[arr_lenacc[i]:arr_lenacc[i + 1], 0],
                    embeddings_tsne[arr_lenacc[i]:arr_lenacc[i + 1], 1],
                    s=20, c=color_list[i])
    # plt.savefig('./figs_551/cross_one' + str(wwid) + '.png', dpi=300)
    # plt.savefig('./figs_537/cross_base' + str(wwid) + '.png', dpi=300)
    plt.savefig('./figs_whole/cross_whole' + str(wwid) + '.png', dpi=300)
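One practical note of ours: t-SNE is stochastic, so fixing random_state makes the saved figures reproducible across runs.

# Our note: fix the seed so repeated runs produce the same layout.
tsne = TSNE(n_components=2, verbose=1, perplexity=40, random_state=0)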
A variant that plots the first 20 semantic words together, one color per word:

import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE

f = open("./id40.txt", "r")
lines = f.readlines()
wid_list = []
for line in lines:
    wid_list.append(line.strip())

arr_all = []
arr_len = []
nums = 20  # visualize the first 20 semantic words together
for wwid, wid in enumerate(wid_list):
    if wwid < nums:
        npy_wid_path = "./id40/" + wid + ".txt"
        f = open(npy_wid_path, "r")
        lines = f.readlines()
        # collect the file names belonging to this semantic word
        npy_list = []
        for line in lines:
            npy_list.append(line.strip())
        # load the features: all three subspaces form one group per word
        # alternatives: ./feat2/cross_40537, ./feat2/cross_40544, ./feat2/cross_whole
        arr_list = []
        for j in range(3):
            for npy_p in npy_list:
                cross_i = np.load("./feat2/cross_40551/" + str(j + 1) + "/" + npy_p)
                arr_list.append(cross_i)
        # record the number of features for this word
        arr_len.append(len(arr_list))
        arr_all.append(np.vstack(arr_list))

arr_lenacc = [0]
acc = 0
for lenv in arr_len:
    acc = acc + lenv
    arr_lenacc.append(acc)
print(arr_lenacc)

concat_emb = np.concatenate(arr_all, 0)
tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=1000)
embeddings_tsne = tsne.fit_transform(concat_emb)  # (num_features, 2)

# one color per semantic word
color_list = ["rosybrown", "yellowgreen", "olive", "indianred", "blueviolet", "brown", "burlywood", "cadetblue",
              "chocolate", "cornflowerblue", "cyan", "darkcyan", "darkgreen", "darkkhaki", "darkmagenta",
              "darkolivegreen", "darkorange", "darkorchid", "darkred", "darksalmon", "darkseagreen",
              "darkslateblue", "darkslategray", "deeppink", "deepskyblue", "dodgerblue", "fuchsia",
              "gold", "goldenrod", "green", "mediumvioletred", "indianred", "indigo", "teal", "crimson",
              "olivedrab", "orange", "orangered", "orchid", "steelblue", "slateblue", "salmon",
              "limegreen", "maroon", "mediumaquamarine", "mediumblue", "mediumorchid", "mediumspringgreen",
              "navy", "oldlace", "lightcoral"]
# color_list = ["darkgreen", "darkorange", "darkred"]
# color_list = ["thistle", "mediumorchid", "indigo"]

plt.figure(figsize=(10, 10))
for i in range(nums):
    plt.scatter(embeddings_tsne[arr_lenacc[i]:arr_lenacc[i + 1], 0],
                embeddings_tsne[arr_lenacc[i]:arr_lenacc[i + 1], 1],
                s=10, c=color_list[i])
# optionally annotate selected points:
# for label in seleted_class:
#     i = class_name.index(label)
#     plt.annotate(label, (embeddings_tsne[i, 0], embeddings_tsne[i, 1]))
#     plt.scatter(embeddings_tsne[i, 0], embeddings_tsne[i, 1], s=21, c='red', marker='^')
# plt.legend()
# plt.savefig('./figs_551/cross_one' + str(wwid) + '.png', dpi=300)
# plt.savefig('./figs_537/cross_base' + str(wwid) + '.png', dpi=300)
plt.savefig('cross_all-' + str(nums) + '.png', dpi=300)