筛选特定ID的条目信息

使用perl

use strict;
use warnings;

# Extract the best-hit annotation for a fixed set of query IDs.
#
# Input : nr_bestout.xls -- tab-separated text with one hit per line; the
#         query ID is read from column 0, the subject description from
#         column 1 and the e-value from column 10.
# Output: nr_res.txt -- one line per requested ID, in the order of @list:
#             <id> TAB <subject> TAB <e-value>
#         or, when the ID does not occur in the input:
#             <id> TAB Unknown TAB -
#
# Only the first input row seen for a given ID is reported.

my $in_file  = 'nr_bestout.xls';
my $out_file = 'nr_res.txt';

# IDs to look up; the output keeps this order.
my @list = qw(
    TR10479|c2_g1 TR12583|c0_g1 TR15586|c0_g1 TR18003|c0_g1 TR19319|c0_g1
    TR25053|c0_g1 TR25636|c0_g1 TR6050|c2_g3  TR6472|c0_g2  TR9989|c2_g1
    TR10892|c0_g1 TR11415|c0_g1 TR11655|c0_g1 TR14439|c0_g1 TR14516|c1_g1
    TR15693|c0_g1 TR17096|c0_g1 TR17184|c0_g5 TR17244|c0_g1 TR17475|c0_g1
    TR20118|c0_g2 TR20179|c0_g1 TR22269|c0_g1 TR26674|c0_g1 TR4575|c0_g3
    TR4743|c0_g5  TR5307|c1_g2  TR5430|c1_g2  TR7186|c5_g3  TR7292|c0_g1
    TR7356|c0_g1  TR7991|c1_g2  TR8051|c2_g4  TR8198|c2_g1  TR8371|c1_g3
    TR8569|c0_g1  TR9018|c1_g4  TR9310|c2_g2
);

# Three-argument open with lexical handles; both opens are checked so a
# missing input or an unwritable output stops the script immediately.
open my $nr_fh,  '<', $in_file  or die "cannot open file $in_file: $!";
open my $res_fh, '>', $out_file or die "cannot open file $out_file: $!";

# Set of requested IDs, so rows for other queries are skipped without
# being stored.
my %wanted = map { $_ => 1 } @list;

# First hit found for each requested ID: id => [ subject, e_value ].
my %hit_for;

# Stream the input line by line instead of loading the whole file.
LINE:
while ( my $line = <$nr_fh> ) {
    # Strip the newline so it cannot end up inside the last column.
    chomp $line;

    my @fields = split /\t/, $line;
    next LINE unless @fields;    # blank line

    my $id = $fields[0];
    next LINE unless $wanted{$id};
    next LINE if exists $hit_for{$id};    # keep only the first row per ID

    # Short rows yield empty strings rather than undef.
    $hit_for{$id} = [ map { defined $_ ? $_ : '' } @fields[ 1, 10 ] ];

    # Nothing left to find: stop reading early.
    last LINE if keys(%hit_for) == keys(%wanted);
}

for my $id (@list) {
    my $hit = $hit_for{$id};
    if ($hit) {
        print {$res_fh} join( "\t", $id, @{$hit} ), "\n";
    }
    else {
        print {$res_fh} $id . "\tUnknown\t-\n";
    }
}

close $nr_fh;

# Buffered write errors (e.g. a full disk) only surface at close time.
close $res_fh or die "cannot close file $out_file: $!";

  输入文件格式

query_id	subject_id	identity	alignment_length	mismatches	gaps	query_start	query_end	subject_start	subject_end	e_value	bit_score
TR7308|c0_g1	gi|661899672|emb|CDO97666.1| unnamed protein product [Coffea canephora]	97.01	67	2	0	813	1013	67	133	4.2e-35	146.0
TR19212|c0_g1	gi|747102893|ref|XP_011099626.1| PREDICTED: zinc-finger homeodomain protein 6 [Sesamum indicum]	76.92	143	33	0	680	1108	211	353	5.9e-60	228.0
TR21369|c2_g1	gi|604321542|gb|EYU32118.1| hypothetical protein MIMGU_mgv1a019324mg, partial [Erythranthe guttata]	83.61	122	20	0	1	366	288	409	1.1e-58	224.0
TR17922|c0_g1	gi|604302721|gb|EYU22278.1| hypothetical protein MIMGU_mgv1a025105mg, partial [Erythranthe guttata]	80.81	370	71	0	3	1112	243	612	2.2e-180	628.0

  

posted @ 2018-09-11 12:46  LeleLiu  阅读(204)  评论(0)  编辑  收藏  举报