# [Code notes] Compute the per-locus error rate from four-replicate and two-replicate samples

use strict;

# Open the input table and the two output files.  Every open is checked:
# a failed open on an output handle would otherwise make all later prints
# silently do nothing.  The bareword handle names are kept because the rest
# of the script refers to DATA, OUT and LOG.
open DATA, '<', 'name1-50-a.txt' or die ("cannot open file : $!");

open OUT, '>', 'check_result_50.txt' or die ("cannot open check_result_50.txt : $!");
open LOG, '>', 'log.txt' or die ("cannot open log.txt : $!");

# @all keeps the raw input lines; @all_array holds one array ref of cells
# per non-blank line (row 0 is the header row).
my @all;
my @all_array;
@all = <DATA>;
foreach my $line (@all) {
    # Strip the line terminator (LF or CRLF) before splitting.  Without this
    # the last cell of every row keeps its "\n": the last header name then
    # contains a newline, and a last-column value of "0\n" is a true string.
    (my $record = $line) =~ s/\r?\n\z//;

    # A blank line carries no cells; skip it instead of storing an empty row.
    next if $record =~ /\A\s*\z/;

    # NOTE(review): the separator is four literal spaces as written.  If the
    # input file is actually tab-delimited this should be /\t/ -- confirm.
    push @all_array, [split(/    /, $record)];
}


# Remove duplicate elements; extract the populations and the loci
# (translated from the original author's section note).
#
# uniq(LIST) returns LIST with repeated values dropped.  The first occurrence
# of each value is kept and the original order is preserved.  Values are
# compared as hash keys, i.e. by their string form.
sub uniq {
    my %seen;
    my @kept;
    foreach my $item (@_) {
        next if $seen{$item}++;    # already returned once
        push @kept, $item;
    }
    return @kept;
}
# First cell of every row (header row included), in file order.
my @array;
foreach my $row (@all_array) {
    push @array, $row->[0];
}

# Distinct first-column names with the header label removed.
# The original filter was /[^sample]/, a negated character class: it kept any
# name containing a character other than s, a, m, p, l, e, so a real name such
# as "apple" or "sam" was dropped along with the header.  Compare against the
# label itself instead.  Undefined and empty names are still dropped, as
# before.
# NOTE(review): assumes the header row's first cell is the literal "sample",
# as the original pattern suggests -- confirm against the input file.
my @uniq_type;
@uniq_type = uniq(@array);
@uniq_type = grep { defined($_) && length($_) && $_ ne 'sample' } @uniq_type;

# Build two lookup tables from the parsed rows, both keyed
# {sample name}{locus name}:
#   %hash_data         the cell exactly as read
#   %hash_data_binary  1 if the cell is a true value in Perl terms, else 0
# Row 0 of @all_array is the header row (locus names from column 1 onwards);
# column 0 of every other row is the sample name.
#
# @loci lists each locus once, in order of first appearance.  The original
# pushed the locus name inside the per-row loop, so every locus was repeated
# once per sample row and all later per-locus work (and log output) was
# repeated that many times.
my %hash_data;
my %hash_data_binary;
my @loci;
my %locus_seen;
my $header_row = $all_array[0];
for my $i (1 .. $#all_array) {
    my $row    = $all_array[$i];
    my $sample = $row->[0];
    for my $j (1 .. $#{$row}) {
        my $locus = $header_row->[$j];
        my $value = $row->[$j];
        $hash_data{$sample}{$locus}        = $value;
        $hash_data_binary{$sample}{$locus} = $value ? 1 : 0;
        push @loci, $locus unless $locus_seen{$locus}++;
    }
}

# Derive the distinct "type" names: the part of each sample name that
# precedes the first _[01]_[FP] marker.
#
# The original ran the match without checking it and then read $`.  When a
# sample name did not match, $` still held the prematch of an earlier
# successful match (or undef), so a wrong or undefined type was recorded.
# Capture the prefix explicitly and skip names that do not follow the
# pattern.  Keys are sorted so the type order is the same on every run.
my @list_type;
foreach my $sample (sort keys %hash_data_binary){
    if ($sample =~ /\A(.*?)_[01]_[FP]/s){
        push @list_type, $1;
    }else{
        warn "sample name '$sample' has no _[01]_[FP] marker; skipped\n";
    }
}
@list_type = uniq(@list_type);

# Per-locus summary across all types.
#
# For every locus and every type, "state" is the fraction of that type's
# samples whose binary value at the locus is 1.  Then:
#   state 0 or 0.25  -> "type-locus" is written to LOG; the type is not counted
#   state 1 or 0.75  -> the type is counted with score 1
#   state 0.5        -> the type is counted with score 0
#   anything else    -> the type is counted with score 0; the state is logged
# $rate{locus}  = sum of scores / number of counted types ("" if none counted)
# $peaks{locus} = number of counted types ("0" if none counted)
my %rate;
my %peaks;

# Resolve each type's samples once instead of re-matching for every locus.
# The type is quoted and followed by the _[01]_[FP] marker, so type "A1" does
# not also collect "A10_0_F" and regex metacharacters in a name are literal.
# (The original used the bare prefix match /^$type/.)
my @types = grep { defined($_) } @list_type;
my %replicates_of;
foreach my $type (@types){
    $replicates_of{$type} =
        [ grep { /\A\Q$type\E_[01]_[FP]/ } keys %hash_data_binary ];
}

foreach my $locus (@loci){
    # @loci may name the same locus more than once; one pass each is enough
    # and avoids duplicate LOG lines.
    next if exists $rate{$locus};

    my $score_sum     = 0;
    my $types_counted = 0;
    foreach my $type (@types){
        my @members = @{ $replicates_of{$type} };
        next unless @members;    # no samples for this type: avoid dividing by zero

        my $present = 0;
        foreach my $sample (@members){
            $present += $hash_data_binary{$sample}{$locus} || 0;
        }
        my $state = $present / scalar(@members);

        if ($state == 0 or $state == 0.25){
            print LOG $type."-".$locus."\n";
            next;
        }

        my $score;
        if ($state == 1 or $state == 0.75){
            $score = 1;
        }elsif ($state == 0.5){
            $score = 0;
        }else{
            $score = 0;
            # The original read "print LOG print ...", which sent the state to
            # STDOUT and wrote the inner print's return value (1) to LOG.
            print LOG $state."\n";
        }
        $score_sum += $score;
        $types_counted ++;
    }

    if ($types_counted != 0){
        $rate{$locus} = $score_sum/$types_counted;
        $peaks{$locus} = $types_counted;
    }else{
        $rate{$locus} = "";
        $peaks{$locus} = "0";
    }
}

# Earlier variant kept by the author (rate only, no peak count):
# while((my $key, my $value) = each (%rate)){
    # print OUT $key."    ".$value."\n"
# }

# One row per locus: name, rate, number of counted types.  Keys are sorted
# because hash order differs from run to run, which made the output file
# impossible to diff between runs.
for my $locus (sort keys %rate){
    print OUT $locus."    ".$rate{$locus}."    ".$peaks{$locus}."\n";
}

##test for the usage of {} and scalar, important!!!
#print scalar(@{$all_array[1]})."\n";
#print scalar(@{[1,2,3]});

# Close every handle.  LOG was never closed in the original.  Buffered write
# errors only surface at close, so the two output handles are checked.
close (DATA);
close (OUT) or die ("cannot close check_result_50.txt : $!");
close (LOG) or die ("cannot close log.txt : $!");

 

# posted @ 2017-12-15 22:07  LeleLiu  阅读(242)  评论(0编辑  收藏  举报