vcf2maf

1. install VEP

(1) prerequisite

# Switch to root first: the apt-get commands and the symlink under
# /usr/include below are system-wide changes. `su` opens an interactive
# shell, so the remaining commands are meant to be typed into that shell.
su

# Refresh the package index, upgrade installed packages, then install Perl.
# -y answers the confirmation prompts so the steps also work unattended.
apt-get update
apt-get upgrade -y
apt-get install -y perl

# Perl packages install
cpanm DBI
cpanm Archive::Zip
# DBD::mysql is installed through cpanm because conda has no DBD::mysql package.
cpanm DBD::mysql

# Fix for "C compiler not found": install the build toolchain.
apt-get install -y build-essential

conda update --all -c conda/label/cf201901
conda update --all -c conda-forge/label/cf201901

# Fix for "xlocale.h not found" on Ubuntu while installing:
# make locale.h reachable under the xlocale.h name.
ln -s /usr/include/locale.h /usr/include/xlocale.h

# zlib, htslib, samtools and liftover are installed with conda
# (export the conda PATH in .bashrc); only the liftover command is shown here.
conda install -c bioconda/label/cf201901 ucsc-liftover

 (2) download VEP

apt-get install -y build-essential git libncurses-dev

# VEP_PATH: directory the VEP release is moved to.
# VEP_DATA: directory used for the VEP cache and downloaded data.
# VER:      VEP release number used throughout these notes.
export VEP_PATH="$HOME/vep"
export VEP_DATA="$HOME/.vep"
export VER=96

# Create the data directory at its real location ($HOME/.vep) rather than
# relative to whatever the current directory happens to be.
mkdir -p "$VEP_DATA"

# Download VEP release $VER, unpack it and move it to $VEP_PATH.
curl -L -O "https://github.com/Ensembl/ensembl-vep/archive/release/${VER}.zip"
unzip "${VER}.zip"
rm "${VER}.zip"
mv "ensembl-vep-release-${VER}" "$VEP_PATH"

# NOTE: htslib and tabix must be in the same folder so that cache_convert can work.
# ${PERL5LIB:+...} avoids a dangling ':' when PERL5LIB was previously empty.
export PERL5LIB="${VEP_PATH}${PERL5LIB:+:${PERL5LIB}}"

cd "$VEP_PATH"

# Download the cache file.
# Do not use rsync: it was too slow and kept failing.
cd "$VEP_DATA"
curl -O "ftp://ftp.ensembl.org/pub/release-${VER}/variation/vep/homo_sapiens_vep_${VER}_GRCh37.tar.gz"
tar -izxf "homo_sapiens_vep_${VER}_GRCh37.tar.gz" -C "$VEP_DATA"

# INSTALL.pl and convert_cache.pl are invoked by relative name, so go back to
# the VEP directory before running them (the shell is in $VEP_DATA here).
cd "$VEP_PATH"

# Download the API.
perl INSTALL.pl --AUTO a --DESTDIR "$VEP_PATH" --CACHEDIR "$VEP_DATA" --NO_HTSLIB

# Download the reference FASTA.
perl INSTALL.pl --AUTO f --SPECIES homo_sapiens --ASSEMBLY GRCh37 \
  --DESTDIR "$VEP_PATH" --CACHEDIR "$VEP_DATA"

# Convert the cache.
perl convert_cache.pl --species homo_sapiens --version "${VER}_GRCh37" --dir "$VEP_DATA"

 

# Download the ExAC r0.3.1 VCF into $VEP_DATA.
cd "$VEP_DATA"
curl -L "ftp://ftp.broadinstitute.org:/pub/ExAC_release/release0.3.1/subsets/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz" \
  > "$VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz"

# Extra FILTER header line passed to `bcftools annotate --header-lines`.
printf '%s\n' '##FILTER=<ID=AC_Adj0_Filter,Description="Only low quality genotype calls containing alternate alleles are present">' \
  > header_line.tmp

# BED file used as the (negated, leading '^') targets file of `bcftools filter`.
curl -LO https://raw.githubusercontent.com/mskcc/vcf2maf/v1.6.16/data/known_somatic_sites.bed

# One pipeline: annotate the downloaded VCF (add the header line, apply the
# --remove list), then filter against the BED file and write a bgzipped
# "fixed" VCF. The input file and the pipe must stay on the same logical
# command as `bcftools annotate`.
bcftools annotate \
  --header-lines header_line.tmp \
  --remove 'FMT,^INF/AF,INF/AC,INF/AN,INF/AC_Adj,INF/AN_Adj,INF/AC_AFR,INF/AC_AMR,INF/AC_EAS,INF/AC_FIN,INF/AC_NFE,INF/AC_OTH,INF/AC_SAS,INF/AN_AFR,INF/AN_AMR,INF/AN_EAS,INF/AN_FIN,INF/AN_NFE,INF/AN_OTH,INF/AN_SAS' \
  "$VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz" \
  | bcftools filter \
      --targets-file '^known_somatic_sites.bed' \
      --output-type z \
      --output "$VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.fixed.vcf.gz"

# Replace the downloaded VCF with the fixed one and index it.
mv -f "$VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.fixed.vcf.gz" "$VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz"
tabix -p vcf "$VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz"

# Test run of VEP on the GRCh37 example VCF.
# ./vep and the examples/ paths are relative, so change into the VEP
# directory first.
cd "$VEP_PATH"
./vep \
  --species homo_sapiens --assembly GRCh37 \
  --offline --no_progress --no_stats \
  --sift b --ccds --uniprot --hgvs --symbol --numbers --domains \
  --gene_phenotype --canonical --protein --biotype --tsl --pubmed \
  --variant_class --shift_hgvs 1 --check_existing --total_length \
  --allele_number --no_escape --xref_refseq --failed 1 \
  --vcf --minimal --flag_pick_allele \
  --pick_order canonical,tsl,biotype,rank,ccds,length \
  --dir "$VEP_DATA" \
  --fasta "$VEP_DATA/homo_sapiens/${VER}_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz" \
  --input_file examples/homo_sapiens_GRCh37.vcf \
  --output_file examples/homo_sapiens_GRCh37.vep.vcf \
  --polyphen b --af --af_1kg --af_esp --regulatory
error:

2. install vcf2maf

# Take the first tarball_url in the GitHub releases listing for mskcc/vcf2maf.
# Assignment is kept separate from `export` so a failing pipeline is not
# masked by export's own exit status.
VCF2MAF_URL=$(curl -sL https://api.github.com/repos/mskcc/vcf2maf/releases \
  | grep -m1 tarball_url \
  | cut -d'"' -f4)
export VCF2MAF_URL

# Download, unpack and enter the extracted directory; each step only runs if
# the previous one succeeded.
curl -L -o mskcc-vcf2maf.tar.gz "$VCF2MAF_URL" \
  && tar -zxf mskcc-vcf2maf.tar.gz \
  && cd mskcc-vcf2maf-*

# Show the vcf2maf manual as a check that the script runs.
perl vcf2maf.pl --man

 

---恢复内容结束---

# Extra FILTER header line passed to `bcftools annotate --header-lines`.
printf '%s\n' '##FILTER=<ID=AC_Adj0_Filter,Description="Only low quality genotype calls containing alternate alleles are present">' \
  > header_line.tmp

# BED file used as the (negated, leading '^') targets file of `bcftools filter`.
curl -LO https://raw.githubusercontent.com/mskcc/vcf2maf/v1.6.16/data/known_somatic_sites.bed

# One pipeline: annotate the ExAC VCF (add the header line, apply the --remove
# list), then filter against the BED file and write a bgzipped "fixed" VCF.
# The input file and the pipe must stay on the same logical command as
# `bcftools annotate`.
bcftools annotate \
  --header-lines header_line.tmp \
  --remove 'FMT,^INF/AF,INF/AC,INF/AN,INF/AC_Adj,INF/AN_Adj,INF/AC_AFR,INF/AC_AMR,INF/AC_EAS,INF/AC_FIN,INF/AC_NFE,INF/AC_OTH,INF/AC_SAS,INF/AN_AFR,INF/AN_AMR,INF/AN_EAS,INF/AN_FIN,INF/AN_NFE,INF/AN_OTH,INF/AN_SAS' \
  "$VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz" \
  | bcftools filter \
      --targets-file '^known_somatic_sites.bed' \
      --output-type z \
      --output "$VEP_DATA/ExAC_nonTCGA.r0.3.1.sites.fixed.vcf.gz"

posted @ 2019-05-16 19:52  xiaoxiaoxiaoxue  阅读(762)  评论(0编辑  收藏  举报