#!/usr/bin/env bash
#
# ATAC-seq processing pipeline for one paired-end SRA run:
#   download -> FASTQ -> QC -> alignment -> peak calling -> duplicate removal
#   -> HINT-ATAC footprinting / motif matching -> bigWig coverage.
#
# Usage: bash <this-script> [SRA_RUN]        (default run: SRR8102131)
# Env:   THREADS  worker threads for the multi-threaded steps (default: 4)
# Needs: wget, gunzip, sra-tools (prefetch, fastq-dump), fastqc, multiqc,
#        bowtie2, samtools, sambamba, macs2, RGT (rgt-hint,
#        rgt-motifanalysis), deepTools (bamCoverage), python and the local
#        helper script/sort_mpbs.py.
#
# Default data set: https://trace.ncbi.nlm.nih.gov/Traces/sra/?run=SRR8102131
#
# NOTE: no adapter trimming is performed. Add a trimming step between the
# FASTQ conversion and the alignment if the reads need it.

# Stop at the first failing step: every stage consumes the previous output.
set -euo pipefail

readonly SAMPLE="${1:-SRR8102131}"
readonly THREADS="${THREADS:-4}"

readonly RAW_DIR="data/raw_data"
readonly FQ1="${RAW_DIR}/${SAMPLE}_1.fastq.gz"
readonly FQ2="${RAW_DIR}/${SAMPLE}_2.fastq.gz"
readonly REF_FASTA="ref/ucsc.hg19.fasta"
readonly BT2_INDEX="ref/hg19"
readonly SORTED_BAM="analysis/BAM/${SAMPLE}_sorted.bam"
readonly DEDUP_BAM="analysis/dupbam/${SAMPLE}_markdupba.bam"
readonly PEAKS="analysis/peak/${SAMPLE}_peak_peaks.narrowPeak"
readonly MPBS_BED="analysis/footprint/MPBS/${SAMPLE}_mpbs.bed"

# --- download reference and reads -------------------------------------------
mkdir -p "$RAW_DIR" analysis ref

# The pattern is quoted so that wget (not the shell) expands the FTP glob;
# -P saves into ref/ without changing the working directory.
wget -c -P ref \
  'ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg19/ucsc.hg19*'

# The download is named ucsc.hg19*; unpack the FASTA if it arrived gzipped
# (assumed name: ucsc.hg19.fasta.gz - verify against the bundle listing).
if [[ ! -s "$REF_FASTA" && -s "${REF_FASTA}.gz" ]]; then
  gunzip -c "${REF_FASTA}.gz" > "${REF_FASTA}.tmp"
  mv "${REF_FASTA}.tmp" "$REF_FASTA"
fi
if [[ ! -s "$REF_FASTA" ]]; then
  printf 'error: reference FASTA %s not found\n' "$REF_FASTA" >&2
  exit 1
fi

prefetch "$SAMPLE"

# Link whatever prefetch stored in the classic sra-tools cache into the
# working directory; skip quietly when that cache location is empty.
for sra_file in ~/ncbi/public/sra/*; do
  [[ -e "$sra_file" ]] || continue
  ln -sf "$sra_file" .
done

# --- convert the SRA run to gzipped paired FASTQ with fastq-dump ------------
# The defline templates are single-quoted: $ac/$si/$ri/$rl are fastq-dump
# placeholders, not shell variables.
fastq-dump --split-3 --defline-qual '+' \
  --defline-seq '@$ac-$si/$ri length=$rl' \
  --gzip "$SAMPLE" -O "$RAW_DIR"

# --- quality control with fastqc / multiqc ----------------------------------
mkdir -p analysis/fastqc
fastqc "$FQ1" "$FQ2" -o analysis/fastqc
multiqc analysis/fastqc/ -o analysis/fastqc/

# --- read mapping with bowtie2 ----------------------------------------------
mkdir -p analysis/BAM
bowtie2-build --threads "$THREADS" "$REF_FASTA" "$BT2_INDEX"

# bowtie2 writes the @RG header line only when --rg-id is given; --rg merely
# appends further fields to it. -F 4 drops unmapped reads before sorting.
bowtie2 -x "$BT2_INDEX" -p "$THREADS" --reorder -X 2000 \
  --rg-id "sample_1" --rg "PL:illumina" --rg "SM:${SAMPLE}" \
  -1 <(zcat "$FQ1") \
  -2 <(zcat "$FQ2") \
  | samtools view -F 4 -bS - \
  | samtools sort --threads "$THREADS" -m 2G -o "$SORTED_BAM" -
sambamba index -t "$THREADS" "$SORTED_BAM"

# --- peak calling with macs2 ------------------------------------------------
# Peaks are called on the sorted BAM, i.e. before duplicate removal.
mkdir -p analysis/peak/
macs2 callpeak --shift -100 --extsize 200 --SPMR --nomodel -B -g hs -q 0.01 \
  -t "$SORTED_BAM" -n "analysis/peak/${SAMPLE}_peak"

# --- duplicate removal with sambamba ----------------------------------------
mkdir -p analysis/dupbam
# Raise the open-file limit before markdup; a refusal (hard limit too low)
# is reported but not fatal.
ulimit -n 10240 \
  || printf 'warning: could not raise open-file limit to 10240\n' >&2
sambamba markdup -r -p -t "$THREADS" "$SORTED_BAM" "$DEDUP_BAM"
sambamba index -t "$THREADS" "$DEDUP_BAM"

# --- HINT-ATAC footprinting -------------------------------------------------
mkdir -p analysis/footprint analysis/bigwig
rgt-hint footprinting --atac-seq --paired-end --organism=hg19 \
  --output-location=analysis/footprint --output-prefix="$SAMPLE" \
  "$DEDUP_BAM" "$PEAKS"

# Bias-corrected signal track; the output directory is created above so the
# tool never has to write into a missing location.
rgt-hint tracks --bc --bigWig --organism=hg19 \
  --output-location=analysis/bigwig --output-prefix="${SAMPLE}_BC" \
  "$DEDUP_BAM" "$PEAKS"

# Motif matching inside the predicted footprints.
mkdir -p analysis/footprint/MPBS
rgt-motifanalysis matching --organism=hg19 \
  --input-files="analysis/footprint/${SAMPLE}.bed" \
  --output-location=analysis/footprint/MPBS

mkdir -p analysis/footprint/differential
# script/sort_mpbs.py is a project-local helper that is not part of this
# file; its exact behaviour (and the meaning of "-l 3") must be checked there.
python script/sort_mpbs.py -f "$MPBS_BED" -l 3
# NOTE(review): only one MPBS file / BAM / condition is supplied here, while
# a differential analysis normally compares several - confirm the intent.
rgt-hint differential --organism=hg19 --bc --nc 2 \
  --mpbs-files="$MPBS_BED" \
  --reads-files="$DEDUP_BAM" \
  --conditions="$SAMPLE" \
  --output-location=analysis/footprint/differential

# --- bigWig quantitative coverage file --------------------------------------
bamCoverage -b "$DEDUP_BAM" --ignoreDuplicates \
  --skipNonCoveredRegions \
  --normalizeUsing RPKM \
  --binSize 1 -p max -o "analysis/bigwig/${SAMPLE}.bw"

注意:以上代码没有进行去接头(adapter trimming)操作,请读者在比对之前自行添加!

文章链接 https://pubmed.ncbi.nlm.nih.gov/30808370/

 
posted on 2021-01-05 16:31  蒟蒻、  阅读(554)  评论(0)    收藏  举报