【WDL】6. 实践:fastp质控测序数据
功能
输入双端(Paired-End)测序序列文件,利用 fastp 进行质控(QC)和质量过滤(包括质量统计、Adapter 去除、序列 trimming、低质量序列过滤等),生成 Clean Reads 文件,以及 HTML 和 JSON 格式的质控报告。
input.json
{
    "fastp.pair_end.cpu": 2,
    "fastp.pair_end.disks": "local-disk 50 cloud_ssd",
    "fastp.pair_end.fix_mgi_id": false,
    "fastp.pair_end.html": "fastp.html",
    "fastp.pair_end.in1": "/path/to/reads_1.fastq",
    "fastp.pair_end.in2": "/path/to/reads_2.fastq",
    "fastp.pair_end.json": "fastp.json",
    "fastp.pair_end.memory": "4G",
    "fastp.pair_end.phred64": false,
    "fastp.pair_end.report_title": "'fastp report'"
}
WDL
version 1.0
workflow fastp {
    # Single-step workflow: run the `pair_end` task once and re-export its
    # results. No inputs are wired here, so the task's inputs are supplied
    # through the inputs JSON under the `fastp.pair_end.*` prefix.
    call pair_end

    output {
        # QC reports written by the task
        File html_report = pair_end.html_report
        File json_report = pair_end.json_report

        # Filtered read files (mate 1 / mate 2)
        File clean_out1 = pair_end.out1
        File clean_out2 = pair_end.out2
    }
}
# Run fastp on one pair of paired-end FASTQ files.
#
# Inputs:
#   in1, in2             read-1 / read-2 FASTQ files
#   phred64              when true, pass --phred64
#   fix_mgi_id           when true, pass --fix_mgi_id
#   adapter_sequence     passed as --adapter_sequence; unset or "" omits the flag
#   adapter_sequence_r2  passed as --adapter_sequence_r2; unset or "" omits the flag
#   reads_to_process     passed as --reads_to_process; unset omits the flag
#   json, html           file names of the JSON / HTML reports
#   report_title         inserted verbatim after --report_title, so the value
#                        must carry its own shell quoting (see the default)
#   cpu, memory, disks   runtime resources; cpu is also passed as --thread
#
# Outputs:
#   out1, out2           output reads, named "clean-" + basename of each input
#   json_report          the JSON report
#   html_report          the HTML report
task pair_end {
    input {

        # I/O options
        File in1
        File in2
        Boolean? phred64 = false
        Boolean? fix_mgi_id = false
        String? adapter_sequence
        String? adapter_sequence_r2
        Int? reads_to_process # how many reads/pairs to process; leave unset to use fastp's default
        # reporting options
        String json = "fastp.json"
        String html = "fastp.html"
        String report_title = "\'fastp report\'"
        # execution environment
        Int cpu = 2
        String memory = "4G"
        String disks = "local-disk 50 cloud_ssd"
    }

    String out1_name = "clean-" + basename(in1)
    String out2_name = "clean-" + basename(in2)

    # An adapter given as "" must behave like an unset one; otherwise the
    # command would contain a bare "--adapter_sequence" flag with no value.
    String adapter_r1 = select_first([adapter_sequence, ""])
    String adapter_r2 = select_first([adapter_sequence_r2, ""])
    String adapter_r1_opt = if adapter_r1 != "" then "--adapter_sequence " + adapter_r1 else ""
    String adapter_r2_opt = if adapter_r2 != "" then "--adapter_sequence_r2 " + adapter_r2 else ""

    command <<<
        # Everything below is ONE shell command. Do not put blank lines or
        # comments between the continued lines: a backslash followed by an
        # empty line ends the command, and the remaining options would then
        # be executed as a separate (failing) command instead of being
        # passed to fastp.
        #
        # Optional flags use the placeholder idioms
        #   ~{true="--flag" false="" bool}   and   ~{"--flag " + optional}
        # which both expand to nothing when the option is off/unset.
        /opt/conda/bin/fastp \
            --in1 "~{in1}" \
            --in2 "~{in2}" \
            --out1 "~{out1_name}" \
            --out2 "~{out2_name}" \
            --json "~{json}" \
            --html "~{html}" \
            --report_title ~{report_title} \
            --thread ~{cpu} \
            ~{true="--phred64" false="" phred64} \
            ~{"--reads_to_process " + reads_to_process} \
            ~{true="--fix_mgi_id" false="" fix_mgi_id} \
            ~{adapter_r1_opt} \
            ~{adapter_r2_opt}
    >>>

    runtime {
        cpu: cpu
        memory: memory
        disks: disks
        docker: "fastp:v0.20.1_cv1"
    }

    output {
        File out1 = out1_name
        File out2 = out2_name
        File json_report = json
        File html_report = html
    }
}
Referenced from aliyun
本文来自博客园,作者:生物信息与育种,转载请注明原文链接:https://www.cnblogs.com/miyuanbiotech/p/16270683.html。若要及时了解动态信息,请关注同名微信公众号:生物信息与育种。

 
                
            
         浙公网安备 33010602011771号
浙公网安备 33010602011771号