# "点击查看代码" ("click to view code") — blog paste artifact; kept as a comment so the file parses
import argparse
import os
from pickle import FALSE
import threading
import shutil
import datetime
import json
import time
import csv
import subprocess
def execCmd(cmd):
    """Run *cmd* through the shell and return the integer it prints.

    The pipeline submits jobs with commands like ``sbatch file.slurm`` whose
    stdout is a single integer (the Slurm job id, thanks to ``--parsable``).

    Returns:
        int job id on success, or None when the command fails or its output
        is not a single integer (the original bare ``except`` also fell
        through to an implicit None).
    """
    try:
        print("命令{}开始运行{}".format(cmd, datetime.datetime.now()))
        # NOTE(review): shell=True with an interpolated string is a shell
        # injection risk; tolerated here because cmds are built locally.
        out = subprocess.check_output(cmd, shell=True)
        pid = int(out.decode('utf-8').replace("\n", ""))
        print(pid)
        print("命令{}结束运行{}" .format(cmd, datetime.datetime.now()))
        return pid
    except (subprocess.CalledProcessError, ValueError, UnicodeDecodeError):
        print("{}\t运行失败".format(cmd))
        return None
# Module-level shared state used throughout the pipeline:
#   run_cmds - running log of every command submitted (closed in main()).
#   log/     - directory Slurm stdout/stderr files are written into.
#   RootDir  - absolute project root, embedded in generated paths.
run_cmds = open("all.commands", 'w')
os.makedirs('log', exist_ok=True)  # was os.system('mkdir log'); idempotent now
RootDir = os.path.abspath('./')
def slurm(SlurmName, jobname, cmd, coren, partition=None):
    """Write a single-node Slurm batch script.

    Args:
        SlurmName: path of the ``.slurm`` file to create.
        jobname:   Slurm job name; also names log/<jobname>.{out,err}.
        cmd:       shell command line(s) appended after the #SBATCH header.
        coren:     CPU cores requested (-c).
        partition: Slurm partition (-p). Defaults to the module-level
                   ``batch`` set in main(), preserving the old behavior.
    """
    if partition is None:
        partition = batch
    header = [
        "#!/bin/bash\n",
        "#SBATCH -p {}\n".format(partition),
        "#SBATCH -N 1\n",
        "#SBATCH -n 1\n",
        "#SBATCH -c {}\n".format(coren),
        "#SBATCH --job-name={}\n".format(jobname),
        "#SBATCH --output=log/{}.out\n".format(jobname),
        "#SBATCH --error=log/{}.err\n".format(jobname),
        "#SBATCH --parsable\n",
    ]
    # `with` guarantees the handle is closed even on error (original leaked
    # it if a write raised).
    with open(SlurmName, "w") as fp:
        fp.writelines(header)
        fp.write(cmd)
def slurmdep(ffile, jobname, cmd, pid, coren, partition=None):
    """Write a Slurm batch script that starts only after job *pid* succeeds.

    Same as slurm() plus ``--dependency=afterok:<pid>:+5`` (start 5 minutes
    after the dependency completes OK).

    Args:
        ffile:     path of the ``.slurm`` file to create.
        jobname:   Slurm job name; also names log/<jobname>.{out,err}.
        cmd:       shell command line(s) appended after the #SBATCH header.
        pid:       job id this job depends on.
        coren:     CPU cores requested (-c).
        partition: Slurm partition; defaults to the module-level ``batch``.
    """
    if partition is None:
        partition = batch
    header = [
        "#!/bin/bash\n",
        "#SBATCH -p {}\n".format(partition),
        "#SBATCH -N 1\n",
        "#SBATCH -n 1\n",
        "#SBATCH -c {}\n".format(coren),
        "#SBATCH --job-name={}\n".format(jobname),
        "#SBATCH --output=log/{}.out\n".format(jobname),
        "#SBATCH --error=log/{}.err\n".format(jobname),
        "#SBATCH --parsable\n",
        "#SBATCH --dependency=afterok:{}:+5\n".format(pid),
    ]
    with open(ffile, "w") as fp:
        fp.writelines(header)
        fp.write(cmd)
def slurmdepmem(ffile, jobname, cmd, pid, coren, mem, partition=None):
    """Write a dependent Slurm batch script with an explicit memory request.

    Same as slurmdep() plus ``--mem=<mem>`` (e.g. "100G").

    Args:
        ffile:     path of the ``.slurm`` file to create.
        jobname:   Slurm job name; also names log/<jobname>.{out,err}.
        cmd:       shell command line(s) appended after the #SBATCH header.
        pid:       job id this job depends on (afterok:<pid>:+5).
        coren:     CPU cores requested (-c).
        mem:       memory request string passed to --mem.
        partition: Slurm partition; defaults to the module-level ``batch``.
    """
    if partition is None:
        partition = batch
    header = [
        "#!/bin/bash\n",
        "#SBATCH -p {}\n".format(partition),
        "#SBATCH -N 1\n",
        "#SBATCH -n 1\n",
        "#SBATCH -c {}\n".format(coren),
        "#SBATCH --mem={}\n".format(mem),
        "#SBATCH --job-name={}\n".format(jobname),
        "#SBATCH --output=log/{}.out\n".format(jobname),
        "#SBATCH --error=log/{}.err\n".format(jobname),
        "#SBATCH --parsable\n",
        "#SBATCH --dependency=afterok:{}:+5\n".format(pid),
    ]
    with open(ffile, "w") as fp:
        fp.writelines(header)
        fp.write(cmd)
# Shared map: sample id -> library/sample type. Filled by PrepareFastq()
# from the SampleType sheet and read later by QC().
sample_type={}
def PrepareFastq(DataManage,Contract,SampleType):
    """Stage raw FASTQ data per sample and submit the lane-merge Slurm job.

    Args:
        DataManage: root of the downloaded data tree (globbed below).
        Contract:   contract id; selects the subdirectory to scan.
        SampleType: path of the sample sheet (whitespace-separated,
                    header line starts with "sample"; col1=id, col2=type).

    Returns:
        (sample_type, pid): the shared sample-id -> type map and the Slurm
        job id of the submitted merge job (may be None if sbatch failed).
    """
    # Glob pattern for the contract's raw data: <DataManage>*/<Contract>/*
    downdir=DataManage + '*/' + Contract+ '/'+'*'
    # External perl script classifies raw data by sample id and writes
    # rawfastq.sh; presumably it also produces the raw_fastq.txt consumed
    # below — TODO confirm against ClassifyRawdataToSampleID.pl.
    cmd = r'/PERSONALBIO/work/singlecell/s00/software/miniconda3/envs/stdpipe/bin/perl /PERSONALBIO/work/singlecell/s01/workdir/Inhouse_function/multi/ClassifyRawdataToSampleID.pl -m {} -r "{}" -o {} >rawfastq.sh'.format(SampleType,str(downdir),RootDir)
    os.system(cmd)
    # Parse the sample sheet into the module-level sample_type map.
    for line in open(SampleType):
        if not line.startswith("sample"):
            line=line.strip().split()
            sample_type[line[0]]=line[1]
    # One staging directory per sample.
    for j in sample_type.keys():
        os.system(f'mkdir -p 1.raw_data/{j}')
    # awk program that turns raw_fastq.txt rows (cols 3/4 = R1/R2 paths)
    # into `cat` commands concatenating lanes into
    # 1.raw_data/<id>/<id>_S1_L001_R{1,2}_001.fastq.gz
    nn=r''' awk -F "\t" '{print "cat " $3 ">>1.raw_data/"$1"/"$1"_S1_L001_R1_001.fastq.gz";print "cat " $4 ">>1.raw_data/"$1"/"$1"_S1_L001_R2_001.fastq.gz" }' '''
    cmd2=f'{nn} raw_fastq.txt > merge.sh && sh merge.sh '
    SlurmName= 'PrepareFastq.slurm'
    slurm(SlurmName,'PrepareFastq',cmd2,2)
    cmd=f'sbatch {SlurmName}'
    pid = execCmd(cmd)
    run_cmds.write("{}\n".format(cmd2))
    return sample_type,pid
def step1(species):
    """Look up the genome references for *species* in the shared multi.json.

    Args:
        species: key into the config, e.g. "hsa".

    Returns:
        (gene_expression_reference, vdj_reference) paths.

    Raises:
        KeyError if *species* is not in the config.
    """
    genome_config = "/PERSONALBIO/work/singlecell/s01/workdir/Inhouse_function/multi/multi.json"
    # `with` closes the config handle (the original leaked it).
    with open(genome_config) as fh:
        db_dic = json.load(fh)
    entry = db_dic[species]
    # NOTE(review): the original also read entry['feature'] but never used
    # or returned it; dropped here.
    return entry['gene-expression'], entry['vdj']
# Module-level accumulators shared between PrepareCsv() and RunMulti():
#   sample_info_dic: sample id -> second sheet column
#   e: gene-expression library rows; v: VDJ library rows
#   dd: combined rows; vv: rows wrapped as 1-element lists for csv.writer
sample_info_dic={}
e = [];v=[];dd = []
vv = []
def PrepareCsv(species, sample_info, immune, pid):
    """Build the cellranger-multi config CSVs and submit the prepare job.

    Writes ./config.csv (the [gene-expression]/[vdj]/[libraries] template)
    and ./new.csv (one library row per sample), then submits put.R as a
    Slurm job dependent on *pid*.

    Args:
        species:     genome key in multi.json (e.g. "hsa").
        sample_info: sample sheet path (header starts with "sample").
        immune:      'TCR', 'BCR' or 'ALL' — which VDJ libraries to add.
        pid:         upstream Slurm job id this job depends on.

    Returns:
        (sample_info_dic, pid): the shared sample map and the new job id.
    """
    genome_config = "/PERSONALBIO/work/singlecell/s01/workdir/Inhouse_function/multi/multi.json"
    with open(genome_config) as fh:
        db_dic = json.load(fh)
    geneexpression = db_dic[species]['gene-expression']
    vdj = db_dic[species]['vdj']
    with open(r'./config.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        # BUGFIX: the original wrote step1("hsa") here, silently ignoring
        # the *species* argument; use the references looked up above.
        # (Also: the tuple was bound to a name shadowing builtin `list`.)
        rows = (
            ['[gene-expression]'],
            ['reference', geneexpression],
            ['include-introns', 'false'],
            [],
            ['[vdj]'],
            ['reference', vdj],
            [],
            ['[libraries]'],
            ['fastq_id', 'fastqs', 'lanes', 'feature_types', 'subsample_rate'],
        )
        writer.writerows(rows)
    for line in open(sample_info):
        if not line.startswith("sample"):
            line = line.strip().split()
            sample_info_dic[line[0]] = line[1]
    for i in sample_info_dic.keys():
        e.append(i + ',' + RootDir + '/1.raw_data/' + i + ',' + '1|2' + ',' + 'Gene Expression,')
        if immune == 'TCR':
            v.append(i + '-T,' + RootDir + '/1.raw_data/' + i + '-T,' + '1|2' + ',' + 'VDJ-T,')
        elif immune == "BCR":
            v.append(i + '-B,' + RootDir + '/1.raw_data/' + i + '-B,' + '1|2' + ',' + 'VDJ-B,')
        elif immune == 'ALL':
            v.append(i + '-T,' + RootDir + '/1.raw_data/' + i + '-T,' + '1|2' + ',' + 'VDJ-T,')
            v.append(i + '-B,' + RootDir + '/1.raw_data/' + i + '-B,' + '1|2' + ',' + 'VDJ-B,')
    # Combine once after the loop (the original rebuilt this every
    # iteration, and crashed with UnboundLocalError on an empty sheet).
    dd = e + v
    print(dd)
    for row in dd:
        cc = [row, ]
        print(cc)
        vv.append(cc)
    with open('./new.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(vv)
    cmdrun = '/PERSONALBIO/work/singlecell/s01/.conda/envs/py4/bin/Rscript /PERSONALBIO/work/singlecell/s01/workdir/Inhouse_function/multi/put.R'
    name = 'preparescv.slurm'
    slurmdep(name, 'preparecsv', cmdrun, pid, 6)
    cmd = f'sbatch {name}'
    pid = execCmd(cmd)
    run_cmds.write("{}\n".format(cmdrun))
    return sample_info_dic, pid
class MyThread(threading.Thread):
    """Thread that captures its target's return value.

    ``threading.Thread`` discards the target function's result; this
    subclass stores it so callers can fetch it with get_result() after
    join() — used by RunMulti() to collect sbatch job ids.
    """

    def __init__(self, func, args=()):
        super(MyThread, self).__init__()
        self.func = func
        self.args = args

    def run(self):
        # Save the result so get_result() can return it after join().
        self.result = self.func(*self.args)

    def get_result(self):
        """Return the target's result, or None if run() has not completed."""
        # getattr replaces the original try/except AttributeError dance.
        return getattr(self, "result", None)
def QC(pid):
    """Submit one FastQC job per sample, each dependent on job *pid*.

    Submissions run in parallel threads, staggered 5 s apart; this function
    blocks until every sbatch call has returned. Job ids are not collected.

    Args:
        pid: upstream Slurm job id the QC jobs depend on.
    """
    cmds = []
    for sample in sample_type.keys():
        qc = RootDir + '/' + 'QC' + '/' + sample
        # os.makedirs replaces os.system(f'mkdir -p {qc}')
        os.makedirs(qc, exist_ok=True)
        cmd4 = f'/PERSONALBIO/work/singlecell/s00/software/FastQC/fastqc -j /PERSONALBIO/work/singlecell/s00/software/miniconda3/envs/stdpipe/bin/java -t 6 -o {qc} 1.raw_data/{sample}/*gz '
        run_cmds.write("{}\n".format(cmd4))
        name = 'qc.' + sample + '.slurm'
        slurmdep(name, 'QC', cmd4, pid, 6)
        cmds.append(f'sbatch {name}')
    threads = []
    for cmd in cmds:
        th = threading.Thread(target=execCmd, args=(cmd,))
        th.start()
        time.sleep(5)  # stagger submissions to avoid hammering the scheduler
        threads.append(th)
    for th in threads:
        th.join()
def RunMulti(pid, mem):
    """Submit one `cellranger multi` job per sample and collect job ids.

    Each job depends on *pid* and requests *mem* memory; submissions run in
    parallel MyThread workers (5 s apart) so their sbatch output (job ids)
    can be collected.

    Args:
        pid: upstream Slurm job id the multi jobs depend on.
        mem: memory request string (e.g. "100G") passed to --mem.

    Returns:
        The job id of the LAST submitted multi job (the others are
        discarded — downstream formatting depends on just this one).
    """
    cmds = []
    for sample in sample_info_dic.keys():
        # renamed from `csv`, which shadowed the imported csv module
        csv_path = sample + '.csv'
        cmdrun = "/PERSONALBIO/work/singlecell/s00/software/script/1.source/cellranger-7.1.0/bin/cellranger multi --id={} --csv={} --localcores 8".format(sample, csv_path)
        MultiSlurm = "multi." + sample + ".slurm"
        slurmdepmem(MultiSlurm, "multi", cmdrun, pid, 6, mem)
        cmds.append(f'sbatch {MultiSlurm}')
        run_cmds.write("{}\n".format(cmdrun))
    threads = []
    pids = []
    for cmd in cmds:
        th = MyThread(execCmd, args=(cmd,))
        th.start()
        time.sleep(5)  # stagger submissions
        threads.append(th)
    for th in threads:
        th.join()  # must join before get_result() so the result exists
        pids.append(th.get_result())
    # NOTE(review): only the final job id is returned, so Run_Format waits
    # on a single multi job — TODO confirm this is intended.
    multid = pids.pop()
    return multid
def Run_Format(pid, Contract, immune):
    """Submit the result-formatting job, dependent on job *pid*.

    Args:
        pid:      Slurm job id of the (last) cellranger multi job.
        Contract: contract id passed to format.1.sh.
        immune:   immune library choice ('TCR'/'BCR'/'ALL') passed along.

    Returns:
        The submitted job's Slurm id (or None if sbatch failed).
    """
    # BUGFIX: the original concatenated 'format.1.sh'+Contract without a
    # space, producing `bash format.1.sh<Contract> <immune>`.
    cmdrun = 'cp /PERSONALBIO/work/singlecell/s01/workdir/Inhouse_function/multi2/format.1.sh ./ && bash format.1.sh ' + Contract + " " + immune
    name = 'format.1.slurm'
    slurmdep(name, 'format.1', cmdrun, pid, 6)
    cmd = f'sbatch {name}'
    pid = execCmd(cmd)
    run_cmds.write("{}\n".format(cmdrun))
    return pid
def main():
    """Parse CLI arguments and run the whole pipeline end to end.

    Pipeline: stage FASTQs -> build config CSVs -> FastQC -> cellranger
    multi -> format results, each stage chained via Slurm dependencies.
    """
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('--DataManage', required=True, type=str,
                        default='/PERSONALBIO/work/datamanage/Datum/singlecell/2023/03/',
                        help='download data dir ')
    parser.add_argument('--sample_info', type=str, default="sample_info.txt", required=True,
                        help='sample info')
    parser.add_argument('--SampleType', type=str, default="sample.txt", required=True,
                        help='sample name and library name')
    parser.add_argument('--immune', required=True, type=str,
                        default='TCR',
                        help='choose immune rep databases,related to immune rep databases to use,\
default database TCR. Other option is BCR')
    parser.add_argument('--batch', required=True, type=str,
                        default='Batch3',
                        help='batch node')
    parser.add_argument('--species', required=True, type=str,
                        default='hsa',
                        help='choose species,related to genome and database to use ')
    parser.add_argument('--Contract', required=True, default='Contract', type=str,
                        help='Contract')
    parser.add_argument('--mem', required=True, type=str,
                        default='100G',
                        help='mem')
    # default is now a real int (was the string "1" run through type=int)
    parser.add_argument('--step', type=int, default=1, required=True,
                        help='selece step')
    args = parser.parse_args()
    # The slurm* writers read the partition from this module-level global.
    global batch
    batch = args.batch
    sample_type, pid1 = PrepareFastq(args.DataManage, args.Contract, args.SampleType)
    # NOTE(review): this step1() result is discarded (PrepareCsv does its
    # own lookup); kept only to fail fast on an unknown species.
    step1(args.species)
    sample_info_dic, pid2 = PrepareCsv(args.species, args.sample_info, args.immune, pid1)
    QC(pid2)
    multid = RunMulti(pid2, args.mem)
    pid = Run_Format(multid, args.Contract, args.immune)
    # BUGFIX: the command log was only closed when --step 2 was given,
    # leaking the handle (and possibly unflushed lines) otherwise.
    run_cmds.close()


if __name__ == '__main__':
    main()