1 Star 0 Fork 0

jinlong / Han-Tibetan-ONT-SV

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
pipeline.sv.calling.sh 4.17 KB
一键复制 编辑 原始数据 按行查看 历史
jinlong 提交于 2022-02-17 17:23 . update README.md and main figure code
##-------------------------- 1:standard analysis -----------------------------#
# download reference fasta
# wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GRCh38_major_release_seqs_for_alignment_pipelines/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz
# download gene annotation gtf (gencode)
# https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_39/gencode.v39.annotation.gtf.gz
# download rmsk
# https://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/rmsk.txt.gz
# download segdup
# https://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/genomicSuperDups.txt.gz
# download nanovar filter_bed
# https://github.com/benoukraflab/NanoVar/blob/master/nanovar/gaps/hg38_curated_filter_main.bed
## standard analysis call sv ##
# minimap2
minimap2 -t 30 --MD -Y -L -a -x map-ont --secondary=no GCA_000001405.15_GRCh38_no_alt_analysis_set.fna $sample.pass.fastq|samtools sort -@ 20 -o $sample.minimap2.sorted.bam && samtools index -@ 30 $sample.minimap2.sorted.bam
# sniffles
sniffles -m $sample.minimap2.bam -v $sample.sniffles.vcf -s 2 -t 30 --ignore_sd -l 50 --genotype
# cutesv
cuteSV -t 30 --max_cluster_bias_DEL 100 --diff_ratio_merging_DEL 0.3 -s 2 -l 50 --genotype $sample.minimap2.bam /sfs/xiaoyh/database/genome/hg38/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna $outdir$sample.cutesv.vcf $outdir
# nanovar
nanovar -t 30 -x ont -f hg38_curated_filter_main.bed -l 50 -s 10 $sample.minimap2.bam GCA_000001405.15_GRCh38_no_alt_analysis_set.fna $outdir
# survivor
ls $sampledir/*.vcf > fofn
SURVIVOR merge fofn 1000 1 1 0 0 50 $outdir/$sample.survivor.merge.vcf
python filterSurvivorCI.py fofn $sample.survivor.merge.vcf $sample.survivor.merge.filter.vcf
# vcffile list
ls $indir/*.survivor.merge.filter.vcf > allfofn
# A1: sv merge
dbSV_merge -f fofn -d 1000 -l 2 -r 0.4 -o svmergefile >mergelog1 2>&1
# A2: svmerge To merge VCF
python2 script/mergevcf2vcf.py --merge svmergefile --vcflist fofn -o dbsvmerge.320samples.vcf
awk -F ";SVLEN=|;SVTYPE" '$2<20000' dbsvmerge.320samples.vcf|grep -v 'SVTYPE=TRA' |grep -v chrM >Final.dbsvmerge.vcf
python3 addseq.py svmergefile fofn Final.dbsvmerge.vcf >Final.dbsvmerge.addseq.vcf
grep 'SVTYPE=INS' Final.dbsvmerge.addseq.vcf|awk '$5!~/INS/' |awk '{print ">"$3"\n"$5}' >INS.seq.fa
grep -P 'SUPP=1;|#' Final.dbsvmerge.vcf|grep -v '##' > singleton/SUPP1.vcf
python3 singleton/deal.py
awk -F "\t" 'NR==FNR{a[$1]=$1;next}NR>FNR{if (!($3 in a)) print $0}' singleton/filtersvid Final.dbsvmerge.addseq.vcf >Final.dbsvmerge.Tibetan-Han.vcf
# A3: vcf To matrix
python2 vcf2matrix.py Final.dbsvmerge.Tibetan-Han.vcf >Final.dbsvmerge.Tibetan-Han.matrix.xls
# A4 vcf annotation
python2 script/sv_filter.py --vcf Final.dbsvmerge.Tibetan-Han.vcf --prefix allsamples.svmerge --outdir annot/
python3 script/svannot.py --bed annot/allsamples.svmerge.bed --prefix Tibetan-Han --ref_version hg38 --db annovar.database/humandb/ --annovar table_annovar.pl --outdir annot/ > annovar.log 2>&1
## B: merge g1000
wget http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20210124.SV_Illumina_Integration/1KGP_3202.Illumina_ensemble_callset.freeze_V1.vcf.gz
echo -e "Final.dbsvmerge.Tibetan-Han.vcf\n20210124.SV_Illumina_Integration/1KGP_3202.Illumina_ensemble_callset.freeze_V1.vcf" >fofn2
dbSV_merge -f fofn2 -l 2 -r 0.5 -o g1000mergevcf >mergelog2 2>&1
python script/mergevcf2vcf1000intersect.py g1000mergevcf fofn2 >usandgenotypes.1000genomeintersect.vcf
python2 script/vcf2matrix.py usandgenotypes.1000genomeintersect.vcf >usandgenotypes.1000genomeintersect.matrix.xls
## C: merge EAS
python getEAS.vcf.py >EAS-addCHB.1KGP_3202.Illumina_ensemble_callset.freeze_V1.vcf
python vcf2matrix.filter.py EAS-addCHB.1KGP_3202.Illumina_ensemble_callset.freeze_V1.vcf >EAS-addCHB.1KGP_3202.Illumina_ensemble_callset.freeze_V1.filter.vcf
echo -e "Final.dbsvmerge.Tibetan-Han.vcf\nEAS-addCHB.1KGP_3202.Illumina_ensemble_callset.freeze_V1.filter.vcf" >fofn3
dbSV_merge -f fofn3 -l 2 -r 0.5 -o EASmergevcf >mergelog3 2>&1
python script/mergevcf2vcf1000intersect.py EASmergevcf EASvcflist >usandgenotypes.EASgenomeintersect.vcf
python2 script/vcf2matrix.py usandgenotypes.EASgenomeintersect.vcf >usandgenotypes.EASgenomeintersect.mxtrix.xls
R
1
https://gitee.com/jinlongshi/Han-Tibetan-ONT-SV.git
git@gitee.com:jinlongshi/Han-Tibetan-ONT-SV.git
jinlongshi
Han-Tibetan-ONT-SV
Han-Tibetan-ONT-SV
master

搜索帮助