目录

简介

基于基因的关联分析

功能

基于基因的关联分析(Burden)

脚本使用

1. 配置文件:

合并后的snp和indel注释文件


示例脚本step1:过滤SNP文件
# Step 1: run VariantFilter on the merged SNP annotation table.
# Paths and filter options are those of the example project; the flags are
# passed to the script exactly as listed, and the chain stops at the first
# failing command.
filter_py=/ifs/TJPROJ3/DISEASE/share/Disease/Association/Burden/VariantFilter.v1.4.4.py
workdir=/TJPROJ6/AFS_RESEQ/Proj/hanyue/06.gaoji/Burden/test
freq_limits='1000g_ALL 0.005;GnomAD_ALL_AF 0.005;GnomAD_EAS_AF 0.005;NovoDb_WES 0.005'

echo "Filter snp start: " $(date) &&
  python "$filter_py" \
    --in "$workdir/snp.merged.annovar.hg19_multianno.xls.gz" \
    --func --sys --loss \
    --sp 2 \
    --freqli "$freq_limits" \
    --dam P \
    --gerp --repeat \
    --out "$workdir/S167.snp" &&
  echo "Filter snp end: " $(date)


示例脚本step2:过滤indel文件

# Step 2: run VariantFilter on the merged INDEL annotation table.
# Same script and frequency thresholds as the SNP step; here --dam is F and
# --gerp is not passed.
filter_py=/ifs/TJPROJ3/DISEASE/share/Disease/Association/Burden/VariantFilter.v1.4.4.py
workdir=/TJPROJ6/AFS_RESEQ/Proj/hanyue/06.gaoji/Burden/test
freq_limits='1000g_ALL 0.005;GnomAD_ALL_AF 0.005;GnomAD_EAS_AF 0.005;NovoDb_WES 0.005'

echo "Filter indel start: " $(date) &&
  python "$filter_py" \
    --in "$workdir/indel.merged.annovar.hg19_multianno.xls.gz" \
    --func --sys --loss \
    --sp 2 \
    --freqli "$freq_limits" \
    --dam F \
    --repeat \
    --out "$workdir/S167.indel" &&
  echo "Filter indel end: " $(date)



示例脚本step3:卡bed区间

# Step 3: restrict the filtered SNP and INDEL tables to the capture bed.
# ExtractAgilentBed_only.py is invoked with three positional arguments, in
# this order as used here: filtered table, bed file, output table.
extract_py=/ifs/TJPROJ3/DISEASE/Database/ExAC/gnomAD/ExtractAgilentBed_only.py
bed=/ifs/TJPROJ3/DISEASE/Database/Exome_bed/Agilent/SureSelectXT.Human.All.Exon.V6/S07604514_Regions_extract.bed
workdir=/TJPROJ6/AFS_RESEQ/Proj/hanyue/06.gaoji/Burden/test
tag=func.sys.1000g_ALL.GnomAD_ALL_AF.GnomAD_EAS_AF.NovoDb_WES.repeat
snp_pre=$workdir/S167.snp.$tag.deleterious
indel_pre=$workdir/S167.indel.$tag

echo "Extract bed start: " $(date) &&
  python "$extract_py" "${snp_pre}.xls" "$bed" "${snp_pre}.bed_only.xls" &&
  python "$extract_py" "${indel_pre}.xls" "$bed" "${indel_pre}.bed_only.xls" &&
  echo "Extract bed end: " $(date)

示例脚本step4:merge常染色体的snp和indel

# Step 4: keep only rows whose 2nd tab-separated column is neither "X" nor
# "Y" in the SNP and INDEL tables, then concatenate the two results.
# The sed stage deletes every line containing "Priority" from line 2 of the
# concatenated stream onward (presumably the repeated header row of the
# second table — confirm against the actual column names).
workdir=/TJPROJ6/AFS_RESEQ/Proj/hanyue/06.gaoji/Burden/test
tag=func.sys.1000g_ALL.GnomAD_ALL_AF.GnomAD_EAS_AF.NovoDb_WES.repeat
snp_in=$workdir/S167.snp.$tag.deleterious.bed_only.xls
indel_in=$workdir/S167.indel.$tag.bed_only.xls
snp_auto=$workdir/S167.snp.filtered.padded.autosome.xls
indel_auto=$workdir/S167.indel.filtered.padded.autosome.xls
merged=$workdir/S167.snp.indel.filtered.padded.autosome.xls

echo "Autosome and Merge SNP INDEL start: " $(date) &&
  awk -F"\t" '$2 != "X" && $2 != "Y"' "$snp_in" > "$snp_auto" &&
  awk -F"\t" '$2 != "X" && $2 != "Y"' "$indel_in" > "$indel_auto" &&
  cat "$snp_auto" "$indel_auto" | sed -E '2,${/Priority/d}' > "$merged" &&
  echo "Autosome Merge SNP INDEL end: " $(date)

示例脚本step5:进行snp和indel的burden分析,计算P值等;
# Step 5: gene-based burden analysis of the filtered autosomal SNP table.
#   1. GetBurdenFre_v1.6.py compares the case table with the control table
#      and writes <out_pre>.stat.xls into the work directory.
#   2. GeneFisherPlot.R is run inside the work directory on that stat file.
#   3. The fisher table and the samstat table are pasted side by side and
#      sorted with a general-numeric key starting at field 6.
# Fix: the final echo previously had stray heading text fused onto it
# (":merge..."), which was printed after the date; it is removed here.
# NOTE(review): the heading for this step mentions SNP and INDEL, yet every
# input/output below is SNP-only and the merged snp.indel table produced by
# the previous step is not used — confirm which case/control files are
# intended before relying on this step for combined SNP+INDEL results.
burden_dir=/ifs/TJPROJ3/DISEASE/share/Disease/Association/Burden
workdir=/TJPROJ6/AFS_RESEQ/Proj/hanyue/06.gaoji/Burden/test
out_pre=S167.burden.snp

echo "Burden snp start: " $(date) &&
  python "$burden_dir/GetBurdenFre_v1.6.py" \
    -case "$workdir/S167.snp.filtered.padded.autosome.xls" \
    -control "$burden_dir/NovoDb2827/Novo.2827.wes.snp.filterAgilentV6Pad100.stat.noXY.xls" \
    -cc N \
    -Num 2827 \
    -cr 0.95 \
    -nr 0.6 \
    -out "$workdir/${out_pre}.stat.xls" &&
  cd "$workdir" &&
  Rscript "$burden_dir/GeneFisherPlot.R" \
    --infile "${out_pre}.stat.xls" \
    --outpre "$out_pre" &&
  paste "${out_pre}.fisher.xls" "${out_pre}.stat.samstat.xls" | sort -g -k6 > "${out_pre}.fisher.sample.sorted.xls" &&
  echo "Burden snp end: " $(date)

示例脚本step6:进行snp的burden分析,计算P值等;
# Step 6: SNP burden analysis.
# GetBurdenFre_v1.6.py writes the per-gene stat table, GeneFisherPlot.R is
# then run on it from inside the work directory, and finally the fisher and
# samstat tables are pasted column-wise and sorted (sort -g, key from
# field 6) into the delivered table.
burden_dir=/ifs/TJPROJ3/DISEASE/share/Disease/Association/Burden
workdir=/TJPROJ6/AFS_RESEQ/Proj/hanyue/06.gaoji/Burden/test
case_tbl=$workdir/S167.snp.filtered.padded.autosome.xls
control_tbl=$burden_dir/NovoDb2827/Novo.2827.wes.snp.filterAgilentV6Pad100.stat.noXY.xls
out_pre=S167.burden.snp

echo "Burden snp start: " $(date) &&
  python "$burden_dir/GetBurdenFre_v1.6.py" \
    -case "$case_tbl" \
    -control "$control_tbl" \
    -cc N -Num 2827 -cr 0.95 -nr 0.6 \
    -out "$workdir/${out_pre}.stat.xls" &&
  cd "$workdir" &&
  Rscript "$burden_dir/GeneFisherPlot.R" \
    --infile "${out_pre}.stat.xls" \
    --outpre "$out_pre" &&
  paste "${out_pre}.fisher.xls" "${out_pre}.stat.samstat.xls" |
    sort -g -k6 > "${out_pre}.fisher.sample.sorted.xls" &&
  echo "Burden snp end: " $(date)

运行脚本:

/TJPROJ6/AFS_RESEQ/Proj/hanyue/06.gaoji/Burden/test/work.sh
sh 刷脚本;

/TJPROJ6/AFS_RESEQ/Proj/hanyue/06.gaoji/Burden/test/shell
qsub投递;

交付结果

burden的表格; 曼哈顿图; QQ图;

示例路径: /TJPROJ6/AFS_RESEQ/Proj/hanyue/06.gaoji/Burden/test