====== 基于snp结果做GO-KEGG富集分析 ======
套用转录的GO-KEGG富集分析流程,对重测序snp结果进行GO-KEGG富集分析
===== 测试路径和参考工单 =====
工单:231215-00026
测试路径:/TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich
===== 所需文件 =====
1. 需要参考基因组的fa序列和gff文件
===== 运行方式 =====
参考:work.sh
#step1:
#gff转gtf
/TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/genome/gff_to_gtf-zhangxin_v1 \
-F GCA_000446055.1_Colletotrichum_gloeosporioides1_genomic.gff \
-O GCA_000446055.1_Colletotrichum_gloeosporioides1_genomic.gtf
#提取gene.fa
/TJPROJ6/RNA_SH/personal_dir/zhangxin/miniconda/envs/python_2.7/bin/python \
/TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/genome/get.seq.py \
--fa GCA_000446055.1_Colletotrichum_gloeosporioides1_genomic.fna \
--gtf GCA_000446055.1_Colletotrichum_gloeosporioides1_genomic.gtf \
--type gene \
--rely te \
--outfile gene.fa
参考:gene_go.sh
#step2:
#将参考基因组的fa序列比对到数据库中
export PYTHONPATH=""
unset PERL5LIB
export PATH=/TJPROJ2/GB/PUBLIC/software/GB_TR/mRNA/miniconda3/envs/prepare_data/bin:$PATH
export PATH=/TJPROJ2/GB/PUBLIC/software/GB_TR/mRNA/miniconda3/envs/r_3.5.0/bin:$PATH
export PATH=/TJPROJ2/GB/PUBLIC/software/GB_TR/mRNA/miniconda3/envs/cromwell_62/bin:$PATH
workdir=`pwd`
cd $workdir
TransDecoder.LongOrfs -t /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa
diamond blastp --query /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa.transdecoder_dir/longest_orfs.pep --db /TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/database/swissprot/swissprot.dmnd --max-target-seqs 1 --outfmt 6 --evalue 1e-5 --threads 8 --out /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/blastp_outfmt6.txt
hmmscan --cpu 8 --domtblout /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pfam_domtblout.txt -o /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pfam.log /TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/database/pfam/Pfam-A.hmm /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa.transdecoder_dir/longest_orfs.pep
TransDecoder.Predict -t /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa --retain_pfam_hits /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pfam_domtblout.txt --retain_blastp_hits /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/blastp_outfmt6.txt
/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans_prok/manual_gb_prok/software/miniconda3/envs/python_2.7.14/bin/python /TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/get_gene_pep --pep /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa.transdecoder.pep --outfile /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pep.fa
/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/database/Annotation/interproscan/interproscan-5.47-82.0/interproscan.sh -t p -i /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pep.fa -f TSV -b /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pep -T /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich -iprlookup -goterms -pa -appl Pfam,SUPERFAMILY
/TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/get_interpro_go --interpro /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pep.tsv --outfile go.txt
/TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/get_go_all --go go.txt --outfile go2.txt
参考:work_enrich_GO.sh
export PYTHONPATH=""
unset PERL5LIB
export PATH=/TJPROJ2/GB/PUBLIC/software/GB_TR/mRNA/miniconda3/envs/r_3.5.0/bin:$PATH
#这里需要输入snp_geneid.xls
#/TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/GO_DAG \
/TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/GO \
--go go2.txt \
--diffgene snp_geneid.xls \
--diffresult ref_geneid.xls \
--enrich normal \
--prefix result/GO_enrich
/TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/bar_plot \
--stat result/GO_enrich_GOenrich.xls \
--type GO \
--cutoff padj \
--prefix result/GObar
/TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/dot_plot \
--stat result/GO_enrich_GOenrich.xls \
--type GO \
--cutoff padj \
--prefix result/GOdot
===== 最终结果 =====
{{ :go-enrich.png?600 |}}