套用转录的GO-KEGG富集分析流程,对重测序snp结果进行GO-KEGG富集分析
工单:231215-00026
测试路径:/TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich
1. 需要参考基因组的fa序列和gff文件
参考:work.sh #step1: #gff转gtf /TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/genome/gff_to_gtf-zhangxin_v1 \ -F GCA_000446055.1_Colletotrichum_gloeosporioides1_genomic.gff \ -O GCA_000446055.1_Colletotrichum_gloeosporioides1_genomic.gtf #提取gene.fa /TJPROJ6/RNA_SH/personal_dir/zhangxin/miniconda/envs/python_2.7/bin/python \ /TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/genome/get.seq.py \ --fa GCA_000446055.1_Colletotrichum_gloeosporioides1_genomic.fna \ --gtf GCA_000446055.1_Colletotrichum_gloeosporioides1_genomic.gtf \ --type gene \ --rely te \ --outfile gene.fa
参考:gene_go.sh
#step2: #将参考基因组的fa序列比对到数据库中 export PYTHONPATH="" unset PERL5LIB export PATH=/TJPROJ2/GB/PUBLIC/software/GB_TR/mRNA/miniconda3/envs/prepare_data/bin:$PATH export PATH=/TJPROJ2/GB/PUBLIC/software/GB_TR/mRNA/miniconda3/envs/r_3.5.0/bin:$PATH export PATH=/TJPROJ2/GB/PUBLIC/software/GB_TR/mRNA/miniconda3/envs/cromwell_62/bin:$PATH workdir=`pwd` cd $workdir TransDecoder.LongOrfs -t /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa diamond blastp --query /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa.transdecoder_dir/longest_orfs.pep --db /TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/database/swissprot/swissprot.dmnd --max-target-seqs 1 --outfmt 6 --evalue 1e-5 --threads 8 --out /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/blastp_outfmt6.txt hmmscan --cpu 8 --domtblout /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pfam_domtblout.txt -o /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pfam.log /TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/database/pfam/Pfam-A.hmm /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa.transdecoder_dir/longest_orfs.pep TransDecoder.Predict -t /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa --retain_pfam_hits /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pfam_domtblout.txt --retain_blastp_hits /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/blastp_outfmt6.txt /TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans_prok/manual_gb_prok/software/miniconda3/envs/python_2.7.14/bin/python /TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/get_gene_pep --pep /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa.transdecoder.pep --outfile /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pep.fa /TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/database/Annotation/interproscan/interproscan-5.47-82.0/interproscan.sh -t p -i /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pep.fa -f TSV -b /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pep -T /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich -iprlookup -goterms -pa -appl Pfam,SUPERFAMILY /TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/get_interpro_go --interpro /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pep.tsv --outfile go.txt /TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/get_go_all --go go.txt --outfile go2.txt
参考:work_enrich_GO.sh
export PYTHONPATH="" unset PERL5LIB export PATH=/TJPROJ2/GB/PUBLIC/software/GB_TR/mRNA/miniconda3/envs/r_3.5.0/bin:$PATH #这里需要输入snp_geneid.xls #/TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/GO_DAG \ /TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/GO \ --go go2.txt \ --diffgene snp_geneid.xls \ --diffresult ref_geneid.xls \ --enrich normal \ --prefix result/GO_enrich /TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/bar_plot \ --stat result/GO_enrich_GOenrich.xls \ --type GO \ --cutoff padj \ --prefix result/GObar /TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/dot_plot \ --stat result/GO_enrich_GOenrich.xls \ --type GO \ --cutoff padj \ --prefix result/GOdot