目录

基于snp结果做GO-KEGG富集分析

套用转录的GO-KEGG富集分析流程,对重测序snp结果进行GO-KEGG富集分析

测试路径和参考工单

工单:231215-00026

测试路径:/TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich

所需文件

1. 需要参考基因组的fa序列和gff文件

运行方式

参考:work.sh
#step1:

#gff转gtf
/TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/genome/gff_to_gtf-zhangxin_v1 \
  -F GCA_000446055.1_Colletotrichum_gloeosporioides1_genomic.gff \
  -O GCA_000446055.1_Colletotrichum_gloeosporioides1_genomic.gtf

#提取gene.fa
/TJPROJ6/RNA_SH/personal_dir/zhangxin/miniconda/envs/python_2.7/bin/python \
  /TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/genome/get.seq.py \
  --fa GCA_000446055.1_Colletotrichum_gloeosporioides1_genomic.fna \
  --gtf GCA_000446055.1_Colletotrichum_gloeosporioides1_genomic.gtf \
  --type gene \
  --rely te \
  --outfile gene.fa

参考:gene_go.sh

#step2:
#将参考基因组的fa序列比对到数据库中
export PYTHONPATH=""
unset PERL5LIB
export PATH=/TJPROJ2/GB/PUBLIC/software/GB_TR/mRNA/miniconda3/envs/prepare_data/bin:$PATH
export PATH=/TJPROJ2/GB/PUBLIC/software/GB_TR/mRNA/miniconda3/envs/r_3.5.0/bin:$PATH
export PATH=/TJPROJ2/GB/PUBLIC/software/GB_TR/mRNA/miniconda3/envs/cromwell_62/bin:$PATH

workdir=`pwd`
cd $workdir

TransDecoder.LongOrfs -t /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa

diamond blastp --query /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa.transdecoder_dir/longest_orfs.pep --db /TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/database/swissprot/swissprot.dmnd --max-target-seqs 1 --outfmt 6 --evalue 1e-5 --threads 8 --out /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/blastp_outfmt6.txt

hmmscan --cpu 8 --domtblout /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pfam_domtblout.txt -o /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pfam.log /TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/database/pfam/Pfam-A.hmm /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa.transdecoder_dir/longest_orfs.pep

TransDecoder.Predict -t /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa --retain_pfam_hits /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pfam_domtblout.txt --retain_blastp_hits /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/blastp_outfmt6.txt

/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans_prok/manual_gb_prok/software/miniconda3/envs/python_2.7.14/bin/python /TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/get_gene_pep --pep /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/gene.fa.transdecoder.pep --outfile /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pep.fa

/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/database/Annotation/interproscan/interproscan-5.47-82.0/interproscan.sh -t p -i /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pep.fa -f TSV -b /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pep -T /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich -iprlookup -goterms -pa -appl Pfam,SUPERFAMILY

/TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/get_interpro_go --interpro /TJPROJ5/META_ASS/meta/sunhongtao/KEGG-go-enrich/pep.tsv --outfile go.txt
/TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/get_go_all --go go.txt --outfile go2.txt

参考:work_enrich_GO.sh

export PYTHONPATH=""
unset PERL5LIB
export PATH=/TJPROJ2/GB/PUBLIC/software/GB_TR/mRNA/miniconda3/envs/r_3.5.0/bin:$PATH

#这里需要输入snp_geneid.xls

#/TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/GO_DAG \
/TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/GO \
  --go go2.txt \
  --diffgene snp_geneid.xls  \
  --diffresult ref_geneid.xls \
  --enrich normal \
  --prefix result/GO_enrich

/TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/bar_plot \
  --stat result/GO_enrich_GOenrich.xls \
  --type GO \
  --cutoff padj \
  --prefix result/GObar

/TJPROJ7/GB_TR/PUBLIC/source/ncRNA/gb_tr_man_pipline/bin/dot_plot \
  --stat result/GO_enrich_GOenrich.xls \
  --type GO \
  --cutoff padj \
  --prefix result/GOdot

最终结果