# !/usr/bin/python # -*—coding=utf-8-*- # author: lizhengnan import os import re import argparse parser = argparse.ArgumentParser(description='对wgcna结果中每个模块内的基因进行富集分析') # 添加参数 parser.add_argument('--wgcna', type=str, help='基因-模块对应关系列表路径, 例如:/TJPROJ6/RNA_SH/shouhou/202308/X101SC23063706-Z01-J001/Result_X101SC22123709-Z01-F002-B2-16/9.WGCNA/3.模块特征及表达模式/3.4模块基因表达模式/基因-模块对应关系列表') parser.add_argument('--gene', type=str, help='gene.xls') parser.add_argument('--go', type=str, help='go.xls') parser.add_argument('--kegg', type=str, help='kegg.xls') parser.add_argument('--kegg_abbr', type=str, help='kegg物种缩写') parser.add_argument('--outdir', type=str, help='the outdir') argv = vars(parser.parse_args()) if argv['wgcna'] is None: raise Exception('You should provide WGCNA result dir!') else: wgcna = argv['wgcna'].strip() if argv['gene'] is None: raise Exception('You should provide gene.xls!') else: gene = argv['gene'].strip() if argv['go'] is None: raise Exception('You should provide go.xls!') else: go = argv['go'].strip() if argv['kegg'] is None: raise Exception('You should provide the kegg.xls file !') else: kegg = argv['kegg'].strip() if argv['kegg_abbr'] is None: raise Exception('You should provide the kegg abbr infomation !') else: kegg_abbr = argv['kegg_abbr'].strip() if argv['outdir'] is None: outdir = './' else: outdir = argv['outdir'].strip() if not os.path.exists(outdir): os.mkdir(outdir) script_dir = os.path.join(outdir, 'log') if not os.path.exists(script_dir): os.makedirs(script_dir) def get_module_name(dir): module_file = os.listdir(dir) module_name = [] for i in module_file: name = re.match('^LinkID-(.*).txt$', i).group(1) module_name.append(name) # 删除all 和 gery 模块名,这两个模块不做富集分析 module_name.remove('all') module_name.remove('grey') return module_name def enrich_cmd(wgcna, module_name, gene, go, kegg, kegg_abbr, script_dir): for module in module_name: script_file = os.path.join(script_dir, module + "_enrich.sh") cmd = 'export PYTHONPATH=\n' cmd += 'unset PERL5LIB\n' cmd += 'export PATH=/TJPROJ2/GB/PUBLIC/software/GB_TR/mRNA/miniconda3/envs/r_3.5.0/bin:$PATH\n\n' # 添加gene_name cmd += "sed -i '1igene_id' {}/LinkID-{}.txt\n".format(wgcna, module) cmd += 'Rscript /TJPROJ6/RNA_SH/personal_dir/lizhengnan/scripts/add_anno.R --files {}/LinkID-{}.txt --anno {}\n\n'.format(wgcna, module, gene) # GO go_outdir = os.path.join(outdir, 'Enrichment/1.GO', module) cmd += 'mkdir -p {}\n\n'.format(go_outdir) cmd += '/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/gb_MedRef_man_pipline/bin/GO --go {} --diffgene {}/LinkID-{}.txt --diffresult {}' \ ' --enrich normal --prefix {}/{}\n\n'.format(go, wgcna, module, gene, go_outdir, module) cmd += '/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/gb_MedRef_man_pipline/bin/bar_plot --stat {}/{}_GOenrich.xls' \ ' --type GO --cutoff padj --prefix {}/{}_GObar\n'.format(go_outdir, module, go_outdir, module) cmd += '/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/gb_MedRef_man_pipline/bin/dot_plot --stat {}/{}_GOenrich.xls' \ ' --type GO --cutoff padj --prefix {}/{}_GOdot\n\n'.format(go_outdir, module,go_outdir, module) # KEGG kegg_outdir = os.path.join(outdir, 'Enrichment/2.KEGG', module) cmd += 'mkdir -p {}\n\n'.format(kegg_outdir) cmd += '/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/gb_MedRef_man_pipline/bin/KEGG --kegg {} --diffgene {}/LinkID-{}.txt --diffresult {} ' \ ' --enrich normal --prefix {}/{} --abbr {}\n\n'.format(kegg, wgcna, module, gene, kegg_outdir, module, kegg_abbr) cmd += '/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/gb_MedRef_man_pipline/bin/bar_plot --stat {}/{}_KEGGenrich.xls' \ ' --type KEGG --cutoff padj --prefix {}/{}_KEGGbar\n'.format(kegg_outdir, module, kegg_outdir, module) cmd += '/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/gb_MedRef_man_pipline/bin/dot_plot --stat {}/{}_KEGGenrich.xls' \ ' --type KEGG --cutoff padj --prefix {}/{}_KEGGdot\n\n'.format(kegg_outdir, module, kegg_outdir, module) cmd += 'cd {}\n'.format(kegg_outdir) cmd += 'python2 /TJPROJ6/RNA_SH/personal_dir/lizhengnan/scripts/WGCNA/simple_pathway_html.py --table {}/{}_KEGGenrich.xls' \ ' --prefix {}/{} --abbr {}\n'.format(kegg_outdir, module, kegg_outdir, module, kegg_abbr) cmd += 'mv {}_pathway_detail.html {}_KEGGenrich.html'.format(module, module) with open(script_file, "w") as f: f.write(cmd) module_name = get_module_name(wgcna) enrich_cmd(wgcna, module_name, gene, go, kegg, kegg_abbr, script_dir) 使用方法: python3 wgcna_enrich.py --help usage: wgcna_enrich.py [-h] [--wgcna WGCNA] [--gene GENE] [--go GO] [--kegg KEGG] [--kegg_abbr KEGG_ABBR] [--outdir OUTDIR] 对wgcna结果中除了all和grey模块, 对其余模块内的基因进行富集分析 optional arguments: -h, --help show this help message and exit --wgcna WGCNA 输入WGCNA结果文件路径 --gene GENE gene.xls --go GO go.xls --kegg KEGG kegg.xls --kegg_abbr KEGG_ABBR kegg物种缩写 --outdir OUTDIR the outdir