用户工具

站点工具


wgcna富集分析
# !/usr/bin/python
# -*—coding=utf-8-*-
# author: lizhengnan


import os
import re
import argparse

parser = argparse.ArgumentParser(description='对wgcna结果中每个模块内的基因进行富集分析')

# 添加参数
parser.add_argument('--wgcna', type=str, help='基因-模块对应关系列表路径, 例如:/TJPROJ6/RNA_SH/shouhou/202308/X101SC23063706-Z01-J001/Result_X101SC22123709-Z01-F002-B2-16/9.WGCNA/3.模块特征及表达模式/3.4模块基因表达模式/基因-模块对应关系列表')
parser.add_argument('--gene', type=str, help='gene.xls')
parser.add_argument('--go', type=str, help='go.xls')
parser.add_argument('--kegg', type=str, help='kegg.xls')
parser.add_argument('--kegg_abbr', type=str, help='kegg物种缩写')
parser.add_argument('--outdir', type=str, help='the outdir')

argv = vars(parser.parse_args())

if argv['wgcna'] is None:
    raise Exception('You should provide WGCNA result dir!')
else:
    wgcna = argv['wgcna'].strip()

if argv['gene'] is None:
    raise Exception('You should provide gene.xls!')
else:
    gene = argv['gene'].strip()

if argv['go'] is None:
    raise Exception('You should provide go.xls!')
else:
    go = argv['go'].strip()

if argv['kegg'] is None:
    raise Exception('You should provide the kegg.xls file !')
else:
    kegg = argv['kegg'].strip()

if argv['kegg_abbr'] is None:
    raise Exception('You should provide the kegg abbr infomation !')
else:
    kegg_abbr = argv['kegg_abbr'].strip()

if argv['outdir'] is None:
    outdir = './'
else:
    outdir = argv['outdir'].strip()

if not os.path.exists(outdir):
    os.mkdir(outdir)

script_dir = os.path.join(outdir, 'log')
if not os.path.exists(script_dir):
    os.makedirs(script_dir)

def get_module_name(dir):
    module_file = os.listdir(dir)
    module_name = []
    for i in module_file:
        name = re.match('^LinkID-(.*).txt$', i).group(1)
        module_name.append(name)
    # 删除all 和 gery 模块名,这两个模块不做富集分析
    module_name.remove('all')
    module_name.remove('grey')
    return module_name

def enrich_cmd(wgcna, module_name, gene, go, kegg, kegg_abbr, script_dir):
    for module in module_name:

        script_file = os.path.join(script_dir, module + "_enrich.sh")
        cmd = 'export PYTHONPATH=\n'
        cmd += 'unset PERL5LIB\n'
        cmd += 'export PATH=/TJPROJ2/GB/PUBLIC/software/GB_TR/mRNA/miniconda3/envs/r_3.5.0/bin:$PATH\n\n'

        # 添加gene_name
        cmd += "sed -i '1igene_id' {}/LinkID-{}.txt\n".format(wgcna, module)
        cmd += 'Rscript /TJPROJ6/RNA_SH/personal_dir/lizhengnan/scripts/add_anno.R --files {}/LinkID-{}.txt --anno {}\n\n'.format(wgcna, module, gene)

        # GO
        go_outdir = os.path.join(outdir, 'Enrichment/1.GO', module)
        cmd += 'mkdir -p {}\n\n'.format(go_outdir)
        cmd += '/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/gb_MedRef_man_pipline/bin/GO --go {} --diffgene {}/LinkID-{}.txt --diffresult {}' \
               ' --enrich normal --prefix {}/{}\n\n'.format(go, wgcna, module, gene, go_outdir, module)
        cmd += '/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/gb_MedRef_man_pipline/bin/bar_plot --stat {}/{}_GOenrich.xls' \
               ' --type GO --cutoff padj --prefix {}/{}_GObar\n'.format(go_outdir, module, go_outdir, module)
        cmd += '/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/gb_MedRef_man_pipline/bin/dot_plot --stat {}/{}_GOenrich.xls' \
               ' --type GO --cutoff padj --prefix {}/{}_GOdot\n\n'.format(go_outdir, module,go_outdir, module)

        # KEGG
        kegg_outdir = os.path.join(outdir, 'Enrichment/2.KEGG', module)
        cmd += 'mkdir -p {}\n\n'.format(kegg_outdir)
        cmd += '/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/gb_MedRef_man_pipline/bin/KEGG --kegg {} --diffgene {}/LinkID-{}.txt --diffresult {} ' \
               ' --enrich normal --prefix {}/{} --abbr {}\n\n'.format(kegg, wgcna, module, gene, kegg_outdir, module, kegg_abbr)
        cmd += '/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/gb_MedRef_man_pipline/bin/bar_plot --stat {}/{}_KEGGenrich.xls' \
               ' --type KEGG --cutoff padj --prefix {}/{}_KEGGbar\n'.format(kegg_outdir, module, kegg_outdir, module)
        cmd += '/TJPROJ2/GB/PUBLIC/source/GB_TR/mRNA/gb_trans/gb_MedRef_man_pipline/bin/dot_plot --stat {}/{}_KEGGenrich.xls' \
               ' --type KEGG --cutoff padj --prefix {}/{}_KEGGdot\n\n'.format(kegg_outdir, module, kegg_outdir, module)

        cmd += 'cd {}\n'.format(kegg_outdir)
        cmd += 'python2 /TJPROJ6/RNA_SH/personal_dir/lizhengnan/scripts/WGCNA/simple_pathway_html.py --table {}/{}_KEGGenrich.xls' \
               ' --prefix {}/{} --abbr {}\n'.format(kegg_outdir, module, kegg_outdir, module, kegg_abbr)
        cmd += 'mv {}_pathway_detail.html {}_KEGGenrich.html'.format(module, module)

        with open(script_file, "w") as f:
            f.write(cmd)

module_name = get_module_name(wgcna)
enrich_cmd(wgcna, module_name, gene, go, kegg, kegg_abbr, script_dir)

使用方法:

python3 wgcna_enrich.py --help
usage: wgcna_enrich.py [-h] [--wgcna WGCNA] [--gene GENE] [--go GO]
                       [--kegg KEGG] [--kegg_abbr KEGG_ABBR] [--outdir OUTDIR]

对wgcna结果中除了all和grey模块, 对其余模块内的基因进行富集分析

optional arguments:
  -h, --help            show this help message and exit
  --wgcna WGCNA         输入WGCNA结果文件路径
  --gene GENE           gene.xls
  --go GO               go.xls
  --kegg KEGG           kegg.xls
  --kegg_abbr KEGG_ABBR
                        kegg物种缩写
  --outdir OUTDIR       the outdir
wgcna富集分析.txt · 最后更改: 2024/03/08 02:41 由 lizhengnan