====== Oncofuse ======
This tool is designed to predict the oncogenic potential of fusion genes found by Next-Generation Sequencing in cancer cells. It is a post-processing step that tries to validate in silico the predictions made by fusion detection software. Oncofuse is NOT fusion detection software: its goal is NOT to identify fusion sequences, but to assign a functional prediction score (oncogenic potential, i.e. the probability of being a 'driver' event) to fusion sequences identified by other software such as TopHat-Fusion, FusionCatcher or STAR.
官网地址:https://genetica.unav.edu/oncofuse.html
===== 使用流程融合基因结果进行分析 =====
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Date : 2023-7-13
# @Author : lizhengnan
import os
import argparse
import sys
# Oncofuse wrapper: converts per-sample fusion tables into Oncofuse "coord"
# input files, writes a shell script that runs Oncofuse on each of them, and
# submits that script with qsub.

# Tissue codes accepted in the sample-to-tissue file; anything else is
# replaced by 'AVG'.
TISSUE = ['EPI', 'HEM', 'MES', 'AVG']
# Genome assemblies advertised by the --genome_version help text.
GENOME_VERSIONS = ('hg18', 'hg19', 'hg38')
DEFAULT_GENOME_VERSION = 'hg38'

ONCOFUSE_HOME = '/TJPROJ6/RNA_SH/software/Oncofuse/oncofuse-1.1.1'
JAVA_BIN = '/PUBLIC/software/public/System/jre1.8.0_25/bin/java'

# Column names looked up in the header of every <sample>_fusion.xls file.
FUSION_COLUMNS = ('LeftBreakpoint', 'RightBreakpoint',
                  'JunctionReadCount', 'SpanningFragCount')
COORD_HEADER = ["#5' chrom", "5' coord", "3' chrom", "3' coord", "tissue",
                "5' FPG", "3' FPG", "spanning", "encompassing"]


def build_parser():
    """Return the command-line parser (help texts are part of the CLI)."""
    parser = argparse.ArgumentParser(description="This script performs Oncofuse analysis")
    parser.add_argument("fusion_result",
                        help="Path to the fusion result file, example: /TJPROJ6/RNA_SH/shouhou/202307/X101SC23022823-Z01-J004/03.Result_X101SC23022823-Z01-J004-B4-16_Homo_sapiens/Result_X101SC23022823-Z01-J004-B4-16/8.Fusion/1.fusionlist/")
    parser.add_argument("sample2tissue",
                        help="Path to the sample to tissue file, the four supported tissue types: EPI (epithelial origin), HEM (hematological origin), MES (mesenchymal origin) and AVG (average expression, if tissue source is unknown)")
    parser.add_argument("--result_dir", help="Path to the directory to save the results", nargs='?', default=None)
    parser.add_argument("--genome_version", help="Genome assembly version, default is hg38. Allowed values: hg18, hg19, hg38", default='hg38')
    return parser


def resolve_result_dir(result_dir, cwd):
    """Return the absolute output directory.

    ``result_dir`` is the raw ``--result_dir`` value; ``None`` or a blank
    string selects ``<cwd>/result``. Relative paths are anchored at ``cwd``
    because the generated job script changes directory before it writes
    results, so a relative path would otherwise point somewhere else.
    """
    if result_dir is None or not result_dir.strip():
        return os.path.join(cwd, 'result')
    return os.path.abspath(os.path.join(cwd, result_dir.strip()))


def parse_sample2tissue(lines):
    """Build a ``{sample: tissue}`` dict from tab-separated lines.

    Blank lines are skipped. Lines starting with ``sample`` are treated as
    header lines and skipped. A missing or unrecognised tissue becomes
    ``'AVG'``.
    """
    mapping = {}
    for line_no, raw in enumerate(lines, start=1):
        record = raw.strip()
        if not record:
            continue
        if record.startswith('sample'):
            # NOTE(review): this also drops real samples whose name begins
            # with "sample"; warn when it happens anywhere but line 1 so the
            # omission is at least visible.
            if line_no > 1:
                sys.stderr.write(
                    "Warning: line {} starts with 'sample' and is skipped as a header: {}\n".format(
                        line_no, record))
            continue
        fields = record.split('\t')
        tissue = fields[1] if len(fields) > 1 else 'AVG'
        if tissue not in TISSUE:
            tissue = 'AVG'
        mapping[fields[0]] = tissue
    return mapping


def format_coord_row(fields, left_idx, right_idx, junction_idx, spanning_idx, tissue):
    """Format one fusion record as a line of the coord file.

    The two breakpoint fields are split on ``:``; their first two parts go
    into the leading columns and their third part into the "FPG" columns.
    The last column is JunctionReadCount + SpanningFragCount.
    """
    left = fields[left_idx].split(":")
    right = fields[right_idx].split(":")
    junction = fields[junction_idx]
    encompassing = int(junction) + int(fields[spanning_idx])
    columns = left[0:2] + right[0:2] + [tissue, left[2], right[2], junction, str(encompassing)]
    return "\t".join(columns) + "\n"


def write_coord_file(fusion_path, coord_path, tissue):
    """Convert one ``<sample>_fusion.xls`` table into a coord file.

    Raises ``ValueError`` when the fusion file is empty or lacks one of the
    required columns.
    """
    with open(fusion_path) as fusion_handle:
        header_line = fusion_handle.readline()
        if not header_line:
            raise ValueError("Fusion result file is empty: {}".format(fusion_path))
        header = header_line.strip().split('\t')
        left_idx, right_idx, junction_idx, spanning_idx = [
            header.index(column) for column in FUSION_COLUMNS]
        # Open the output once instead of re-opening it for every record.
        with open(coord_path, 'w') as coord_handle:
            coord_handle.write("\t".join(COORD_HEADER) + "\n")
            for raw in fusion_handle:
                record = raw.strip()
                if not record:
                    continue
                coord_handle.write(format_coord_row(
                    record.split('\t'), left_idx, right_idx,
                    junction_idx, spanning_idx, tissue))


def build_job_script(coord_dir, result_dir, samples, genome_version):
    """Return the text of the shell script that runs Oncofuse per sample."""
    # NOTE(review): the script exports a `java` variable but the commands
    # below call plain `java` from PATH; confirm which JVM is intended.
    parts = ['#!/bin/bash\n\nexport java="{}"\n\n\ncd {}\n\n'.format(JAVA_BIN, coord_dir)]
    for sample in samples:
        parts.append(
            'java -jar {}/Oncofuse.jar -a {} -p 2 {} coord - {}/{}_Oncofuse_result.xls\n\n'.format(
                ONCOFUSE_HOME, genome_version, sample, result_dir, sample))
    parts.append('cp {}/readme.txt {}'.format(ONCOFUSE_HOME, result_dir))
    return "".join(parts)


def main(argv=None):
    """Parse arguments, write coord files and the job script, then qsub it."""
    parser = build_parser()
    args = parser.parse_args(argv)

    fusion_result = args.fusion_result.strip()
    sample2tissue = args.sample2tissue.strip()
    genome_version = (args.genome_version or '').strip() or DEFAULT_GENOME_VERSION
    if genome_version not in GENOME_VERSIONS:
        parser.error("--genome_version must be one of: {}".format(", ".join(GENOME_VERSIONS)))

    cwd = os.getcwd()
    result_dir = resolve_result_dir(args.result_dir, cwd)
    os.makedirs(result_dir, exist_ok=True)

    with open(sample2tissue) as handle:
        sample2tissue_dict = parse_sample2tissue(handle)
    if not sample2tissue_dict:
        sys.exit("No samples found in {}".format(sample2tissue))

    coord_dir = os.path.join(cwd, 'coord')
    os.makedirs(coord_dir, exist_ok=True)
    for sample, tissue in sample2tissue_dict.items():
        write_coord_file(os.path.join(fusion_result, sample + "_fusion.xls"),
                         os.path.join(coord_dir, sample), tissue)

    script_path = os.path.join(cwd, "oncofuse.sh")
    with open(script_path, 'w') as script:
        script.write(build_job_script(coord_dir, result_dir,
                                      list(sample2tissue_dict), genome_version))
    os.system("qsub -V -cwd -l vf=4G,p=2 {}".format(script_path))


if __name__ == "__main__":
    main()
==== 使用方法 ====
python Oncofuse.py --help
usage: Oncofuse.py [-h] [--result_dir [RESULT_DIR]]
[--genome_version GENOME_VERSION]
fusion_result sample2tissue
This script performs Oncofuse analysis
positional arguments:
fusion_result Path to the fusion result file, example: /TJPROJ6/RNA_
SH/shouhou/202307/X101SC23022823-Z01-J004/03.Result_X1
01SC23022823-Z01-J004-B4-16_Homo_sapiens/Result_X101SC
23022823-Z01-J004-B4-16/8.Fusion/1.fusionlist/
sample2tissue Path to the sample to tissue file, the four supported
tissue types: EPI (epithelial origin), HEM
(hematological origin), MES (mesenchymal origin) and
AVG (average expression, if tissue source is unknown)
optional arguments:
-h, --help show this help message and exit
--result_dir [RESULT_DIR]
Path to the directory to save the results
--genome_version GENOME_VERSION
Genome assembly version, default is hg38. Allowed
values: hg18, hg19, hg38
==== 测试路径 ====
/TJPROJ6/RNA_SH/software/Oncofuse/oncofuse-1.1.1/test