#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Date    : 2023-7-13
# @Author  : lizhengnan
"""Prepare Oncofuse coordinate input from fusion results and submit the job.

====== Oncofuse ======

This tool is designed to predict the oncogenic potential of fusion genes
found by Next-Generation Sequencing in cancer cells. It is a post-processing
step that tries to validate in-silico the predictions made by fusion
detection software. Oncofuse is NOT a fusion detection software; its goal is
NOT to identify fusion sequences, but to assign a functional prediction score
(oncogenic potential, i.e. the probability of being 'driver' events) to
fusion sequences identified by other software such as Tophat-fusion,
fusioncatcher or STAR.

Official website: https://genetica.unav.edu/oncofuse.html

===== Analysing the pipeline's fusion gene results =====

For every sample listed in the sample-to-tissue file this script reads
``<fusion_result>/<sample>_fusion.xls``, writes an Oncofuse "coord" input
file to ``./coord/<sample>``, generates ``./oncofuse.sh`` and submits it with
``qsub``.

==== Usage ====

    python Oncofuse.py --help
    usage: Oncofuse.py [-h] [--result_dir [RESULT_DIR]]
                       [--genome_version GENOME_VERSION]
                       fusion_result sample2tissue

    This script performs Oncofuse analysis

    positional arguments:
      fusion_result         Path to the fusion result file, example: /TJPROJ6/RNA_
                            SH/shouhou/202307/X101SC23022823-Z01-J004/03.Result_X1
                            01SC23022823-Z01-J004-B4-16_Homo_sapiens/Result_X101SC
                            23022823-Z01-J004-B4-16/8.Fusion/1.fusionlist/
      sample2tissue         Path to the sample to tissue file, the four supported
                            tissue types: EPI (epithelial origin), HEM
                            (hematological origin), MES (mesenchymal origin) and
                            AVG (average expression, if tissue source is unknown)

    optional arguments:
      -h, --help            show this help message and exit
      --result_dir [RESULT_DIR]
                            Path to the directory to save the results
      --genome_version GENOME_VERSION
                            Genome assembly version, default is hg38. Allowed
                            values: hg18, hg19, hg38

==== Test path ====

    /TJPROJ6/RNA_SH/software/Oncofuse/oncofuse-1.1.1/test
"""
import os
import argparse
import sys
import shlex
import subprocess

# Tissue types accepted in the sample-to-tissue file; anything else becomes AVG.
TISSUE = ['EPI', 'HEM', 'MES', 'AVG']
DEFAULT_TISSUE = 'AVG'

GENOME_VERSIONS = ['hg18', 'hg19', 'hg38']
DEFAULT_GENOME_VERSION = 'hg38'

ONCOFUSE_HOME = "/TJPROJ6/RNA_SH/software/Oncofuse/oncofuse-1.1.1"
JAVA_BIN = "/PUBLIC/software/public/System/jre1.8.0_25/bin/java"

# Columns that must be present in the header of every <sample>_fusion.xls.
REQUIRED_COLUMNS = [
    'LeftBreakpoint',
    'RightBreakpoint',
    'JunctionReadCount',
    'SpanningFragCount',
]

COORD_HEADER = [
    "#5' chrom", "5' coord", "3' chrom", "3' coord", "tissue",
    "5' FPG", "3' FPG", "spanning", "encompassing",
]


def build_parser():
    """Return the command-line parser (same options as the original script)."""
    parser = argparse.ArgumentParser(
        description="This script performs Oncofuse analysis")
    parser.add_argument(
        "fusion_result",
        help="Path to the fusion result file, example: /TJPROJ6/RNA_SH/shouhou/202307/X101SC23022823-Z01-J004/03.Result_X101SC23022823-Z01-J004-B4-16_Homo_sapiens/Result_X101SC23022823-Z01-J004-B4-16/8.Fusion/1.fusionlist/")
    parser.add_argument(
        "sample2tissue",
        help="Path to the sample to tissue file, the four supported tissue types: EPI (epithelial origin), HEM (hematological origin), MES (mesenchymal origin) and AVG (average expression, if tissue source is unknown)")
    parser.add_argument(
        "--result_dir",
        help="Path to the directory to save the results",
        nargs='?', default=None)
    parser.add_argument(
        "--genome_version",
        help="Genome assembly version, default is hg38. Allowed values: hg18, hg19, hg38",
        default=DEFAULT_GENOME_VERSION)
    return parser


def resolve_result_dir(result_dir, cwd):
    """Return the absolute output directory.

    Falls back to ``<cwd>/result`` when ``result_dir`` is ``None`` or blank.
    A relative path is anchored at ``cwd`` because the generated shell script
    changes into the coord directory before Oncofuse writes its output.
    """
    cleaned = result_dir.strip() if result_dir is not None else ''
    if not cleaned:
        return os.path.join(cwd, 'result')
    return os.path.abspath(os.path.join(cwd, cleaned))


def parse_sample2tissue(lines):
    """Build a ``{sample: tissue}`` dict from tab-separated lines.

    Blank lines are ignored. A missing or unsupported tissue is replaced by
    ``AVG``; unsupported values are reported on stderr.
    """
    mapping = {}
    for raw in lines:
        stripped = raw.strip()
        if not stripped:
            continue
        # NOTE(review): header detection is by prefix, so a sample whose name
        # starts with "sample" is skipped as well -- confirm naming convention.
        if stripped.startswith('sample'):
            continue
        fields = stripped.split('\t')
        sample = fields[0]
        tissue = fields[1].strip() if len(fields) > 1 else ''
        if tissue not in TISSUE:
            if tissue:
                sys.stderr.write(
                    "WARNING: unsupported tissue '{}' for sample '{}', "
                    "using {}\n".format(tissue, sample, DEFAULT_TISSUE))
            tissue = DEFAULT_TISSUE
        mapping[sample] = tissue
    return mapping


def split_breakpoint(value):
    """Split a ``chrom:coord:third`` breakpoint into its first three fields.

    Raises:
        ValueError: if fewer than three colon-separated fields are present.
    """
    parts = value.split(":")
    if len(parts) < 3:
        raise ValueError(
            "malformed breakpoint '{}', expected chrom:coord:field".format(value))
    return parts[0], parts[1], parts[2]


def convert_fusion_lines(lines, tissue):
    """Convert the lines of a fusion table into Oncofuse coord-file lines.

    Args:
        lines: iterable of text lines, the first being the tab-separated header.
        tissue: tissue code written into every output row.

    Returns:
        A list of newline-terminated strings: the coord header followed by one
        row per non-blank input row.

    Raises:
        ValueError: on empty input, a missing required column, a malformed
            breakpoint or a non-integer read count.
    """
    rows = iter(lines)
    try:
        header = next(rows).strip().split('\t')
    except StopIteration:
        raise ValueError("fusion result is empty: no header line") from None

    column = {}
    for name in REQUIRED_COLUMNS:
        if name not in header:
            raise ValueError(
                "column '{}' not found in fusion result header".format(name))
        column[name] = header.index(name)

    out = ["\t".join(COORD_HEADER) + "\n"]
    for raw in rows:
        stripped = raw.strip()
        if not stripped:
            continue
        fields = stripped.split('\t')
        left_chrom, left_coord, left_third = split_breakpoint(
            fields[column['LeftBreakpoint']])
        right_chrom, right_coord, right_third = split_breakpoint(
            fields[column['RightBreakpoint']])
        junction = fields[column['JunctionReadCount']]
        spanning = fields[column['SpanningFragCount']]
        # The last two columns are the junction read count and the sum of
        # junction reads and spanning fragments, exactly as before.
        total = int(junction) + int(spanning)
        out.append("\t".join([
            left_chrom, left_coord, right_chrom, right_coord, tissue,
            left_third, right_third, junction, str(total),
        ]) + "\n")
    return out


def build_oncofuse_script(samples, coord_dir, result_dir, genome_version):
    """Return the text of the bash script that runs Oncofuse for each sample."""
    # NOTE(review): the script exports a `java` variable but the commands call
    # bare `java` from PATH, as in the original -- confirm which JRE is meant.
    script = [
        "#!/bin/bash",
        'export java="{}"'.format(JAVA_BIN),
        "",
        "cd {}".format(shlex.quote(coord_dir)),
        "",
    ]
    for sample in samples:
        output = os.path.join(result_dir, sample + "_Oncofuse_result.xls")
        script.append(
            "java -jar {}/Oncofuse.jar -a {} -p 2 {} coord - {}".format(
                ONCOFUSE_HOME, genome_version,
                shlex.quote(sample), shlex.quote(output)))
        script.append("")
    script.append("cp {}/readme.txt {}".format(
        ONCOFUSE_HOME, shlex.quote(result_dir)))
    return "\n".join(script) + "\n"


def main(argv=None):
    """Run the whole workflow; return the exit status of ``qsub``."""
    parser = build_parser()
    args = parser.parse_args(argv)

    fusion_result = args.fusion_result.strip()
    sample2tissue = args.sample2tissue.strip()
    cwd = os.getcwd()
    result_dir = resolve_result_dir(args.result_dir, cwd)

    genome_version = args.genome_version.strip() or DEFAULT_GENOME_VERSION
    if genome_version not in GENOME_VERSIONS:
        parser.error("--genome_version must be one of: {}".format(
            ", ".join(GENOME_VERSIONS)))

    with open(sample2tissue) as handle:
        sample2tissue_dict = parse_sample2tissue(handle)
    if not sample2tissue_dict:
        parser.error("no samples found in {}".format(sample2tissue))

    os.makedirs(result_dir, exist_ok=True)
    coord_dir = os.path.join(cwd, 'coord')
    os.makedirs(coord_dir, exist_ok=True)

    # One coord file per sample, named after the sample.
    for sample, tissue in sample2tissue_dict.items():
        fusion_path = os.path.join(fusion_result, sample + "_fusion.xls")
        with open(fusion_path) as handle:
            coord_lines = convert_fusion_lines(handle, tissue)
        with open(os.path.join(coord_dir, sample), 'w') as tmp_out:
            tmp_out.writelines(coord_lines)

    script_path = os.path.join(cwd, "oncofuse.sh")
    with open(script_path, 'w') as script:
        script.write(build_oncofuse_script(
            list(sample2tissue_dict), coord_dir, result_dir, genome_version))

    # Argument list instead of a shell string, so paths with spaces survive.
    try:
        return subprocess.call(
            ["qsub", "-V", "-cwd", "-l", "vf=4G,p=2", script_path])
    except FileNotFoundError:
        sys.stderr.write("ERROR: qsub not found; script left at {}\n".format(
            script_path))
        return 127


if __name__ == "__main__":
    sys.exit(main())