=====SOAPfuse===== SOAPfuse是华大开发的一款专门针对人类(human)融合基因进行分析的工具。 在运行SOAPfuse之前,需要准备以下几个文件: Homo_sapiens_Ensemble_94.fa Homo_sapiens_Ensemble_94.gtf cytoBand.txt.gz #http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/cytoBand.txt.gz(需检查染色体命名是否一致,chr问题需要处理成一致) HGNC_Gene_Family_dataset.txt #http://www.genenames.org/cgi-bin/genefamilies/download-all/tsv HumanRef_refseg_symbols_relationship.list #gtf中染色体和fa中染色体的对应关系(chr问题) 构建索引: 见 /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/db/index.sh export PERL5LIB=$PERL5LIB:/TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/source/bin/perl_module perl /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/source/SOAPfuse-S00-Generate_SOAPfuse_database.pl -wg /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/db/Homo_sapiens_Ensemble_94.fa -gtf /TJPROJ6/GB_TR/reference_data/new_pip/Animal/Homo_sapiens/Homo_sapiens_Ensemble_94/Homo_sapiens_Ensemble_94.gtf -cbd /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/db/cytoBand.txt.gz -gf /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/db/HGNC_Gene_Family_dataset.txt -sd /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27 -dd /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/db -rft /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/db/HumanRef_refseg_symbols_relationship.list 分析所需的输入文件为原始fq.gz文件,填写sample.list,格式如下(\t分隔): rawdata DXBR_361_009TB DXBR_361_009TB 150 (各列依次为:目录名 文库名 文件名 reads长度) 分析: 见 /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/test/test.sh export PERL5LIB=$PERL5LIB:/TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/source/bin/perl_module /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/SOAPfuse-RUN.pl -c /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/config/config.txt -fd /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/test -l /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/test/sample.list -o /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/test 结果见 /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/test/final_fusion_genes/rawdata/result.zip 
{{fusion_soapfuse:fusion_soapfuse_result.zip}} ===软件配置文件说明=== #DB_db_dir = /PUBLIC/database/RNA/Med/Database/Fusion/SOAPfuse/GRCh38 DB_db_dir = /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/db DB_wg_soap_ref = $(db_dir)/WG_index_soap/genome.fa.index DB_cytoBand = $(db_dir)/cytoBand.txt DB_trans_soap_ref = $(db_dir)/transcript_index_soap/transcript.fa.index DB_trans_bwa_ref = $(db_dir)/transcript_index_bwa/transcript.fa DB_trans_psl = $(db_dir)/transcript.psl DB_trans_gtf = $(db_dir)/Gene_annotation.gtf.gz DB_gene_psl = $(db_dir)/gene.psl DB_gene_fa = $(db_dir)/gene.fa DB_genefamily = $(db_dir)/gene_family/gene_family.brief.txt DB_blast_homo_list = $(db_dir)/blast_homo_gene.m8.gz PG_pg_dir = /PUBLIC/software/RNA/SOAPfuse/SOAPfuse-v1.27/source/bin PG_soap = $(pg_dir)/aln_bin/soap2.21 PG_bwa = $(pg_dir)/aln_bin/bwa PG_blat = $(pg_dir)/aln_bin/blat PG_bwt = $(pg_dir)/aln_bin/2bwt-builder2.20 PG_DE_stat = $(pg_dir)/DE_statistic PG_convert = $(pg_dir)/convert PS_ps_dir = /PUBLIC/software/RNA/SOAPfuse/SOAPfuse-v1.27/source PS_s01 = $(ps_dir)/SOAPfuse-01-alignWG.pl PS_s02 = $(ps_dir)/SOAPfuse-02-align_unmap_transcript.pl PS_s03 = $(ps_dir)/SOAPfuse-03-align_trim_unmap_transcript.pl PS_s04 = $(ps_dir)/SOAPfuse-04-change_SE.pl PS_s05 = $(ps_dir)/SOAPfuse-05-candidate.pl PS_s06 = $(ps_dir)/SOAPfuse-06-divide_soap_denovo_unmap.pl PS_s07 = $(ps_dir)/SOAPfuse-07-junction_seq_deal.pl PS_s08 = $(ps_dir)/SOAPfuse-08-final_fusionGene.pl PS_s09 = $(ps_dir)/SOAPfuse-09-deeper_analysis.pl PD_alignWG = $(all_out)/alignWG PD_align_unmap_Tran = $(all_out)/align_unmap_Tran PD_align_trim_unmap_Tran = $(all_out)/align_trim_unmap_Tran PD_change_SE = $(all_out)/change_SE PD_candidate = $(all_out)/candidate PD_denovo_unmap = $(all_out)/denovo_unmap PD_junction_seq = $(all_out)/junction_seq PD_final_fusion_genes = $(all_out)/final_fusion_genes PA_all_somatic_mode = no PA_all_postfix_of_tissue = 'N:-N;N:-Normal;N:-B;N:-Blood;T:-CA;T:-C;T:-T;T:-Tumor;T:-Cancer' PA_all_fq_postfix = fq.gz 
PA_all_process_of_align_software = 12 PA_all_shortest_length_trim_unmap_to = 40 PA_all_maximum_genome_loc_trimmed_read_mapped = 2 PA_all_maximum_genome_loc_intact_read_mapped = 1 PA_all_intron_len_extend_from_exon_edge = 100 PA_s02_realign = yes PA_s05_save_genes_name_with_dot = no PA_s05_save_genes_from_same_family = yes PA_s05_amass_control_of_span_reads = yes PA_s05_maximum_fusion_partner_of_one_gene = 10 PA_s05_the_minimum_span_reads_for_candidate = 5 PA_s06_save_reads_have_mismatch_around_fusepos = yes PA_s06_number_of_flank_bases_near_read_end_for_filter_mismatch = 5 PA_s06_the_maximum_mismatch_in_flank_region = 0 PA_s07_the_minimum_span_reads_for_junction_construction = 5 PA_s07_extended_bases_near_pe_read_end = 0 PA_s07_the_min_cons_for_credible_fuse_region = 0.5 PA_s07_maximum_mismatch_for_align_junction_reads = 3 PA_s07_flank_bases_around_fuse_point_for_check_mismatch = 5 PA_s07_maximum_mismatch_in_flank_region = 0 PA_s07_junc_read_map_both_sides_at_least = 7 PA_s08_number_of_extend_bases = 0 PA_s08_insert_control_sup = no PA_s08_min_sum_reads = 5 PA_s08_min_support_reads_for_both_edge = 1,1 PA_s08_min_support_reads_for_one_edge_one_internal = 2,2 PA_s08_min_support_reads_for_both_internal = 2,2 PA_s08_min_intrachr_distance = 1000 PA_s08_min_bases_covered_both_sides_around_fuse_point = 10 PA_s08_only_remain_edge_case = no PA_s09_draw_fusion_expression_svg = yes 用医口老流程生成脚本: /PUBLIC/source/RNA/med/Pipeline/medpipline1.2/bin/SOAPfuse --version GRCh38 --database /PUBLIC/database/RNA/Med/Database --pipline /PUBLIC/source/RNA/med/Pipeline/medpipline1.2 --rootdir /TJPROJ6/RNA_SH/shouhou/202208/X101SC22031993-Z01-J001 结果整理脚本: /PUBLIC/source/RNA/med/Pipeline/medpipline1.2/bin/ASresult --sample config/sample.txt --species hsa --exclude 1,2,3,4,5,6,8 --pipline /PUBLIC/source/RNA/med/Pipeline/medpipline1.2 --rootdir /TJPROJ6/RNA_SH/shouhou/202208/X101SC22031993-Z01-J001 --fusion SOAPfuse 需要修改脚本环境(设置PERL5LIB): export PERL5LIB=$PERL5LIB:/TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/source/bin/perl_module