用户工具

站点工具


soapfuse软件

SOAPfuse

SOAPfuse是华大(BGI)开发的一款专门针对人类(human)融合基因进行分析的工具。

在运行SOAPfuse之前,需要准备以下几个文件:

Homo_sapiens_Ensemble_94.fa
Homo_sapiens_Ensemble_94.gtf
cytoBand.txt.gz #http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/cytoBand.txt.gz(需检查染色体命名是否一致,如存在chr前缀差异,需要处理成一致)
HGNC_Gene_Family_dataset.txt #http://www.genenames.org/cgi-bin/genefamilies/download-all/tsv
HumanRef_refseg_symbols_relationship.list  #gtf中的染色体名与fa中的染色体名的对应关系(chr问题)

构建索引:

见 /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/db/index.sh

export PERL5LIB=$PERL5LIB:/TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/source/bin/perl_module

perl /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/source/SOAPfuse-S00-Generate_SOAPfuse_database.pl -wg /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/db/Homo_sapiens_Ensemble_94.fa -gtf /TJPROJ6/GB_TR/reference_data/new_pip/Animal/Homo_sapiens/Homo_sapiens_Ensemble_94/Homo_sapiens_Ensemble_94.gtf -cbd /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/db/cytoBand.txt.gz -gf /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/db/HGNC_Gene_Family_dataset.txt  -sd /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27 -dd /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/db -rft /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/db/HumanRef_refseg_symbols_relationship.list

分析所需的输入文件为原始fq.gz文件;需要填写sample.list,格式如下(以制表符\t分隔):

rawdata       DXBR_361_009TB  DXBR_361_009TB   150

目录名         文库名           文件名           reads长度

分析:

见 /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/test/test.sh

export PERL5LIB=$PERL5LIB:/TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/source/bin/perl_module

/TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/SOAPfuse-RUN.pl -c /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/config/config.txt -fd /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/test -l /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/test/sample.list -o /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/test

结果见 /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/test/final_fusion_genes/rawdata/result.zip

fusion_soapfuse_result.zip

软件配置文件说明

#DB_db_dir  =  /PUBLIC/database/RNA/Med/Database/Fusion/SOAPfuse/GRCh38
DB_db_dir  =  /TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/db

DB_wg_soap_ref       =     $(db_dir)/WG_index_soap/genome.fa.index
DB_cytoBand          =     $(db_dir)/cytoBand.txt
DB_trans_soap_ref    =     $(db_dir)/transcript_index_soap/transcript.fa.index
DB_trans_bwa_ref     =     $(db_dir)/transcript_index_bwa/transcript.fa
DB_trans_psl         =     $(db_dir)/transcript.psl
DB_trans_gtf         =     $(db_dir)/Gene_annotation.gtf.gz
DB_gene_psl          =     $(db_dir)/gene.psl
DB_gene_fa           =     $(db_dir)/gene.fa
DB_genefamily        =     $(db_dir)/gene_family/gene_family.brief.txt
DB_blast_homo_list   =     $(db_dir)/blast_homo_gene.m8.gz

PG_pg_dir   =    /PUBLIC/software/RNA/SOAPfuse/SOAPfuse-v1.27/source/bin

PG_soap               =     $(pg_dir)/aln_bin/soap2.21
PG_bwa                =     $(pg_dir)/aln_bin/bwa
PG_blat               =     $(pg_dir)/aln_bin/blat
PG_bwt                =     $(pg_dir)/aln_bin/2bwt-builder2.20
PG_DE_stat            =     $(pg_dir)/DE_statistic
PG_convert            =     $(pg_dir)/convert

PS_ps_dir   =   /PUBLIC/software/RNA/SOAPfuse/SOAPfuse-v1.27/source

PS_s01            =     $(ps_dir)/SOAPfuse-01-alignWG.pl
PS_s02            =     $(ps_dir)/SOAPfuse-02-align_unmap_transcript.pl
PS_s03            =     $(ps_dir)/SOAPfuse-03-align_trim_unmap_transcript.pl
PS_s04            =     $(ps_dir)/SOAPfuse-04-change_SE.pl
PS_s05            =     $(ps_dir)/SOAPfuse-05-candidate.pl
PS_s06            =     $(ps_dir)/SOAPfuse-06-divide_soap_denovo_unmap.pl
PS_s07            =     $(ps_dir)/SOAPfuse-07-junction_seq_deal.pl
PS_s08            =     $(ps_dir)/SOAPfuse-08-final_fusionGene.pl
PS_s09            =     $(ps_dir)/SOAPfuse-09-deeper_analysis.pl


PD_alignWG                   =     $(all_out)/alignWG
PD_align_unmap_Tran          =     $(all_out)/align_unmap_Tran
PD_align_trim_unmap_Tran     =     $(all_out)/align_trim_unmap_Tran
PD_change_SE                 =     $(all_out)/change_SE
PD_candidate                 =     $(all_out)/candidate
PD_denovo_unmap              =     $(all_out)/denovo_unmap
PD_junction_seq              =     $(all_out)/junction_seq
PD_final_fusion_genes        =     $(all_out)/final_fusion_genes


PA_all_somatic_mode        =     no

PA_all_postfix_of_tissue   =     'N:-N;N:-Normal;N:-B;N:-Blood;T:-CA;T:-C;T:-T;T:-Tumor;T:-Cancer'

PA_all_fq_postfix     =     fq.gz

PA_all_process_of_align_software     =     12

PA_all_shortest_length_trim_unmap_to     =     40

PA_all_maximum_genome_loc_trimmed_read_mapped     =     2

PA_all_maximum_genome_loc_intact_read_mapped     =     1

PA_all_intron_len_extend_from_exon_edge     =     100

PA_s02_realign     =     yes

PA_s05_save_genes_name_with_dot     =     no

PA_s05_save_genes_from_same_family     =     yes

PA_s05_amass_control_of_span_reads     =     yes

PA_s05_maximum_fusion_partner_of_one_gene     =     10

PA_s05_the_minimum_span_reads_for_candidate     =     5

PA_s06_save_reads_have_mismatch_around_fusepos     =     yes

PA_s06_number_of_flank_bases_near_read_end_for_filter_mismatch     =     5

PA_s06_the_maximum_mismatch_in_flank_region     =     0

PA_s07_the_minimum_span_reads_for_junction_construction     =     5

PA_s07_extended_bases_near_pe_read_end     =     0

PA_s07_the_min_cons_for_credible_fuse_region     =     0.5

PA_s07_maximum_mismatch_for_align_junction_reads     =     3

PA_s07_flank_bases_around_fuse_point_for_check_mismatch     =     5

PA_s07_maximum_mismatch_in_flank_region     =     0

PA_s07_junc_read_map_both_sides_at_least     =     7

PA_s08_number_of_extend_bases     =     0

PA_s08_insert_control_sup     =   no

PA_s08_min_sum_reads     =     5

PA_s08_min_support_reads_for_both_edge     =     1,1

PA_s08_min_support_reads_for_one_edge_one_internal     =     2,2

PA_s08_min_support_reads_for_both_internal     =     2,2

PA_s08_min_intrachr_distance     =     1000

PA_s08_min_bases_covered_both_sides_around_fuse_point     =     10

PA_s08_only_remain_edge_case     =     no

PA_s09_draw_fusion_expression_svg     =     yes

使用医口老流程生成脚本

/PUBLIC/source/RNA/med/Pipeline/medpipline1.2/bin/SOAPfuse --version GRCh38 --database /PUBLIC/database/RNA/Med/Database --pipline /PUBLIC/source/RNA/med/Pipeline/medpipline1.2 --rootdir /TJPROJ6/RNA_SH/shouhou/202208/X101SC22031993-Z01-J001

result整理脚本

/PUBLIC/source/RNA/med/Pipeline/medpipline1.2/bin/ASresult --sample config/sample.txt --species hsa --exclude 1,2,3,4,5,6,8 --pipline /PUBLIC/source/RNA/med/Pipeline/medpipline1.2 --rootdir /TJPROJ6/RNA_SH/shouhou/202208/X101SC22031993-Z01-J001 --fusion SOAPfuse

需要修改脚本中的环境变量(设置如下PERL5LIB):

export PERL5LIB=$PERL5LIB:/TJPROJ6/RNA_SH/script_dir/SOAPfuse/SOAPfuse-v1.27/source/bin/perl_module
soapfuse软件.txt · 最后更改: 2022/08/23 02:06 由 fengjie