/TJPROJ6/RNA_SH/personal_dir/fengjie/Personal_analysis/circlize
使用说明:
usage: ./circlize_plot [--] [--help] [--opts OPTS] [--stat STAT] [--cutoff CUTOFF] [--type TYPE] [--prefix PREFIX] flags: -h, --help show this help message and exit optional arguments: -x, --opts OPTS RDS file containing argument values -s, --stat STAT the stat file -c, --cutoff CUTOFF the cutoff value -t, --type TYPE the plot type -p, --prefix PREFIX the prefix of outfile
数据文件为富集结果中的*all_GOenrich.xls,需要时ALL下面的文件。
#!/usr/bin/env Rscript suppressMessages({ library(argparser) library(circlize) library(grid) library(graphics) library(ComplexHeatmap) }) argv <- arg_parser('') argv <- add_argument(argv,"--stat", help="the stat file") argv <- add_argument(argv,"--cutoff", help="the cutoff value") argv <- add_argument(argv,"--type", help="the plot type") argv <- add_argument(argv,"--prefix", help="the prefix of outfile") argv <- parse_args(argv) stat <- argv$stat type <- argv$type cutoff <- argv$cutoff prefix <- argv$prefix #test #setwd("/Users/fengjie/Desktop/Research/circlize") #stat <- "sinrf2vssinc.all_KEGGenrich.xls" #type <- "KEGG" #cutoff <- "padj" #prefix <- "test_kegg" data <- read.delim(stat,head=TRUE,sep='\t',quote='') if (cutoff=='padj') {data <- data[order(data$padj,decreasing=F),]} if (cutoff=='pvalue') {data <- data[order(data$pvalue,decreasing=F),]} if (type=='GO'){ bp <- subset(data,Category=='BP')[1:10,] cc <- subset(data,Category=='CC')[1:10,] mf <- subset(data,Category=='MF')[1:10,] data <- rbind(bp,mf,cc) data_new <- data[,c(2,1)] ko_color <- c(rep('#F7CC13', 10), rep('#954572', 10), rep('#0796E0', 10)) } if (type=='KEGG') { data <- data[1:30,] data_new <- data[,c(1,2)] ko_color <- c(rep('#00FFFF', 30)) } data_new$gene_num.min <- 0 data_new$gene_num.max <- as.numeric(unlist(lapply(data$BgRatio,function(x) strsplit(x,"/")[[1]][2]))) data_new$gene_num.rich <- as.numeric(unlist(lapply(data$BgRatio,function(x) strsplit(x,"/")[[1]][1]))) if (cutoff=='padj') {data_new$"-log10.p" <- -log10(data$padj)} if (cutoff=='pvalue') {data_new$"-log10.p" <- -log10(data$pvalue)} data_new$up.regulated <- data$Up data_new$down.regulated <- data$Down data_new$rich.factor <- as.numeric(unlist(lapply(data$GeneRatio,function(x) strsplit(x,"/")[[1]][1])))/data_new$gene_num.rich colnames(data_new)[c(1,2)] <- c("id", "category") if (type=='KEGG') {data_new$category <- "KEGG"} ID <- as.character(data_new$id) data_new$id <- factor(ID, levels = ID) dat <- data_new rownames(dat) <- data_new$id pdf(paste(prefix,'_circlize.pdf',sep = ""), width = 12, height = 15) circle_size = unit(1, 'snpc') circos.par(gap.degree = 2, start.degree = 90) plot_data <- dat[c('id', 'gene_num.min', 'gene_num.max')] circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1) circos.track( ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color, panel.fun = function(x, y) { ylim = get.cell.meta.data('ycenter') xlim = get.cell.meta.data('xcenter') sector.name = get.cell.meta.data('sector.index') circos.axis(h = 'top', labels.cex = 0.4, major.tick.percentage = 0.4, labels.niceFacing = FALSE) circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE) } ) plot_data <- dat[c('id', 'gene_num.min', 'gene_num.rich', '-log10.p')] label_data <- dat['gene_num.rich'] p_max <- round(max(dat$'-log10.p')) + 1 colorsChoice <- colorRampPalette(c('#FF906F', '#861D30')) color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1)) circos.genomicTrackPlotRegion( plot_data, track.height = 0.08, bg.border = NA, stack = TRUE, panel.fun = function(region, value, ...) { circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...) ylim = get.cell.meta.data('ycenter') xlim = label_data[get.cell.meta.data('sector.index'),1] / 2 sector.name = label_data[get.cell.meta.data('sector.index'),1] circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE) } ) dat$all.regulated <- dat$up.regulated + dat$down.regulated dat$up.proportion <- dat$up.regulated / dat$all.regulated dat$down.proportion <- dat$down.regulated / dat$all.regulated dat$up <- dat$up.proportion * dat$gene_num.max plot_data_up <- dat[c('id', 'gene_num.min', 'up')] names(plot_data_up) <- c('id', 'start', 'end') plot_data_up$type <- 1 dat$down <- dat$down.proportion * dat$gene_num.max + dat$up plot_data_down <- dat[c('id', 'up', 'down')] names(plot_data_down) <- c('id', 'start', 'end') plot_data_down$type <- 2 plot_data <- rbind(plot_data_up, plot_data_down) label_data <- dat[c('up', 'down', 'up.regulated', 'down.regulated')] color_assign <- colorRamp2(breaks = c(1, 2), col = c('#671166', '#7F8CCB')) circos.genomicTrackPlotRegion( plot_data, track.height = 0.08, bg.border = NA, stack = TRUE, panel.fun = function(region, value, ...) { circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...) ylim = get.cell.meta.data('cell.bottom.radius') - 0.5 xlim = label_data[get.cell.meta.data('sector.index'),1] / 2 sector.name = label_data[get.cell.meta.data('sector.index'),3] circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE) xlim = (label_data[get.cell.meta.data('sector.index'),2]+label_data[get.cell.meta.data('sector.index'),1]) / 2 sector.name = label_data[get.cell.meta.data('sector.index'),4] circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE) } ) plot_data <- dat[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')] label_data <- dat['category'] color_assign <- c('BP' = '#F7CC13', 'MF' = '#954572', 'CC' = '#0796E0', 'KEGG' = '#00FFFF') circos.genomicTrack( plot_data, ylim = c(0, 1), track.height = 0.3, bg.col = 'gray95', bg.border = NA, panel.fun = function(region, value, ...) { sector.name = get.cell.meta.data('sector.index') circos.genomicRect(region, value, col = color_assign[label_data[sector.name,1]], border = NA, ytop.column = 1, ybottom = 0, ...) circos.lines(c(0, max(region)), c(0.5, 0.5), col = 'gray', lwd = 0.3) } ) circos.clear() category_legend <- Legend( labels = c('BP', 'MF','CC'), type = 'points', pch = NA, background = c('#F7CC13', '#954572','#0796E0'), labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm')) updown_legend <- Legend( labels = c('Up-regulated', 'Down-regulated'), type = 'points', pch = NA, background = c('#671166', '#7F8CCB'), labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm')) if (cutoff=='padj') {p_title <- '-Log10(padj)'} if (cutoff=='pvalue') {p_title <- '-Log10(Pvalue)'} pvalue_legend <- Legend( col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0), colorRampPalette(c('#FF906F', '#861D30'))(6)), legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8), title_gp = gpar(fontsize = 9), title_position = 'topleft', title = p_title) if (type=='GO') { lgd_list_vertical <- packLegend(category_legend, updown_legend, pvalue_legend) } if (type=='KEGG') { lgd_list_vertical <- packLegend(updown_legend, pvalue_legend) } pushViewport(viewport(x = 0.5, y = 0.5)) grid.draw(lgd_list_vertical) upViewport() dev.off()
结果readme文件
第一圈展示了富集到的通路,并显示了通路的名称(或id)、通路中所含背景基因总数(展示为刻度轴)、通路所属的高级分类单元(以不同颜色标识); 第二圈展示了富集到目标通路中,所富集的基因的总数(以不同长度的区块反映了数量信息,并将数值标识在区块中),以及富集分析的显著性信息(标记为渐变颜色); 第三圈展示了富集到的通路中,显著上下调的基因数量和比例的信息(以不同长度的区块分别表示上下调基因的相对占比,并用不同颜色分别标识上下调基因,内侧分别显示了上下调基因数量数值); 第四圈展示了各个通路的富集因子,高度反映得分值,颜色指示了通路所属的高级分类单元。