====富集圈图====
{{ :go_circlize.pdf |}}
{{::go圈图.png?400|}}
/TJPROJ6/RNA_SH/personal_dir/fengjie/Personal_analysis/circlize
使用说明:
usage: ./circlize_plot [--] [--help] [--opts OPTS] [--stat STAT] [--cutoff CUTOFF] [--type TYPE] [--prefix PREFIX]
flags:
-h, --help show this help message and exit
optional arguments:
-x, --opts OPTS RDS file containing argument values
-s, --stat STAT the stat file
-c, --cutoff CUTOFF the cutoff value
-t, --type TYPE the plot type
-p, --prefix PREFIX the prefix of outfile
数据文件为富集结果中的*all_GOenrich.xls,需要时ALL下面的文件。
#!/usr/bin/env Rscript
suppressMessages({
library(argparser)
library(circlize)
library(grid)
library(graphics)
library(ComplexHeatmap)
})
argv <- arg_parser('')
argv <- add_argument(argv,"--stat", help="the stat file")
argv <- add_argument(argv,"--cutoff", help="the cutoff value")
argv <- add_argument(argv,"--type", help="the plot type")
argv <- add_argument(argv,"--prefix", help="the prefix of outfile")
argv <- parse_args(argv)
stat <- argv$stat
type <- argv$type
cutoff <- argv$cutoff
prefix <- argv$prefix
#test
#setwd("/Users/fengjie/Desktop/Research/circlize")
#stat <- "sinrf2vssinc.all_KEGGenrich.xls"
#type <- "KEGG"
#cutoff <- "padj"
#prefix <- "test_kegg"
data <- read.delim(stat,head=TRUE,sep='\t',quote='')
if (cutoff=='padj') {data <- data[order(data$padj,decreasing=F),]}
if (cutoff=='pvalue') {data <- data[order(data$pvalue,decreasing=F),]}
if (type=='GO'){
bp <- subset(data,Category=='BP')[1:10,]
cc <- subset(data,Category=='CC')[1:10,]
mf <- subset(data,Category=='MF')[1:10,]
data <- rbind(bp,mf,cc)
data_new <- data[,c(2,1)]
ko_color <- c(rep('#F7CC13', 10), rep('#954572', 10), rep('#0796E0', 10))
}
if (type=='KEGG') {
data <- data[1:30,]
data_new <- data[,c(1,2)]
ko_color <- c(rep('#00FFFF', 30))
}
data_new$gene_num.min <- 0
data_new$gene_num.max <- as.numeric(unlist(lapply(data$BgRatio,function(x) strsplit(x,"/")[[1]][2])))
data_new$gene_num.rich <- as.numeric(unlist(lapply(data$BgRatio,function(x) strsplit(x,"/")[[1]][1])))
if (cutoff=='padj') {data_new$"-log10.p" <- -log10(data$padj)}
if (cutoff=='pvalue') {data_new$"-log10.p" <- -log10(data$pvalue)}
data_new$up.regulated <- data$Up
data_new$down.regulated <- data$Down
data_new$rich.factor <- as.numeric(unlist(lapply(data$GeneRatio,function(x) strsplit(x,"/")[[1]][1])))/data_new$gene_num.rich
colnames(data_new)[c(1,2)] <- c("id", "category")
if (type=='KEGG') {data_new$category <- "KEGG"}
ID <- as.character(data_new$id)
data_new$id <- factor(ID, levels = ID)
dat <- data_new
rownames(dat) <- data_new$id
pdf(paste(prefix,'_circlize.pdf',sep = ""), width = 12, height = 15)
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 2, start.degree = 90)
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.max')]
circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1)
circos.track(
ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,
panel.fun = function(x, y) {
ylim = get.cell.meta.data('ycenter')
xlim = get.cell.meta.data('xcenter')
sector.name = get.cell.meta.data('sector.index')
circos.axis(h = 'top', labels.cex = 0.4, major.tick.percentage = 0.4, labels.niceFacing = FALSE)
circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)
} )
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.rich', '-log10.p')]
label_data <- dat['gene_num.rich']
p_max <- round(max(dat$'-log10.p')) + 1
colorsChoice <- colorRampPalette(c('#FF906F', '#861D30'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))
circos.genomicTrackPlotRegion(
plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
panel.fun = function(region, value, ...) {
circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
ylim = get.cell.meta.data('ycenter')
xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
sector.name = label_data[get.cell.meta.data('sector.index'),1]
circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)
} )
dat$all.regulated <- dat$up.regulated + dat$down.regulated
dat$up.proportion <- dat$up.regulated / dat$all.regulated
dat$down.proportion <- dat$down.regulated / dat$all.regulated
dat$up <- dat$up.proportion * dat$gene_num.max
plot_data_up <- dat[c('id', 'gene_num.min', 'up')]
names(plot_data_up) <- c('id', 'start', 'end')
plot_data_up$type <- 1
dat$down <- dat$down.proportion * dat$gene_num.max + dat$up
plot_data_down <- dat[c('id', 'up', 'down')]
names(plot_data_down) <- c('id', 'start', 'end')
plot_data_down$type <- 2
plot_data <- rbind(plot_data_up, plot_data_down)
label_data <- dat[c('up', 'down', 'up.regulated', 'down.regulated')]
color_assign <- colorRamp2(breaks = c(1, 2), col = c('#671166', '#7F8CCB'))
circos.genomicTrackPlotRegion(
plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
panel.fun = function(region, value, ...) {
circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
ylim = get.cell.meta.data('cell.bottom.radius') - 0.5
xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
sector.name = label_data[get.cell.meta.data('sector.index'),3]
circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)
xlim = (label_data[get.cell.meta.data('sector.index'),2]+label_data[get.cell.meta.data('sector.index'),1]) / 2
sector.name = label_data[get.cell.meta.data('sector.index'),4]
circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)
} )
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')]
label_data <- dat['category']
color_assign <- c('BP' = '#F7CC13', 'MF' = '#954572', 'CC' = '#0796E0', 'KEGG' = '#00FFFF')
circos.genomicTrack(
plot_data, ylim = c(0, 1), track.height = 0.3, bg.col = 'gray95', bg.border = NA,
panel.fun = function(region, value, ...) {
sector.name = get.cell.meta.data('sector.index')
circos.genomicRect(region, value, col = color_assign[label_data[sector.name,1]], border = NA, ytop.column = 1, ybottom = 0, ...)
circos.lines(c(0, max(region)), c(0.5, 0.5), col = 'gray', lwd = 0.3)
} )
circos.clear()
category_legend <- Legend(
labels = c('BP', 'MF','CC'),
type = 'points', pch = NA, background = c('#F7CC13', '#954572','#0796E0'),
labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
updown_legend <- Legend(
labels = c('Up-regulated', 'Down-regulated'),
type = 'points', pch = NA, background = c('#671166', '#7F8CCB'),
labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
if (cutoff=='padj') {p_title <- '-Log10(padj)'}
if (cutoff=='pvalue') {p_title <- '-Log10(Pvalue)'}
pvalue_legend <- Legend(
col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0),
colorRampPalette(c('#FF906F', '#861D30'))(6)),
legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8),
title_gp = gpar(fontsize = 9), title_position = 'topleft', title = p_title)
if (type=='GO') {
lgd_list_vertical <- packLegend(category_legend, updown_legend, pvalue_legend)
}
if (type=='KEGG') {
lgd_list_vertical <- packLegend(updown_legend, pvalue_legend)
}
pushViewport(viewport(x = 0.5, y = 0.5))
grid.draw(lgd_list_vertical)
upViewport()
dev.off()
结果readme文件
第一圈展示了富集到的通路,并显示了通路的名称(或id)、通路中所含背景基因总数(展示为刻度轴)、通路所属的高级分类单元(以不同颜色标识);
第二圈展示了富集到目标通路中,所富集的基因的总数(以不同长度的区块反映了数量信息,并将数值标识在区块中),以及富集分析的显著性信息(标记为渐变颜色);
第三圈展示了富集到的通路中,显著上下调的基因数量和比例的信息(以不同长度的区块分别表示上下调基因的相对占比,并用不同颜色分别标识上下调基因,内侧分别显示了上下调基因数量数值);
第四圈展示了各个通路的富集因子,高度反映得分值,颜色指示了通路所属的高级分类单元。