富集圈图

go_circlize.pdf

/TJPROJ6/RNA_SH/personal_dir/fengjie/Personal_analysis/circlize

使用说明:

usage: ./circlize_plot [--] [--help] [--opts OPTS] [--stat STAT] [--cutoff CUTOFF] [--type TYPE] [--prefix PREFIX]




flags:
  -h, --help			show this help message and exit

optional arguments:
  -x, --opts OPTS			RDS file containing argument values
  -s, --stat STAT			the stat file
  -c, --cutoff CUTOFF			the cutoff value
  -t, --type TYPE			the plot type
  -p, --prefix PREFIX			the prefix of outfile

数据文件为富集结果中的*all_GOenrich.xls,需要时ALL下面的文件。

#!/usr/bin/env Rscript


suppressMessages({
  library(argparser)
  library(circlize)
  library(grid)
  library(graphics)
  library(ComplexHeatmap)
  })

argv <- arg_parser('')
argv <- add_argument(argv,"--stat", help="the stat file")
argv <- add_argument(argv,"--cutoff", help="the cutoff value")
argv <- add_argument(argv,"--type", help="the plot type")
argv <- add_argument(argv,"--prefix", help="the prefix of outfile")
argv <- parse_args(argv)

stat <- argv$stat
type <- argv$type
cutoff <- argv$cutoff
prefix <- argv$prefix

#test
#setwd("/Users/fengjie/Desktop/Research/circlize")
#stat <- "sinrf2vssinc.all_KEGGenrich.xls"
#type <- "KEGG"
#cutoff <- "padj"
#prefix <- "test_kegg"

data <- read.delim(stat,head=TRUE,sep='\t',quote='')

if (cutoff=='padj') {data <- data[order(data$padj,decreasing=F),]}
if (cutoff=='pvalue') {data <- data[order(data$pvalue,decreasing=F),]}

if (type=='GO'){
  bp <- subset(data,Category=='BP')[1:10,]
  cc <- subset(data,Category=='CC')[1:10,]
  mf <- subset(data,Category=='MF')[1:10,]
  data <- rbind(bp,mf,cc)
  data_new <- data[,c(2,1)]
  ko_color <- c(rep('#F7CC13', 10), rep('#954572', 10), rep('#0796E0', 10))
}


if (type=='KEGG') {
  data <- data[1:30,]
  data_new <- data[,c(1,2)]
  ko_color <- c(rep('#00FFFF', 30))
}

data_new$gene_num.min <- 0
data_new$gene_num.max <- as.numeric(unlist(lapply(data$BgRatio,function(x) strsplit(x,"/")[[1]][2])))
data_new$gene_num.rich <- as.numeric(unlist(lapply(data$BgRatio,function(x) strsplit(x,"/")[[1]][1])))

if (cutoff=='padj') {data_new$"-log10.p" <- -log10(data$padj)}
if (cutoff=='pvalue') {data_new$"-log10.p" <- -log10(data$pvalue)}

data_new$up.regulated <- data$Up
data_new$down.regulated <- data$Down
data_new$rich.factor <- as.numeric(unlist(lapply(data$GeneRatio,function(x) strsplit(x,"/")[[1]][1])))/data_new$gene_num.rich
colnames(data_new)[c(1,2)] <- c("id", "category")
if (type=='KEGG') {data_new$category <- "KEGG"}

ID <- as.character(data_new$id)
data_new$id <- factor(ID, levels = ID)

dat <- data_new
rownames(dat) <- data_new$id
pdf(paste(prefix,'_circlize.pdf',sep = ""), width = 12, height = 15)

circle_size = unit(1, 'snpc')
circos.par(gap.degree = 2, start.degree = 90)


plot_data <- dat[c('id', 'gene_num.min', 'gene_num.max')]
circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1)
circos.track(
  ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,
  panel.fun = function(x, y) {
    ylim = get.cell.meta.data('ycenter')
    xlim = get.cell.meta.data('xcenter')
    sector.name = get.cell.meta.data('sector.index')
    circos.axis(h = 'top', labels.cex = 0.4, major.tick.percentage = 0.4, labels.niceFacing = FALSE)
    circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)
  } )


plot_data <- dat[c('id', 'gene_num.min', 'gene_num.rich', '-log10.p')]
label_data <- dat['gene_num.rich']
p_max <- round(max(dat$'-log10.p')) + 1
colorsChoice <- colorRampPalette(c('#FF906F', '#861D30'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
    ylim = get.cell.meta.data('ycenter')
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)
  } )


dat$all.regulated <- dat$up.regulated + dat$down.regulated
dat$up.proportion <- dat$up.regulated / dat$all.regulated
dat$down.proportion <- dat$down.regulated / dat$all.regulated

dat$up <- dat$up.proportion * dat$gene_num.max
plot_data_up <- dat[c('id', 'gene_num.min', 'up')]
names(plot_data_up) <- c('id', 'start', 'end')
plot_data_up$type <- 1

dat$down <- dat$down.proportion * dat$gene_num.max + dat$up
plot_data_down <- dat[c('id', 'up', 'down')]
names(plot_data_down) <- c('id', 'start', 'end')
plot_data_down$type <- 2

plot_data <- rbind(plot_data_up, plot_data_down)
label_data <- dat[c('up', 'down', 'up.regulated', 'down.regulated')]
color_assign <- colorRamp2(breaks = c(1, 2), col = c('#671166', '#7F8CCB'))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
    ylim = get.cell.meta.data('cell.bottom.radius') - 0.5
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),3]
    circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)
    xlim = (label_data[get.cell.meta.data('sector.index'),2]+label_data[get.cell.meta.data('sector.index'),1]) / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),4]
    circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)
  } )


plot_data <- dat[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')]
label_data <- dat['category']
color_assign <- c('BP' = '#F7CC13', 'MF' = '#954572', 'CC' = '#0796E0', 'KEGG' = '#00FFFF')

circos.genomicTrack(
  plot_data, ylim = c(0, 1), track.height = 0.3, bg.col = 'gray95', bg.border = NA,
  panel.fun = function(region, value, ...) {
    sector.name = get.cell.meta.data('sector.index')
    circos.genomicRect(region, value, col = color_assign[label_data[sector.name,1]], border = NA, ytop.column = 1, ybottom = 0, ...)
    circos.lines(c(0, max(region)), c(0.5, 0.5), col = 'gray', lwd = 0.3)
  } )

circos.clear()

category_legend <- Legend(
  labels = c('BP', 'MF','CC'),
  type = 'points', pch = NA, background = c('#F7CC13', '#954572','#0796E0'),
  labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))

updown_legend <- Legend(
  labels = c('Up-regulated', 'Down-regulated'),
  type = 'points', pch = NA, background = c('#671166', '#7F8CCB'),
  labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))

if (cutoff=='padj') {p_title <- '-Log10(padj)'}
if (cutoff=='pvalue') {p_title <- '-Log10(Pvalue)'}

pvalue_legend <- Legend(
  col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0),
                       colorRampPalette(c('#FF906F', '#861D30'))(6)),
  legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8),
  title_gp = gpar(fontsize = 9), title_position = 'topleft', title = p_title)

if (type=='GO') {
  lgd_list_vertical <- packLegend(category_legend, updown_legend, pvalue_legend)
}
if (type=='KEGG') {
  lgd_list_vertical <- packLegend(updown_legend, pvalue_legend)
}
pushViewport(viewport(x = 0.5, y = 0.5))
grid.draw(lgd_list_vertical)
upViewport()
dev.off()

结果readme文件

第一圈展示了富集到的通路,并显示了通路的名称(或id)、通路中所含背景基因总数(展示为刻度轴)、通路所属的高级分类单元(以不同颜色标识);

第二圈展示了富集到目标通路中,所富集的基因的总数(以不同长度的区块反映了数量信息,并将数值标识在区块中),以及富集分析的显著性信息(标记为渐变颜色);

第三圈展示了富集到的通路中,显著上下调的基因数量和比例的信息(以不同长度的区块分别表示上下调基因的相对占比,并用不同颜色分别标识上下调基因,内侧分别显示了上下调基因数量数值);

第四圈展示了各个通路的富集因子,高度反映得分值,颜色指示了通路所属的高级分类单元。