用户工具

站点工具


曲线连接图
suppressMessages({
  library(argparser)
  library(tidyverse)
  library(circlize)
  library(viridis)
  library(igraph)
  library(ggraph)
  library(colormap)
  library(ggplot2)
  #library(wesanderson)
})

argv <- arg_parser(description = '绘制GO曲线连接图')
argv <- add_argument(argv, '--go',help = '*_GOenrich.xls')
argv <- add_argument(argv, '--diffreult', help = '*vs*_deg.xls')
argv <- add_argument(argv,"--prefix", help="the prefix of outfile")
argv <- parse_args(argv)

go_file <- argv$go
diffresult <- argv$diffresult
prefix <- argv$prefix

go <- read.table(go_file, sep = '\t',header = T, quote = '')[1:5,]  #默认绘制前5条GO富集结果
diffresult <- read.table(diffresult, sep = '\t',header = T, quote = '')%>%
  dplyr::select(gene_name,log2FoldChange)


# 构建vertices数据
term_vertices <- dplyr::select(go, Description, Count)%>%
  mutate(log2FoldChange = 0)%>%
  arrange(desc(Count))
  
colnames(term_vertices)[1:2] <- c('name', 'n')

tmp_gene_vertices <- go$geneName%>%
  str_split(pattern = '/', simplify = T)%>%
  table()%>%
  as.data.frame()
colnames(tmp_gene_vertices) <- c('name', 'n')
## 去掉genename 为空或者-的行
tmp_gene_vertices <- tmp_gene_vertices[tmp_gene_vertices$name!="" &tmp_gene_vertices$name!="-",]


gene_vertices <- merge(tmp_gene_vertices, diffresult, by.x = 'name', by.y='gene_name')%>%
  arrange(desc(n))

number_of_bar<-nrow(term_vertices)+nrow(gene_vertices)
vertices <- rbind(gene_vertices, term_vertices)%>%
  mutate(id = 1:number_of_bar)

angle= 360 * (vertices$id-0.5) /number_of_bar    
vertices$hjust<-ifelse(angle>180, 1, 0)
vertices$angle<-ifelse(angle>180, 90-angle+180, 90-angle)


#构建connect数据
tmp_connect <- data.frame()
for(i in 1:nrow(go)){
  term <- go$Description[i]
  gene_name <- unlist(str_split(go$geneName[i], pattern = '/'))
  tmp_df <- data.frame(term, gene_name)
  tmp_connect <-rbind(tmp_connect, tmp_df)
}
## 去除genename是‘-’的行
tmp_connect <- tmp_connect[tmp_connect$gene_name!='-',]
connect <- merge(tmp_connect, diffresult, by = 'gene_name')%>%
  mutate(from=term)%>%
  dplyr::select(from, gene_name, log2FoldChange, term)


#构建mygraph
mygraph <- graph_from_data_frame( connect, vertices = vertices, directed = FALSE )


# 绘图
P <- ggraph(mygraph,layout = 'linear', circular = TRUE) +
  geom_edge_arc(aes(edge_colour=as.factor(term)), edge_alpha=0.5, edge_width=0.3)+
  geom_node_point(aes(size=n, fill=as.numeric(log2FoldChange)), shape=21,color='black',alpha=0.9)+
  geom_node_text(aes(x = x*1.06, y=y*1.06, label=name, angle=angle,hjust=hjust),size=3)+
  scale_fill_gradientn(colours=c('white','red'),guide="colourbar")+
  labs(edge_colour='go term', size='size', fill='log2FoldChange')+
  coord_fixed()+
  theme_minimal() +
  theme(
    text = element_text(size = 14),
    legend.position="right",
    panel.grid = element_blank(),
    axis.line = element_blank(),
    axis.ticks =element_blank(),
    axis.text =element_blank(),
    axis.title = element_blank(),
    plot.margin=unit(c(0,0,0,0), "null"),
    panel.spacing=unit(c(0,0,0,0), "null")
  )

pdf(file=paste(prefix,'.pdf',sep=''))
P
dev.off()

svg(filename=paste(prefix,'.svg',sep=''))
P
dev.off()
ggsave(filename=paste(prefix,'.png',sep=''),type='cairo-png',plot=P)


#geom_edge_arc 替换为geom_edge_link 绘制径向连接图
# circular = TRUE 改为FALSE,并使用geom_edge_arc可绘制弧长连接图
#scale_size_continuous(breaks=c(1,2,3), labels = c(1,2,3))+

vertices数据结构

name n log2FoldChange id hjust angle
pafo-1 1 1.831388 1 1 -37.6744
set-23 1 1.42319 2 1 -41.8605
smc-3 1 1.390854 3 1 -46.0465
smc-4 1 1.61223 4 1 -50.2326
snfc-5 1 1.611353 5 1 -54.4186
DNA replication 29 0 6 1 -75.3488
cellular response to DNA damage stimulus 29 0 7 1 -79.5349
cellular response to stress 29 0 8 1 -83.7209

connect 数据结构

from gene_name log2FoldChange term
chromosome organization asfl-1 2.162437646 chromosome organization
chromosome organization B0025.4 1.630395874 chromosome organization
DNA metabolic process C11G6.2 4.916024829 DNA metabolic process
cellular response to stress C11G6.2 4.916024829 cellular response to stress

曲线连接图.txt · 最后更改: 2023/04/13 09:38 由 lizhengnan