suppressMessages({
library(argparser)
library(tidyverse)
library(circlize)
library(viridis)
library(igraph)
library(ggraph)
library(colormap)
library(ggplot2)
#library(wesanderson)
})
argv <- arg_parser(description = '绘制GO曲线连接图')
argv <- add_argument(argv, '--go',help = '*_GOenrich.xls')
argv <- add_argument(argv, '--diffreult', help = '*vs*_deg.xls')
argv <- add_argument(argv,"--prefix", help="the prefix of outfile")
argv <- parse_args(argv)
go_file <- argv$go
diffresult <- argv$diffresult
prefix <- argv$prefix
go <- read.table(go_file, sep = '\t',header = T, quote = '')[1:5,] #默认绘制前5条GO富集结果
diffresult <- read.table(diffresult, sep = '\t',header = T, quote = '')%>%
dplyr::select(gene_name,log2FoldChange)
# 构建vertices数据
term_vertices <- dplyr::select(go, Description, Count)%>%
mutate(log2FoldChange = 0)%>%
arrange(desc(Count))
colnames(term_vertices)[1:2] <- c('name', 'n')
tmp_gene_vertices <- go$geneName%>%
str_split(pattern = '/', simplify = T)%>%
table()%>%
as.data.frame()
colnames(tmp_gene_vertices) <- c('name', 'n')
## 去掉genename 为空或者-的行
tmp_gene_vertices <- tmp_gene_vertices[tmp_gene_vertices$name!="" &tmp_gene_vertices$name!="-",]
gene_vertices <- merge(tmp_gene_vertices, diffresult, by.x = 'name', by.y='gene_name')%>%
arrange(desc(n))
number_of_bar<-nrow(term_vertices)+nrow(gene_vertices)
vertices <- rbind(gene_vertices, term_vertices)%>%
mutate(id = 1:number_of_bar)
angle= 360 * (vertices$id-0.5) /number_of_bar
vertices$hjust<-ifelse(angle>180, 1, 0)
vertices$angle<-ifelse(angle>180, 90-angle+180, 90-angle)
#构建connect数据
tmp_connect <- data.frame()
for(i in 1:nrow(go)){
term <- go$Description[i]
gene_name <- unlist(str_split(go$geneName[i], pattern = '/'))
tmp_df <- data.frame(term, gene_name)
tmp_connect <-rbind(tmp_connect, tmp_df)
}
## 去除genename是‘-’的行
tmp_connect <- tmp_connect[tmp_connect$gene_name!='-',]
connect <- merge(tmp_connect, diffresult, by = 'gene_name')%>%
mutate(from=term)%>%
dplyr::select(from, gene_name, log2FoldChange, term)
#构建mygraph
mygraph <- graph_from_data_frame( connect, vertices = vertices, directed = FALSE )
# 绘图
P <- ggraph(mygraph,layout = 'linear', circular = TRUE) +
geom_edge_arc(aes(edge_colour=as.factor(term)), edge_alpha=0.5, edge_width=0.3)+
geom_node_point(aes(size=n, fill=as.numeric(log2FoldChange)), shape=21,color='black',alpha=0.9)+
geom_node_text(aes(x = x*1.06, y=y*1.06, label=name, angle=angle,hjust=hjust),size=3)+
scale_fill_gradientn(colours=c('white','red'),guide="colourbar")+
labs(edge_colour='go term', size='size', fill='log2FoldChange')+
coord_fixed()+
theme_minimal() +
theme(
text = element_text(size = 14),
legend.position="right",
panel.grid = element_blank(),
axis.line = element_blank(),
axis.ticks =element_blank(),
axis.text =element_blank(),
axis.title = element_blank(),
plot.margin=unit(c(0,0,0,0), "null"),
panel.spacing=unit(c(0,0,0,0), "null")
)
pdf(file=paste(prefix,'.pdf',sep=''))
P
dev.off()
svg(filename=paste(prefix,'.svg',sep=''))
P
dev.off()
ggsave(filename=paste(prefix,'.png',sep=''),type='cairo-png',plot=P)
#geom_edge_arc 替换为geom_edge_link 绘制径向连接图
# circular = TRUE 改为FALSE,并使用geom_edge_arc可绘制弧长连接图
#scale_size_continuous(breaks=c(1,2,3), labels = c(1,2,3))+
vertices数据结构
name | n | log2FoldChange | id | hjust | angle |
pafo-1 | 1 | 1.831388 | 1 | 1 | -37.6744 |
set-23 | 1 | 1.42319 | 2 | 1 | -41.8605 |
smc-3 | 1 | 1.390854 | 3 | 1 | -46.0465 |
smc-4 | 1 | 1.61223 | 4 | 1 | -50.2326 |
snfc-5 | 1 | 1.611353 | 5 | 1 | -54.4186 |
DNA replication | 29 | 0 | 6 | 1 | -75.3488 |
cellular response to DNA damage stimulus | 29 | 0 | 7 | 1 | -79.5349 |
cellular response to stress | 29 | 0 | 8 | 1 | -83.7209 |
connect 数据结构
from | gene_name | log2FoldChange | term |
chromosome organization | asfl-1 | 2.162437646 | chromosome organization |
chromosome organization | B0025.4 | 1.630395874 | chromosome organization |
DNA metabolic process | C11G6.2 | 4.916024829 | DNA metabolic process |
cellular response to stress | C11G6.2 | 4.916024829 | cellular response to stress |