用户工具

站点工具


个性化条目:基因表达相关性图

基因表达相关性图

做相关性分析,并得出p值,交付图片如下

脚本使用:

./corr.test.r –input merged_fpkm.xls –outdir /TJPROJ6/RNA_SH/personal_dir/fengjie/Personal_analysis/Corr.test –type pearson

脚本

#!/usr/bin/env Rscript

suppressMessages({
library(argparser)
library(psych)
library(reshape2)
library(ggplot2)})

argv <- arg_parser('')
argv <- add_argument(argv,"--input", help="the fpkm file")
argv <- add_argument(argv,"--outdir", help="the prefix of outfile")
argv <- add_argument(argv,'--type',help='pearson or spearman')
argv <- parse_args(argv)

exp_data <- argv$input
outdir <- argv$outdir
type <- argv$type
#setwd("/Users/fengjie/Desktop/Research/correlation/Corr.test")
#exp_data <- "merged_fpkm.xls"
#outdir <- getwd()
#type <- "pearson"
exp_data=read.table(exp_data, header=T, sep='\t')
rownames(exp_data)=exp_data[,1]
dims<-dim(exp_data)
nc=dims[2]
sam_num=nc-1
exp_data=exp_data[,2:nc]
dat_all=log10(exp_data+1)
plot_num=sam_num*(sam_num-1)/2
cor_table1=matrix(1,sam_num,sam_num)
for (i in 1:(sam_num-1)){
	for (j in (i+1):sam_num){
		for (j in (i+1):sam_num){
		  dat=dat_all[,c(i,j)]
			loc=max(dat[,2])
			dat=data.frame(dat)
			p<- ggplot(dat)
			p<- p + aes_string(x=colnames(dat)[1],y=colnames(dat)[2])
			model <- coef(lm(dat[[2]] ~ dat[[1]], data = dat))
			intercept_val <- as.numeric(model)[1]
			slope_val <- as.numeric(model)[2]
			p<- p + geom_point(size=1.5,alpha=0.3,colour="#4876FF") + geom_abline(intercept=intercept_val,slope=slope_val,linetype=2,colour="#FF7F50") + geom_rug(size=0.5,alpha=0.01,colour="#4876FF")
			result <- corr.test(dat[ ,1], dat[ ,2], use = "pairwise",method=type,adjust = "BH")
			R2 <- signif((result$r)^2,3)
			P <- signif((result$p)^2,3)
			cor_table1[i,j]=R2
			cor_table1[j,i]=R2
			p <- p + labs(title=paste(colnames(dat_all)[i]," vs ",colnames(dat_all)[j]))
			p <- p + xlab(paste("log10(FPKM+1),"," (",colnames(dat_all)[i],")")) + ylab(paste("log10(FPKM+1),"," (",colnames(dat_all)[j],")"))
			y1=loc
			p<- p+ annotate("text",adj=0,x=0.02,y=y1, label=paste("R^2==",R2,sep=''), parse=TRUE)+ annotate("text",adj=0,x=0.02,y=y1-2, label=paste("p==",P,sep=''), parse=TRUE)
			old_theme <- theme_update(
				panel.background = element_rect(fill = "transparent", color = "NA"),
				panel.grid.minor=element_blank(),
				panel.grid.major=element_blank(),
				plot.background = element_rect(fill = "transparent", color = "NA"),
				axis.line=element_line(),
				axis.ticks=element_line(),
				legend.key = element_blank()
				)
			fpdf=paste(outdir,'/',colnames(dat_all)[i],'_vs_',colnames(dat_all)[j],'.scatter.pdf',sep='')
			fpng=paste(outdir,'/',colnames(dat_all)[i],'_vs_',colnames(dat_all)[j],'.scatter.png',sep='')
			ggsave(filename=fpdf, plot=p)
			ggsave(filename=fpng,type="cairo-png", plot=p)
		}
	}
}
cor_table1<-data.frame(cor_table1)
colnames(cor_table1)<-colnames(dat_all)
cor_table1$coefficient<-colnames(dat_all)
cor_table1<-cor_table1[,c(sam_num+1,1:sam_num)]
names(cor_table1)[1]<-"R^2"
ft=paste(outdir,'/correlation.xls',sep='')
write.table(cor_table1,file=ft,quote=F,row.name=F, sep="\t")
if(sam_num<5){
	size_number=5
}else if(sam_num<=10){
	size_number=4
}else if(sam_num<15){
	size_number=3
}else if(sam_num<18){
	size_number=2
}else{
	size_number=1.5
}
heat<-cor_table1
order<-heat[,1]
order<-as.vector(as.character(order))
df<-melt(heat)
colnames(df)<-c("sample1","sample2","correlation")
p<-ggplot(df,aes(sample1,sample2,label=correlation))+
geom_tile(aes(fill = correlation),colour="white") +
scale_fill_gradient(name=expression(R^2),low="white",high="#4876FF")+
theme(panel.background = element_rect(fill='white', colour='white')) +
labs(x="",y="", title="Pearson correlation between samples")+
theme(legend.position="right",axis.text.x=element_text(angle=45,vjust=1,hjust=1))+coord_fixed()+
geom_text(size=size_number)+xlim(order)+ylim(order)
ggsave(filename=paste(outdir,'/correlation.pdf',sep=''),plot=p, height=max(6,round(nrow(sam_num)/3)), width=max(6,round(nrow(sam_num)/3)))
ggsave(filename=paste(outdir,'/correlation.png',sep=''),type="cairo-png", plot=p, height=max(6,round(nrow(sam_num)/3)), width=max(6,round(nrow(sam_num)/3)))
个性化条目/基因表达相关性图.txt · 最后更改: 2023/03/28 10:11 由 fengjie