个性化条目:cibersort免疫浸润分析

使用方法

/TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/cibersort/cibersort --fpkm /TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/cibersort/mus_fpkm.xls --sample ZMS_DO1_1,ZMS_DO1_2,ZMS_DO1_3,ZMS_SC1_1,ZMS_SC1_2,ZMS_SC1_3,ZMS_DO3_1,ZMS_DO3_2,ZMS_DO3_3,ZMS_SC3_1,ZMS_SC3_2,ZMS_SC3_3,ZMS_DO7_1,ZMS_DO7_2,ZMS_DO7_3,ZMS_SC7_1,ZMS_SC7_2,ZMS_SC7_3,ZMS_DO14_1,ZMS_DO14_2,ZMS_DO14_3,ZMS_SC14_1,ZMS_SC14_2,ZMS_SC14_3,ZMS_GANTDO3_1,ZMS_GANTDO3_2,ZMS_GANTDO3_3,ZMS_GANTDO7_1,ZMS_GANTDO7_2,ZMS_GANTDO7_3,ZMS_GANTDO14_1,ZMS_GANTDO14_2,ZMS_GANTDO14_3 --prefix test2

#!/TJPROJ6/RNA_SH/personal_dir/zhangxin/miniconda/envs/R_3.5.0/bin/Rscript

suppressMessages({
library(ggplot2)
library(reshape2)
library(argparser)})

argv <- arg_parser('')
argv <- add_argument(argv,"--fpkm", help="the fpkm file")
argv <- add_argument(argv,"--sample", help="the sample name")
argv <- add_argument(argv,"--condition", help="the condition file")
argv <- add_argument(argv,"--prefix", help="the prefix")
argv <- parse_args(argv)

fpkm <- argv$fpkm
sample <- strsplit(argv$sample,",")[[1]]
condition <- argv$condition
prefix <- argv$prefix

#file
if (!is.na(sample)){
	stat <- read.delim(fpkm, header=T, sep='\t', quote='')
	stat <- stat[,c('gene_name',sample)]
	stat$gene_name <- toupper(as.character(stat$gene_name))
	if (ncol(stat) > 2){
		stat <- stat[order(rowSums(stat[,2:ncol(stat)]),decreasing=T),]
	}else{
		stat <- stat[order(stat[,2],decreasing=T),]
	}
	stat <- stat[!duplicated(stat$gene_name),]
}else if (!is.na(condition)){
	condition <- read.delim(condition, header=T, sep='\t', quote='')
	sample <- as.character(condition$sample)
	stat <- read.delim(fpkm, header=T, sep='\t', quote='')
	stat <- stat[,c('gene_name',sample)]
	stat$gene_name <- toupper(as.character(stat$gene_name))
	if (ncol(stat) > 2){
		stat <- stat[order(rowSums(stat[,2:ncol(stat)]),decreasing=T),]
	}else{
		stat <- stat[order(stat[,2],decreasing=T),]
	}
	stat <- stat[!duplicated(stat$gene_name),]
}else{
	stat <- read.delim(fpkm, header=T, sep='\t', quote='')
	stat[,1] <- as.character(stat[,1])
	if (ncol(stat) > 2){
		stat <- stat[order(rowSums(stat[,2:ncol(stat)]),decreasing=T),]
	}else{
		stat <- stat[order(stat[,2],decreasing=T),]
	}
	stat <- stat[!duplicated(stat$gene_name),]
}
stat <- na.omit(stat)
if (ncol(stat) > 2){
	stat <- stat[rowSums(stat[,2:ncol(stat)]) > 0,]
}else{
	stat <- stat[stat[,2] > 0,,drop=F]
}

write.table(stat,file=paste0(prefix,'_tmp_fpkm.xls'),sep='\t',quote=F,row.names=F)

#analyse
source('/TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/cibersort/supportFunc_cibersort.R')
LM22 <- '/TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/cibersort/LM22.xls'
TME.results <- CIBERSORT(LM22, paste0(prefix,'_tmp_fpkm.xls'), perm = 1000, QN = TRUE)
TME.results <- data.frame(sample=rownames(TME.results),TME.results,check.names=F)
write.table(TME.results,file=paste0(prefix,'_cibersort.xls'),sep='\t',quote=F,row.names=F)

#plot
stat <- TME.results[,1:23]
stat <- melt(stat)

#boxplot
p <- ggplot(stat,aes(x=variable,y=value,colour=variable,fill=variable)) +
	geom_boxplot(alpha=0.5,outlier.size=1) +
	xlab("Cell Type") +
	ylab("ratio") +
	labs(title="Cell ratio") +
	theme_bw() +
	theme(axis.text.x=element_text(hjust=1,angle=45)) +
	theme(plot.title=element_text(hjust=0.5))
pdf(file=paste0(prefix,'_boxplot.pdf'),width=10)
p
dev.off()
#png(file=paste0(prefix,'_boxplot.png'),width=10,res=4*72,units='in',type='cairo-png')
ggsave(file=paste0(prefix,'_boxplot.png'),width=10,type='cairo-png',plot=p)
#histogram
p <- ggplot(stat,aes(x=sample,y=value,fill=variable)) +
	geom_bar(stat='identity') +
	ylab("ratio") +
	xlab("sample") +
	labs(fill="Cell Type") +
	theme(plot.margin=unit(c(1,1,2,2),'lines')) +
	theme(panel.background=element_rect(fill="transparent"),axis.line=element_line()) +
	theme(axis.text.x=element_text(hjust=1,angle=45,size=6))
pdf(file=paste0(prefix,'_bar.pdf'),width=(ncol(stat)-1)*0.2+4)
p
dev.off()
ggsave(file=paste0(prefix,'_bar.png'),width=(ncol(stat)-1)*0.2+4,type='cairo-png',plot=p)

readme:

X101SC22073970-Z01-J035_tmp_fpkm.xls 文件为gene_fpkm表达量表格去除所有样本为零表达的基因表达量
	第一列 gene_name 为大写的基因名称
	第二列到最后一列 为每个样本的fpkm表达量
X101SC22073970-Z01-J035_cibersort.xls 为cibersort软件的计算结果
	第一列为样本名称
	第二列到最后一列是每个样本每种细胞类型预测的比例
X101SC22073970-Z01-J035_boxplot.png/pdf	为不同细胞的盒形图
X101SC22073970-Z01-J035_bar.png/pdf 为不同细胞的柱形图