/TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/cibersort/cibersort --fpkm /TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/cibersort/mus_fpkm.xls --sample ZMS_DO1_1,ZMS_DO1_2,ZMS_DO1_3,ZMS_SC1_1,ZMS_SC1_2,ZMS_SC1_3,ZMS_DO3_1,ZMS_DO3_2,ZMS_DO3_3,ZMS_SC3_1,ZMS_SC3_2,ZMS_SC3_3,ZMS_DO7_1,ZMS_DO7_2,ZMS_DO7_3,ZMS_SC7_1,ZMS_SC7_2,ZMS_SC7_3,ZMS_DO14_1,ZMS_DO14_2,ZMS_DO14_3,ZMS_SC14_1,ZMS_SC14_2,ZMS_SC14_3,ZMS_GANTDO3_1,ZMS_GANTDO3_2,ZMS_GANTDO3_3,ZMS_GANTDO7_1,ZMS_GANTDO7_2,ZMS_GANTDO7_3,ZMS_GANTDO14_1,ZMS_GANTDO14_2,ZMS_GANTDO14_3 --prefix test2
#!/TJPROJ6/RNA_SH/personal_dir/zhangxin/miniconda/envs/R_3.5.0/bin/Rscript suppressMessages({ library(ggplot2) library(reshape2) library(argparser)}) argv <- arg_parser('') argv <- add_argument(argv,"--fpkm", help="the fpkm file") argv <- add_argument(argv,"--sample", help="the sample name") argv <- add_argument(argv,"--condition", help="the condition file") argv <- add_argument(argv,"--prefix", help="the prefix") argv <- parse_args(argv) fpkm <- argv$fpkm sample <- strsplit(argv$sample,",")[[1]] condition <- argv$condition prefix <- argv$prefix #file if (!is.na(sample)){ stat <- read.delim(fpkm, header=T, sep='\t', quote='') stat <- stat[,c('gene_name',sample)] stat$gene_name <- toupper(as.character(stat$gene_name)) if (ncol(stat) > 2){ stat <- stat[order(rowSums(stat[,2:ncol(stat)]),decreasing=T),] }else{ stat <- stat[order(stat[,2],decreasing=T),] } stat <- stat[!duplicated(stat$gene_name),] }else if (!is.na(condition)){ condition <- read.delim(condition, header=T, sep='\t', quote='') sample <- as.character(condition$sample) stat <- read.delim(fpkm, header=T, sep='\t', quote='') stat <- stat[,c('gene_name',sample)] stat$gene_name <- toupper(as.character(stat$gene_name)) if (ncol(stat) > 2){ stat <- stat[order(rowSums(stat[,2:ncol(stat)]),decreasing=T),] }else{ stat <- stat[order(stat[,2],decreasing=T),] } stat <- stat[!duplicated(stat$gene_name),] }else{ stat <- read.delim(fpkm, header=T, sep='\t', quote='') stat[,1] <- as.character(stat[,1]) if (ncol(stat) > 2){ stat <- stat[order(rowSums(stat[,2:ncol(stat)]),decreasing=T),] }else{ stat <- stat[order(stat[,2],decreasing=T),] } stat <- stat[!duplicated(stat$gene_name),] } stat <- na.omit(stat) if (ncol(stat) > 2){ stat <- stat[rowSums(stat[,2:ncol(stat)]) > 0,] }else{ stat <- stat[stat[,2] > 0,,drop=F] } write.table(stat,file=paste0(prefix,'_tmp_fpkm.xls'),sep='\t',quote=F,row.names=F) #analyse source('/TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/cibersort/supportFunc_cibersort.R') LM22 <- '/TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/cibersort/LM22.xls' TME.results <- CIBERSORT(LM22, paste0(prefix,'_tmp_fpkm.xls'), perm = 1000, QN = TRUE) TME.results <- data.frame(sample=rownames(TME.results),TME.results,check.names=F) write.table(TME.results,file=paste0(prefix,'_cibersort.xls'),sep='\t',quote=F,row.names=F) #plot stat <- TME.results[,1:23] stat <- melt(stat) #boxplot p <- ggplot(stat,aes(x=variable,y=value,colour=variable,fill=variable)) + geom_boxplot(alpha=0.5,outlier.size=1) + xlab("Cell Type") + ylab("ratio") + labs(title="Cell ratio") + theme_bw() + theme(axis.text.x=element_text(hjust=1,angle=45)) + theme(plot.title=element_text(hjust=0.5)) pdf(file=paste0(prefix,'_boxplot.pdf'),width=10) p dev.off() #png(file=paste0(prefix,'_boxplot.png'),width=10,res=4*72,units='in',type='cairo-png') ggsave(file=paste0(prefix,'_boxplot.png'),width=10,type='cairo-png',plot=p) #histogram p <- ggplot(stat,aes(x=sample,y=value,fill=variable)) + geom_bar(stat='identity') + ylab("ratio") + xlab("sample") + labs(fill="Cell Type") + theme(plot.margin=unit(c(1,1,2,2),'lines')) + theme(panel.background=element_rect(fill="transparent"),axis.line=element_line()) + theme(axis.text.x=element_text(hjust=1,angle=45,size=6)) pdf(file=paste0(prefix,'_bar.pdf'),width=(ncol(stat)-1)*0.2+4) p dev.off() ggsave(file=paste0(prefix,'_bar.png'),width=(ncol(stat)-1)*0.2+4,type='cairo-png',plot=p)
readme:
X101SC22073970-Z01-J035_tmp_fpkm.xls 文件为gene_fpkm表达量表格去除所有样本为零表达的基因表达量 第一列 gene_name 为大写的基因名称 第二列到最后一列 为每个样本的fpkm表达量 X101SC22073970-Z01-J035_cibersort.xls 为cibersort软件的计算结果 第一列为样本名称 第二列到最后一列是每个样本每种细胞类型预测的比例 X101SC22073970-Z01-J035_boxplot.png/pdf 为不同细胞的盒形图 X101SC22073970-Z01-J035_bar.png/pdf 为不同细胞的柱形图