Mfuzz_TCseq_Time_series_analysis

欢迎批评指正!!!

Title :Soft clustering of time series gene expression data ,模糊均值算法(fuzzy c-means)
http://mfuzz.sysbiolab.eu/

/TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/zuotu/Mfuzz_TCseq_Time_series_analysis

脚本

#!/TJPROJ6/RNA_SH/personal_dir/zhangxin/miniconda/envs/R_3.6.0/bin/Rscript

suppressMessages({
library(Mfuzz)
library(TCseq)
library(argparser)})

argv <- arg_parser('')
argv <- add_argument(argv,"--fpkm",help="the fpkm file")
argv <- add_argument(argv,"--das", help="the differential analysis results file")
argv <- add_argument(argv,"--sample",help="the sample name")
argv <- add_argument(argv,"--number",help="the number of cluster,default:6",type='numeric')
argv <- add_argument(argv,"--m",help="the fuzzifier",type='numeric')
argv <- add_argument(argv,"--software",help="the one of Mfuzz or TCseq,default:Mfuzz")
argv <- add_argument(argv,"--perfix",help="the perfix")
argv <- parse_args(argv)

fpkm <- argv$fpkm
das <- argv$das
sample <- argv$sample
number <- argv$number
m <- argv$m
software <- argv$software
perfix <- argv$perfix

if(is.na(software)){software <- 'Mfuzz'}
fpkm_stat <- read.delim(fpkm, header=T, sep='\t', row.names=1, quote='')
gene_vector <- NULL
if(!is.na(das)){
	das_vector <- strsplit(das,',')[[1]]
	for (i in 1:length(das_vector)) {
		data <- read.delim(das_vector[i],header=TRUE,sep='\t')
		gene_id <- as.character(data[,1])
		gene_vector <- c(gene_vector,gene_id)}
	gene_vector <- gene_vector[!duplicated(gene_vector)]
	fpkm_dat <- fpkm_stat[gene_vector,]
}else{
	fpkm_dat <- fpkm_stat
}
if(!is.na(sample)){samples <- strsplit(sample,",")[[1]]
	fpkm_dat <- fpkm_stat[,samples]
}
if('gene_name' %in% colnames(fpkm_stat)){
	anno <- cbind(fpkm_dat,fpkm_stat[,which(colnames(fpkm_stat)=='gene_name'):dim(fpkm_stat)[2]])
}else{
	anno <- fpkm_dat
}
if(is.na(number)){number <- 6}

if (software == 'Mfuzz'){
	fpkm_dat <- data.matrix(fpkm_dat)
	eset <- new("ExpressionSet",exprs = fpkm_dat)
	eset <- filter.std(eset,min.std=0,visu=F)
	eset <- standardise(eset)
	if(is.na(m)){m <- mestimate(eset)}
	set.seed(0)
	cluster <- mfuzz(eset, c = number, m = m)
	for(i in 1:number){
		outfile_id <- names(cluster$cluster[cluster$cluster == i])
		membership <- cluster$membership[outfile_id,i]
	#	colnames(membership) <- paste0('cluster',1:number)
		outfile <- data.frame(gene_id=outfile_id,anno[outfile_id,],membership=membership)
		write.table(outfile,file=paste0(perfix,"_Mfuzz_cluster_",i,".xls"),sep='\t',quote=F,row.names=F)
	}
	n <- ceiling(number/3)
	pdf(file=paste0(perfix,"_Mfuzz.pdf"))
	mfuzz.plot(eset,cluster,mfrow=c(n,3),new.window= FALSE,time.labels = colnames(fpkm_dat))
	dev.off()
	png(file=paste0(perfix,"_Mfuzz.png"),type='cairo-png')
	mfuzz.plot(eset,cluster,mfrow=c(n,3),new.window= FALSE)
	dev.off()
	colo <- c("#FF8F00", "#FFA700", "#FFBF00", "#FFD700",
		"#FFEF00", "#F7FF00", "#DFFF00", "#C7FF00", "#AFFF00",
		"#97FF00", "#80FF00", "#68FF00", "#50FF00", "#38FF00",
		"#20FF00", "#08FF00", "#00FF10", "#00FF28", "#00FF40",
		"#00FF58", "#00FF70", "#00FF87", "#00FF9F", "#00FFB7",
		"#00FFCF", "#00FFE7", "#00FFFF", "#00E7FF", "#00CFFF",
		"#00B7FF", "#009FFF", "#0087FF", "#0070FF", "#0058FF",
		"#0040FF", "#0028FF", "#0010FF", "#0800FF", "#2000FF",
		"#3800FF", "#5000FF", "#6800FF", "#8000FF", "#9700FF",
		"#AF00FF", "#C700FF", "#DF00FF", "#F700FF", "#FF00EF",
		"#FF00D7", "#FF00BF", "#FF00A7", "#FF008F", "#FF0078",
		"#FF0060", "#FF0048", "#FF0030", "#FF0018")
	colorseq <- seq(0, 1, length = length(colo) + 1)
	names(colo) <- levels(cut(1,colorseq))
	exp <- exprs(eset)
	for(i in 1:number){
		tmp <- exp[cluster$cluster == i,,drop = FALSE]
		ship <- cluster$membership[rownames(tmp),i]
		ymin <- min(tmp)
		ymax <- max(tmp)
		pdf(file=paste0(perfix,"_cluster_",i,"_Mfuzz.pdf"))
		plot.default(x = NA, xlim = c(1, dim(exprs(eset))[[2]]),
		ylim = c(ymin, ymax), xlab = "Time", ylab = "Expression changes",
		main = paste("Cluster", i), axes = FALSE)
		axis(1, 1:dim(exprs(eset))[[2]], colnames(fpkm_dat))
		axis(2)
		for (k in 1:nrow(tmp)) {
			lines(tmp[k, ], col = colo[cut(ship[k],colorseq)])
		}
		dev.off()
		png(file=paste0(perfix,"_cluster_",i,"_Mfuzz.png"),type='cairo-png')
		plot.default(x = NA, xlim = c(1, dim(exprs(eset))[[2]]),
			ylim = c(ymin, ymax), xlab = "Time", ylab = "Expression changes",
			main = paste("Cluster", i), axes = FALSE)
		axis(1, 1:dim(exprs(eset))[[2]], colnames(fpkm_dat))
		axis(2)
		for (k in 1:nrow(tmp)) {
			lines(tmp[k, ], col = colo[cut(ship[k],colorseq)])
		}
		dev.off()
	}
}else if (software == 'TCseq'){
	fpkm_dat <- as.matrix(fpkm_dat)
	set.seed(0)
	tcseq_cluster <- timeclust(fpkm_dat, algo = 'cm', k = number, standardize = TRUE)
	for(i in 1:number){
		outfile_id <- names(tcseq_cluster@cluster[tcseq_cluster@cluster == i])
		membership <- tcseq_cluster@membership[outfile_id,i]
		outfile <- data.frame(gene_id=outfile_id,anno[outfile_id,],membership=membership)
		write.table(outfile,file=paste0(perfix,"_TCseq_cluster_",i,".xls"),sep='\t',quote=F,row.names=F)
	}
	n <- ceiling(number/3)
	pdf(file=paste0(perfix,"_TCseq.pdf"))
	p <- timeclustplot(tcseq_cluster, value = 'z-score', cols = 3,
		axis.line.size = 0.6, axis.title.size = 8, axis.text.size = 8,
		title.size = 8, legend.title.size = 8, legend.text.size = 8)
	dev.off()
	png(file=paste0(perfix,"_TCseq.png"),type='cairo-png')
	p <- timeclustplot(tcseq_cluster, value = 'z-score', cols = 3,
		axis.line.size = 0.6, axis.title.size = 8, axis.text.size = 8,
		title.size = 8, legend.title.size = 8, legend.text.size = 8)
	dev.off()
}

使用

./Mfuzz_TCseq_Time_series_analysis -h
Warning messages:
1: package ‘e1071’ was built under R version 3.6.3
2: no DISPLAY variable so Tk is not available
usage: ./Mfuzz_TCseq_Time_series_analysis [--] [--help] [--opts OPTS] [--fpkm FPKM] [--das DAS] [--sample SAMPLE] [--number NUMBER] [--m M] [--software SOFTWARE] [--perfix PERFIX]




flags:
  -h, --help			show this help message and exit

optional arguments:
  -x, --opts OPTS			RDS file containing argument values
  -f, --fpkm FPKM			the fpkm file
  -d, --das DAS			the differential analysis results file
  -s, --sample SAMPLE			the sample name
  -n, --number NUMBER			the number of cluster,default:6
  -m, --m M			the fuzzifier
  --software SOFTWARE			the one of Mfuzz or TCseq,default:Mfuzz
  -p, --perfix PERFIX			the perfix