欢迎批评指正!!!
Title :Soft clustering of time series gene expression data ,模糊均值算法(fuzzy c-means)
http://mfuzz.sysbiolab.eu/
/TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/zuotu/Mfuzz_TCseq_Time_series_analysis
#!/TJPROJ6/RNA_SH/personal_dir/zhangxin/miniconda/envs/R_3.6.0/bin/Rscript suppressMessages({ library(Mfuzz) library(TCseq) library(argparser)}) argv <- arg_parser('') argv <- add_argument(argv,"--fpkm",help="the fpkm file") argv <- add_argument(argv,"--das", help="the differential analysis results file") argv <- add_argument(argv,"--sample",help="the sample name") argv <- add_argument(argv,"--number",help="the number of cluster,default:6",type='numeric') argv <- add_argument(argv,"--m",help="the fuzzifier",type='numeric') argv <- add_argument(argv,"--software",help="the one of Mfuzz or TCseq,default:Mfuzz") argv <- add_argument(argv,"--perfix",help="the perfix") argv <- parse_args(argv) fpkm <- argv$fpkm das <- argv$das sample <- argv$sample number <- argv$number m <- argv$m software <- argv$software perfix <- argv$perfix if(is.na(software)){software <- 'Mfuzz'} fpkm_stat <- read.delim(fpkm, header=T, sep='\t', row.names=1, quote='') gene_vector <- NULL if(!is.na(das)){ das_vector <- strsplit(das,',')[[1]] for (i in 1:length(das_vector)) { data <- read.delim(das_vector[i],header=TRUE,sep='\t') gene_id <- as.character(data[,1]) gene_vector <- c(gene_vector,gene_id)} gene_vector <- gene_vector[!duplicated(gene_vector)] fpkm_dat <- fpkm_stat[gene_vector,] }else{ fpkm_dat <- fpkm_stat } if(!is.na(sample)){samples <- strsplit(sample,",")[[1]] fpkm_dat <- fpkm_stat[,samples] } if('gene_name' %in% colnames(fpkm_stat)){ anno <- cbind(fpkm_dat,fpkm_stat[,which(colnames(fpkm_stat)=='gene_name'):dim(fpkm_stat)[2]]) }else{ anno <- fpkm_dat } if(is.na(number)){number <- 6} if (software == 'Mfuzz'){ fpkm_dat <- data.matrix(fpkm_dat) eset <- new("ExpressionSet",exprs = fpkm_dat) eset <- filter.std(eset,min.std=0,visu=F) eset <- standardise(eset) if(is.na(m)){m <- mestimate(eset)} set.seed(0) cluster <- mfuzz(eset, c = number, m = m) for(i in 1:number){ outfile_id <- names(cluster$cluster[cluster$cluster == i]) membership <- cluster$membership[outfile_id,i] # colnames(membership) <- paste0('cluster',1:number) outfile <- data.frame(gene_id=outfile_id,anno[outfile_id,],membership=membership) write.table(outfile,file=paste0(perfix,"_Mfuzz_cluster_",i,".xls"),sep='\t',quote=F,row.names=F) } n <- ceiling(number/3) pdf(file=paste0(perfix,"_Mfuzz.pdf")) mfuzz.plot(eset,cluster,mfrow=c(n,3),new.window= FALSE,time.labels = colnames(fpkm_dat)) dev.off() png(file=paste0(perfix,"_Mfuzz.png"),type='cairo-png') mfuzz.plot(eset,cluster,mfrow=c(n,3),new.window= FALSE) dev.off() colo <- c("#FF8F00", "#FFA700", "#FFBF00", "#FFD700", "#FFEF00", "#F7FF00", "#DFFF00", "#C7FF00", "#AFFF00", "#97FF00", "#80FF00", "#68FF00", "#50FF00", "#38FF00", "#20FF00", "#08FF00", "#00FF10", "#00FF28", "#00FF40", "#00FF58", "#00FF70", "#00FF87", "#00FF9F", "#00FFB7", "#00FFCF", "#00FFE7", "#00FFFF", "#00E7FF", "#00CFFF", "#00B7FF", "#009FFF", "#0087FF", "#0070FF", "#0058FF", "#0040FF", "#0028FF", "#0010FF", "#0800FF", "#2000FF", "#3800FF", "#5000FF", "#6800FF", "#8000FF", "#9700FF", "#AF00FF", "#C700FF", "#DF00FF", "#F700FF", "#FF00EF", "#FF00D7", "#FF00BF", "#FF00A7", "#FF008F", "#FF0078", "#FF0060", "#FF0048", "#FF0030", "#FF0018") colorseq <- seq(0, 1, length = length(colo) + 1) names(colo) <- levels(cut(1,colorseq)) exp <- exprs(eset) for(i in 1:number){ tmp <- exp[cluster$cluster == i,,drop = FALSE] ship <- cluster$membership[rownames(tmp),i] ymin <- min(tmp) ymax <- max(tmp) pdf(file=paste0(perfix,"_cluster_",i,"_Mfuzz.pdf")) plot.default(x = NA, xlim = c(1, dim(exprs(eset))[[2]]), ylim = c(ymin, ymax), xlab = "Time", ylab = "Expression changes", main = paste("Cluster", i), axes = FALSE) axis(1, 1:dim(exprs(eset))[[2]], colnames(fpkm_dat)) axis(2) for (k in 1:nrow(tmp)) { lines(tmp[k, ], col = colo[cut(ship[k],colorseq)]) } dev.off() png(file=paste0(perfix,"_cluster_",i,"_Mfuzz.png"),type='cairo-png') plot.default(x = NA, xlim = c(1, dim(exprs(eset))[[2]]), ylim = c(ymin, ymax), xlab = "Time", ylab = "Expression changes", main = paste("Cluster", i), axes = FALSE) axis(1, 1:dim(exprs(eset))[[2]], colnames(fpkm_dat)) axis(2) for (k in 1:nrow(tmp)) { lines(tmp[k, ], col = colo[cut(ship[k],colorseq)]) } dev.off() } }else if (software == 'TCseq'){ fpkm_dat <- as.matrix(fpkm_dat) set.seed(0) tcseq_cluster <- timeclust(fpkm_dat, algo = 'cm', k = number, standardize = TRUE) for(i in 1:number){ outfile_id <- names(tcseq_cluster@cluster[tcseq_cluster@cluster == i]) membership <- tcseq_cluster@membership[outfile_id,i] outfile <- data.frame(gene_id=outfile_id,anno[outfile_id,],membership=membership) write.table(outfile,file=paste0(perfix,"_TCseq_cluster_",i,".xls"),sep='\t',quote=F,row.names=F) } n <- ceiling(number/3) pdf(file=paste0(perfix,"_TCseq.pdf")) p <- timeclustplot(tcseq_cluster, value = 'z-score', cols = 3, axis.line.size = 0.6, axis.title.size = 8, axis.text.size = 8, title.size = 8, legend.title.size = 8, legend.text.size = 8) dev.off() png(file=paste0(perfix,"_TCseq.png"),type='cairo-png') p <- timeclustplot(tcseq_cluster, value = 'z-score', cols = 3, axis.line.size = 0.6, axis.title.size = 8, axis.text.size = 8, title.size = 8, legend.title.size = 8, legend.text.size = 8) dev.off() }
./Mfuzz_TCseq_Time_series_analysis -h Warning messages: 1: package ‘e1071’ was built under R version 3.6.3 2: no DISPLAY variable so Tk is not available usage: ./Mfuzz_TCseq_Time_series_analysis [--] [--help] [--opts OPTS] [--fpkm FPKM] [--das DAS] [--sample SAMPLE] [--number NUMBER] [--m M] [--software SOFTWARE] [--perfix PERFIX] flags: -h, --help show this help message and exit optional arguments: -x, --opts OPTS RDS file containing argument values -f, --fpkm FPKM the fpkm file -d, --das DAS the differential analysis results file -s, --sample SAMPLE the sample name -n, --number NUMBER the number of cluster,default:6 -m, --m M the fuzzifier --software SOFTWARE the one of Mfuzz or TCseq,default:Mfuzz -p, --perfix PERFIX the perfix