======Mfuzz_TCseq_Time_series_analysis======
欢迎批评指正!!! \\
Title :Soft clustering of time series gene expression data ,模糊均值算法(fuzzy c-means) \\
http://mfuzz.sysbiolab.eu/ \\
/TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/zuotu/Mfuzz_TCseq_Time_series_analysis
===脚本===
#!/TJPROJ6/RNA_SH/personal_dir/zhangxin/miniconda/envs/R_3.6.0/bin/Rscript
suppressMessages({
library(Mfuzz)
library(TCseq)
library(argparser)})
argv <- arg_parser('')
argv <- add_argument(argv,"--fpkm",help="the fpkm file")
argv <- add_argument(argv,"--das", help="the differential analysis results file")
argv <- add_argument(argv,"--sample",help="the sample name")
argv <- add_argument(argv,"--number",help="the number of cluster,default:6",type='numeric')
argv <- add_argument(argv,"--m",help="the fuzzifier",type='numeric')
argv <- add_argument(argv,"--software",help="the one of Mfuzz or TCseq,default:Mfuzz")
argv <- add_argument(argv,"--perfix",help="the perfix")
argv <- parse_args(argv)
fpkm <- argv$fpkm
das <- argv$das
sample <- argv$sample
number <- argv$number
m <- argv$m
software <- argv$software
perfix <- argv$perfix
if(is.na(software)){software <- 'Mfuzz'}
fpkm_stat <- read.delim(fpkm, header=T, sep='\t', row.names=1, quote='')
gene_vector <- NULL
if(!is.na(das)){
das_vector <- strsplit(das,',')[[1]]
for (i in 1:length(das_vector)) {
data <- read.delim(das_vector[i],header=TRUE,sep='\t')
gene_id <- as.character(data[,1])
gene_vector <- c(gene_vector,gene_id)}
gene_vector <- gene_vector[!duplicated(gene_vector)]
fpkm_dat <- fpkm_stat[gene_vector,]
}else{
fpkm_dat <- fpkm_stat
}
if(!is.na(sample)){samples <- strsplit(sample,",")[[1]]
fpkm_dat <- fpkm_stat[,samples]
}
if('gene_name' %in% colnames(fpkm_stat)){
anno <- cbind(fpkm_dat,fpkm_stat[,which(colnames(fpkm_stat)=='gene_name'):dim(fpkm_stat)[2]])
}else{
anno <- fpkm_dat
}
if(is.na(number)){number <- 6}
if (software == 'Mfuzz'){
fpkm_dat <- data.matrix(fpkm_dat)
eset <- new("ExpressionSet",exprs = fpkm_dat)
eset <- filter.std(eset,min.std=0,visu=F)
eset <- standardise(eset)
if(is.na(m)){m <- mestimate(eset)}
set.seed(0)
cluster <- mfuzz(eset, c = number, m = m)
for(i in 1:number){
outfile_id <- names(cluster$cluster[cluster$cluster == i])
membership <- cluster$membership[outfile_id,i]
# colnames(membership) <- paste0('cluster',1:number)
outfile <- data.frame(gene_id=outfile_id,anno[outfile_id,],membership=membership)
write.table(outfile,file=paste0(perfix,"_Mfuzz_cluster_",i,".xls"),sep='\t',quote=F,row.names=F)
}
n <- ceiling(number/3)
pdf(file=paste0(perfix,"_Mfuzz.pdf"))
mfuzz.plot(eset,cluster,mfrow=c(n,3),new.window= FALSE,time.labels = colnames(fpkm_dat))
dev.off()
png(file=paste0(perfix,"_Mfuzz.png"),type='cairo-png')
mfuzz.plot(eset,cluster,mfrow=c(n,3),new.window= FALSE)
dev.off()
colo <- c("#FF8F00", "#FFA700", "#FFBF00", "#FFD700",
"#FFEF00", "#F7FF00", "#DFFF00", "#C7FF00", "#AFFF00",
"#97FF00", "#80FF00", "#68FF00", "#50FF00", "#38FF00",
"#20FF00", "#08FF00", "#00FF10", "#00FF28", "#00FF40",
"#00FF58", "#00FF70", "#00FF87", "#00FF9F", "#00FFB7",
"#00FFCF", "#00FFE7", "#00FFFF", "#00E7FF", "#00CFFF",
"#00B7FF", "#009FFF", "#0087FF", "#0070FF", "#0058FF",
"#0040FF", "#0028FF", "#0010FF", "#0800FF", "#2000FF",
"#3800FF", "#5000FF", "#6800FF", "#8000FF", "#9700FF",
"#AF00FF", "#C700FF", "#DF00FF", "#F700FF", "#FF00EF",
"#FF00D7", "#FF00BF", "#FF00A7", "#FF008F", "#FF0078",
"#FF0060", "#FF0048", "#FF0030", "#FF0018")
colorseq <- seq(0, 1, length = length(colo) + 1)
names(colo) <- levels(cut(1,colorseq))
exp <- exprs(eset)
for(i in 1:number){
tmp <- exp[cluster$cluster == i,,drop = FALSE]
ship <- cluster$membership[rownames(tmp),i]
ymin <- min(tmp)
ymax <- max(tmp)
pdf(file=paste0(perfix,"_cluster_",i,"_Mfuzz.pdf"))
plot.default(x = NA, xlim = c(1, dim(exprs(eset))[[2]]),
ylim = c(ymin, ymax), xlab = "Time", ylab = "Expression changes",
main = paste("Cluster", i), axes = FALSE)
axis(1, 1:dim(exprs(eset))[[2]], colnames(fpkm_dat))
axis(2)
for (k in 1:nrow(tmp)) {
lines(tmp[k, ], col = colo[cut(ship[k],colorseq)])
}
dev.off()
png(file=paste0(perfix,"_cluster_",i,"_Mfuzz.png"),type='cairo-png')
plot.default(x = NA, xlim = c(1, dim(exprs(eset))[[2]]),
ylim = c(ymin, ymax), xlab = "Time", ylab = "Expression changes",
main = paste("Cluster", i), axes = FALSE)
axis(1, 1:dim(exprs(eset))[[2]], colnames(fpkm_dat))
axis(2)
for (k in 1:nrow(tmp)) {
lines(tmp[k, ], col = colo[cut(ship[k],colorseq)])
}
dev.off()
}
}else if (software == 'TCseq'){
fpkm_dat <- as.matrix(fpkm_dat)
set.seed(0)
tcseq_cluster <- timeclust(fpkm_dat, algo = 'cm', k = number, standardize = TRUE)
for(i in 1:number){
outfile_id <- names(tcseq_cluster@cluster[tcseq_cluster@cluster == i])
membership <- tcseq_cluster@membership[outfile_id,i]
outfile <- data.frame(gene_id=outfile_id,anno[outfile_id,],membership=membership)
write.table(outfile,file=paste0(perfix,"_TCseq_cluster_",i,".xls"),sep='\t',quote=F,row.names=F)
}
n <- ceiling(number/3)
pdf(file=paste0(perfix,"_TCseq.pdf"))
p <- timeclustplot(tcseq_cluster, value = 'z-score', cols = 3,
axis.line.size = 0.6, axis.title.size = 8, axis.text.size = 8,
title.size = 8, legend.title.size = 8, legend.text.size = 8)
dev.off()
png(file=paste0(perfix,"_TCseq.png"),type='cairo-png')
p <- timeclustplot(tcseq_cluster, value = 'z-score', cols = 3,
axis.line.size = 0.6, axis.title.size = 8, axis.text.size = 8,
title.size = 8, legend.title.size = 8, legend.text.size = 8)
dev.off()
}
===使用===
./Mfuzz_TCseq_Time_series_analysis -h
Warning messages:
1: package ‘e1071’ was built under R version 3.6.3
2: no DISPLAY variable so Tk is not available
usage: ./Mfuzz_TCseq_Time_series_analysis [--] [--help] [--opts OPTS] [--fpkm FPKM] [--das DAS] [--sample SAMPLE] [--number NUMBER] [--m M] [--software SOFTWARE] [--perfix PERFIX]
flags:
-h, --help show this help message and exit
optional arguments:
-x, --opts OPTS RDS file containing argument values
-f, --fpkm FPKM the fpkm file
-d, --das DAS the differential analysis results file
-s, --sample SAMPLE the sample name
-n, --number NUMBER the number of cluster,default:6
-m, --m M the fuzzifier
--software SOFTWARE the one of Mfuzz or TCseq,default:Mfuzz
-p, --perfix PERFIX the perfix