Metastat组间差异分析

目的:

1.为了研究组间具有显著性差异的抗性基因,利用 Metastats(非参数多重检验和p值校正的整合)方法对组间的抗性基因丰度数据进行假设检验得到 p 值,通过对 p 值的校正,>得到 q值;最后根据 q 值筛选具有显著性差异的基因

2.其他kegg,cazy,eggnog 组间差异都可用

缺点:基因个数多时分析特别慢

输入文件: 1.表达量文件:

2.分组信息文件

3.比较组合文件

脚本: /TJPROJ6/RNA_SH/script_dir/Metastat/MetaStat1.3.fun.R

示例:

/PUBLIC/software/public/System/R-2.15.3/bin/Rscript MetaStat1.3.fun.R –outdir 输出路径 –output 输出名 –group 分组信息(all.mf) –Vslist 比较组合文件 –threshold 0.05 –infilepath 表达量文件

suppressPackageStartupMessages(library("optparse"))
option_list <- list(
		make_option("--infilepath", action="store",default=NULL, help="The input files path"),
        make_option("--output", action="store",default='output', help="The output files name, default= output"),
		make_option("--group", action="store",default=NULL, help="The file contain group infomation ,without header"),
		make_option("--vs", action="store", default=NULL, help="The Comparison group info,look like: Contrast,Case;Contrast,Case;Contrast,Case..."),
		make_option("--outdir", action="store",,default="./", help="The output dirctory,  [default %default]"),
		make_option("--threshold", type="double",default=0.05, help="The cutoff value or sig [default %default]"),
		make_option("--Vslist", action="store",default=NULL, help="The Comparison group info,look like: a\tb\nc\tb")
#make_option("--correlation", type="integer",default=1, help="Correlation to use: 1=pearson, 2=spearman, 3=kendall [default %default]"),
#make_option("--rmode", action="store_true",default=FALSE, help="Mode: TRUE=R mode, FALSE=Q mode [default %default]")
)

#get command line options
opt<-parse_args(OptionParser(usage="%prog [options] file\n", option_list=option_list))
if(is.null(opt$infilepath)||(is.null(opt$vs)&&is.null(opt$Vslist))){
	cat ("Use  %prog -h for more help info\nThe author: wangxiaohong@novogene.cn\n")
	quit("no")
}

#args<-commandArgs(T)
#if(length(args)<3){
#	cat("[usage:] <Dir:/otu97/Evenabs/> <greoup.info> <Contral_GroupAname,Case_GroupBname;Contral_GroupAname,Case_GroupCname;...> <Dir:outdir> \n")
#	cat ("Example:  plot_aplhaindex.R Report01/03.Make_OTU/otu97/Evenabs/ group.info GroupAname,GroupBname... outdir\n")
#	quit("no")
#}
infilepath <- opt$infilepath
matfiles<-opt$infilepath
group.file<-opt$group
outdir<-opt$outdir

if(!is.null(opt$Vslist)&&file.exists(opt$Vslist)){
	vslistdata<-read.table(opt$Vslist,sep="\t")
	vslistdata<-as.matrix(vslistdata)
	pnu<-dim(vslistdata)[1]
	for(i in 1:pnu){
		if(is.null(opt$vs)){
			opt$vs<-paste(as.vector(vslistdata[i,]),collapse=",")
			print (opt$vs)
		}else{
			opt$vs<-paste(opt$vs,paste(as.vector(vslistdata[i,]),collapse=","),sep=";")
		}
	}
}
if(!file.exists(outdir)){
	dir.create(outdir)
}

groupnames<-unlist (strsplit(opt$vs,",|;",fixed=F))
Pairs<-unlist(strsplit(opt$vs,";",fixed=T))
source ("/TJPROJ7/RNA_R/shouhou/script_dir/meta/Metastat/MetaStats/ddaf3.R") ##
group<-read.table(group.file,sep="\t",header=F)
#group.all<-group[,which(group[,2] %in% unlist(groupnames))==T]
group.all<-group[which(group[,2] %in% groupnames),][,1]
temp.outdir<-outdir

	cfgops<-opt$output
	if (!file.exists(outdir)){
		dir.create(outdir)
	}
	T.level<-read.table(infilepath,head=T,sep="\t")
	row.names(T.level)<-T.level[,1]
	T.level[,1]<-NULL
#T.level[,dim(T.level)[2]]<-NULL
	#data<-T.level[,1:(dim(T.level)[2]-1)]
	data<-T.level
	#colnames(data)<-colnames(T.level)[2:length(T.level[1,])]
	select.data<-colnames(data) %in% group.all
	select.name<-colnames(data)[select.data]
	#cat (select.name,"\n")
	#group<-group[which(group[,1] %in% colnames(data)),]
	#group.all<-group[which(group[,1] %in% groupnames),][,1]
	select.data.file<-paste(outdir,"/",opt$output,".xls",sep="")
	write.table(data[,select.data],select.data.file,quote = FALSE,sep="\t")
	data<-data[,select.data]
	#cat(length(Pairs),"\n")
	for(p in 1:length(Pairs)){
		pair<-unlist (strsplit(Pairs[p],",|;",fixed=F))
		#cat(pair[1],"\t",pair[2])
		#pair.data.file<-paste(outdir,"/",pair.file.name,".mat",sep="")
		
		##group1<-which(group[which(group[,1] %in% select.name),2]==pair[1])
		group1.dat<-which(group[,1] %in% select.name)[which(group[which(group[,1] %in% select.name),2]==pair[1])]
    group1<-which(select.name %in% group[group1.dat,1])
		#cat (group1,"\n")
		##group2<-which(group[which(group[,1] %in% select.name),2]==pair[2])
		group2.dat<-which(group[,1] %in% select.name)[which(group[which(group[,1] %in% select.name),2]==pair[2])]
		group2<-which(select.name %in% group[group2.dat,1])
		#cat (group2,"\n")
		mark.group2<-length(group1)+1
		#group.pair<-group[,which(group[,2] %in% pair)==T]
		group.pair<-c(group1,group2)
		#q("no")
			
		perfix<-paste(pair,collapse="-vs-")
		pair.file.name<-paste(outdir,"/",perfix,sep="")
		test.file.name<-paste(pair.file.name,".test.xls",sep="")
		p.file.name<-paste(pair.file.name,".psig.xls",sep="")
		q.file.name<-paste(pair.file.name,".qsig.xls",sep="")
		write.table(data[,group.pair],paste(pair.file.name,".",cfgops,".mat",sep=""),quote = FALSE,sep="\t",col.names = NA)
		#cat (mark_group2,"\n")
		detect_differentially_abundant_features(select.data.file,group1,group2,mark.group2,test.file.name,p.file.name,q.file.name,pflag = NULL, threshold = 0.05, B = NULL)
		#cat (select.data.file,group1,group2,mark_group2,test.file.name,p.file.name,q.file.name,"\n\n")
	}
#detect_differentially_abundant_features("./phylum/otu_table.p.absolute.mat",c(91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120),c(31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60),31, "./phylum/test_HC.A_GC.B","./phylum/psig_HC.A_GC.B","./phylum/qsig_HC.A_GC.B",pflag = NULL, threshold = 0.05, B = NULL)

结果:

     mean(group1):均值(分组1)
      variance(group1):方差(分组1)
      standard error(group1):标准误(分组1)
      mean(group2):均值(分组2)
      variance(group2):方差(分组2)
      standard error(group2):标准误(分组2)
      p value:p值
      q value :q值