====疾病诊断模型构建==== /TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/Machine_learning


./Machine_learning.R --learning leaning.xls --learning_condition leaning_condition.xls --predict leaning.xls --predict_condition predict_condtion.xls --prefix test --type SVM

./Machine_learning.R --learning leaning.xls --learning_condition leaning_condition.xls --predict leaning.xls --predict_condition predict_condtion.xls --prefix test --type LDA

./Machine_learning.R --learning leaning.xls --learning_condition leaning_condition.xls --predict leaning.xls --predict_condition predict_condtion.xls --prefix test --type KNN

./Machine_learning.R --learning leaning.xls --learning_condition leaning_condition.xls --predict leaning.xls --predict_condition predict_condtion.xls --prefix test --type Bayes


#!/TJPROJ6/RNA_SH/personal_dir/zhangxin/miniconda/envs/R_3.6.0/bin/Rscript

suppressMessages({
library(reshape2)
library(ggplot2)
library(argparser)
#library(e1071)#SVM
})

# ---- Command-line interface ----
# Declare the options, parse the command line, and copy each value into a
# plain variable used by the rest of the script. The script treats an NA
# value as "option not supplied" (see the is.na() checks below and later on).
argv <- arg_parser('')
argv <- add_argument(argv, "--learning", help="the learning data")
argv <- add_argument(argv, "--learning_condition", help="the learning condition file")
argv <- add_argument(argv, "--predict", help="the predict data")
argv <- add_argument(argv, "--predict_condition", help="the predict condition file")
argv <- add_argument(argv, "--test", help="the test data")
argv <- add_argument(argv, "--test_condition", help="the test condition file")
argv <- add_argument(argv, "--type", help="the learning type, one of SVM , LDA , KNN , Bayes")
argv <- add_argument(argv, "--prefix", help="the prefix")
argv <- parse_args(argv)

# These three inputs are read unconditionally further down, so report a
# missing one by name here instead of letting read.delim() fail obscurely.
for (required_arg in c("learning", "learning_condition", "predict")) {
	required_value <- argv[[required_arg]]
	if (is.null(required_value) || is.na(required_value)) {
		stop("missing required argument: --", required_arg, call. = FALSE)
	}
}

learning <- argv$learning
learning_condition <- argv$learning_condition
predict_data <- argv$predict
predict_condition <- argv$predict_condition
test_data <- argv$test
test_condition <- argv$test_condition
type <- argv$type
prefix <- argv$prefix

# SVM is the classifier used when --type is not given.
if (is.na(type)) {
	type <- 'SVM'
}

# ---- Training data ----
# The learning table is read with its first column as row names and one
# column per sample. check.names = FALSE keeps the sample names exactly as
# written in the header, so they can be looked up by the names listed in the
# condition file even when they are not syntactic R names (e.g. "S-1", "1A").
learning_df <- read.delim(learning, header = TRUE, sep = '\t', quote = '', row.names = 1, check.names = FALSE)

# The condition file supplies a `sample` column and a `group` column.
learning_condition_df <- read.delim(learning_condition, header = TRUE, sep = '\t', quote = '')
learning_sample <- as.character(learning_condition_df$sample)
learning_group <- as.character(learning_condition_df$group)
if (length(learning_sample) == 0 || length(learning_group) != length(learning_sample)) {
	stop("the learning condition file must provide matching 'sample' and 'group' columns", call. = FALSE)
}
names(learning_group) <- learning_sample

# Report unknown samples by name rather than "undefined columns selected".
learning_missing <- setdiff(learning_sample, colnames(learning_df))
if (length(learning_missing) > 0) {
	stop("sample(s) listed in the learning condition file are absent from the learning data: ",
		paste(learning_missing, collapse = ", "), call. = FALSE)
}

# Keep only the listed samples and transpose so that each row is a sample.
# drop = FALSE keeps a data frame (and its names) even for a single sample.
learning_df <- as.data.frame(t(learning_df[, learning_sample, drop = FALSE]))

# Attach the class label of every sample as a factor column named `group`.
learning_df$group <- factor(learning_group[rownames(learning_df)])

# ---- Data to classify ----
# Read with the first column as row names and one column per sample.
# check.names = FALSE keeps the header's sample names untouched so they match
# the names listed in the optional condition file and appear verbatim in the
# result tables.
predict_df <- read.delim(predict_data, header = TRUE, sep = '\t', quote = '', row.names = 1, check.names = FALSE)
if (!is.na(predict_condition)) {
	# Optional restriction to the samples named in the `sample` column.
	predict_sample <- as.character(read.delim(predict_condition, header = TRUE, sep = '\t', quote = '')$sample)
	predict_missing <- setdiff(predict_sample, colnames(predict_df))
	if (length(predict_missing) > 0) {
		stop("sample(s) listed in the predict condition file are absent from the predict data: ",
			paste(predict_missing, collapse = ", "), call. = FALSE)
	}
	# drop = FALSE: with a single sample a plain `[, name]` returns an unnamed
	# vector, and the transpose below would then lose both the sample name and
	# every feature name.
	predict_df <- predict_df[, predict_sample, drop = FALSE]
}
# Transpose so that each row is a sample.
predict_df <- as.data.frame(t(predict_df))

# ---- Optional test data ----
# Only loaded when --test was supplied; test_df does not exist otherwise.
if (!is.na(test_data)) {
	# Same layout as the other tables: first column is row names, one column
	# per sample. check.names = FALSE keeps sample names as written.
	test_df <- read.delim(test_data, header = TRUE, sep = '\t', quote = '', row.names = 1, check.names = FALSE)
	if (!is.na(test_condition)) {
		# Optional restriction to the samples named in the `sample` column.
		test_sample <- as.character(read.delim(test_condition, header = TRUE, sep = '\t', quote = '')$sample)
		test_missing <- setdiff(test_sample, colnames(test_df))
		if (length(test_missing) > 0) {
			stop("sample(s) listed in the test condition file are absent from the test data: ",
				paste(test_missing, collapse = ", "), call. = FALSE)
		}
		# drop = FALSE keeps a one-sample selection as a data frame so the
		# transpose below preserves the sample and feature names.
		test_df <- test_df[, test_sample, drop = FALSE]
	}
	# Transpose so that each row is a sample.
	test_df <- as.data.frame(t(test_df))
}

# ---- Model fitting and prediction ----
# Fit the classifier selected by `type` on learning_df (label column `group`),
# then classify the optional test set and the predict set. Each result is a
# two-column table (sample, group) written to
# <prefix>_<type>_test_res.xls / <prefix>_<type>_predict_res.xls.

# Write one result table as tab-separated text without quotes or row names.
write_result <- function(res, method, stage) {
	write.table(res, file = paste0(prefix, "_", method, "_", stage, "_res.xls"),
		sep = '\t', quote = FALSE, row.names = FALSE)
}

if (type == "SVM") {
	library(e1071)#SVM https://zhuanlan.zhihu.com/p/28504533
	# Grid-search gamma and cost for a radial kernel, then refit with the best pair.
	tuned <- tune.svm(group ~ ., data = learning_df, gamma = 2^c(-8, -4, 0, 4, 8), cost = 2^c(-8, -4, 0, 4, 8), kernel = "radial")
	parameters <- tuned$best.parameters
	gamma <- parameters[1, 'gamma']
	cost <- parameters[1, 'cost']
	svmfit <- svm(group ~ ., data = learning_df, kernel = "radial", cost = cost, gamma = gamma)
	if (!is.na(test_data)) {
		test_res <- predict(svmfit, test_df)
		# The prediction vector is named by sample: names go to `sample`,
		# predicted classes to `group` (same layout as the predict table).
		test_res <- data.frame(sample = names(test_res), group = test_res)
		write_result(test_res, "SVM", "test")
	}
	predict_res <- predict(svmfit, predict_df)
	predict_res <- data.frame(sample = names(predict_res), group = predict_res)
	write_result(predict_res, "SVM", "predict")
} else if (type == "LDA") {
	library(MASS)#LDA https://zhuanlan.zhihu.com/p/25501130
	# Split features from the label by name; drop = FALSE keeps a data frame
	# even when there is a single feature.
	data <- learning_df[, names(learning_df) != "group", drop = FALSE]
	grp <- learning_df[["group"]]
	lda.sol <- lda(data, grp)
	# A model fitted from x/grouping matches new-data columns by position,
	# so put them in the training feature order first.
	lda_features <- colnames(data)
	if (!is.na(test_data)) {
		test_res <- predict(lda.sol, test_df[, lda_features, drop = FALSE])
		group <- test_res$class
		test_res <- data.frame(sample = rownames(test_df), group = group)
		write_result(test_res, "LDA", "test")
	}
	predict_res <- predict(lda.sol, predict_df[, lda_features, drop = FALSE])
	group <- predict_res$class
	predict_res <- data.frame(sample = rownames(predict_df), group = group)
	write_result(predict_res, "LDA", "predict")
} else if (type == "KNN") {
	library(caret)#KNN https://zhuanlan.zhihu.com/p/141238596
	# 10-fold cross-validation over 5 candidate values of k, on centred and
	# scaled features.
	control <- trainControl(method = 'cv', number = 10)
	model <- train(group ~ ., learning_df, method = 'knn', preProcess = c('center', 'scale'), trControl = control, tuneLength = 5)
	if (!is.na(test_data)) {
		test_res <- predict(model, newdata = test_df)
		test_res <- data.frame(sample = rownames(test_df), group = test_res)
		write_result(test_res, "KNN", "test")
	}
	predict_res <- predict(model, newdata = predict_df)
	predict_res <- data.frame(sample = rownames(predict_df), group = predict_res)
	write_result(predict_res, "KNN", "predict")
} else if (type == "Bayes") {
	library(klaR)#Bayes https://cloud.tencent.com/developer/article/1553503?ivk_sa=1024320u
	# Split features from the label by name; drop = FALSE keeps a data frame
	# even when there is a single feature.
	data <- learning_df[, names(learning_df) != "group", drop = FALSE]
	grp <- learning_df[["group"]]
	model <- NaiveBayes(data, grp)
	if (!is.na(test_data)) {
		test_res <- predict(model, test_df)
		group <- test_res$class
		test_res <- data.frame(sample = rownames(test_df), group = group)
		write_result(test_res, "Bayes", "test")
	}
	predict_res <- predict(model, predict_df)
	group <- predict_res$class
	predict_res <- data.frame(sample = rownames(predict_df), group = group)
	write_result(predict_res, "Bayes", "predict")
} else {
	# Previously an unrecognised type fell through and produced no output.
	stop("unknown --type '", type, "': expected one of SVM, LDA, KNN, Bayes", call. = FALSE)
}