====疾病诊断模型构建====
/TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/Machine_learning
./Machine_learning.R --learning leaning.xls --learning_condition leaning_condition.xls --predict leaning.xls --predict_condition predict_condtion.xls --prefix test --type SVM
./Machine_learning.R --learning leaning.xls --learning_condition leaning_condition.xls --predict leaning.xls --predict_condition predict_condtion.xls --prefix test --type LDA
./Machine_learning.R --learning leaning.xls --learning_condition leaning_condition.xls --predict leaning.xls --predict_condition predict_condtion.xls --prefix test --type KNN
./Machine_learning.R --learning leaning.xls --learning_condition leaning_condition.xls --predict leaning.xls --predict_condition predict_condtion.xls --prefix test --type Bayes
#!/TJPROJ6/RNA_SH/personal_dir/zhangxin/miniconda/envs/R_3.6.0/bin/Rscript
suppressMessages({
library(reshape2)
library(ggplot2)
library(argparser)
#library(e1071)#SVM
})
# ---- Command-line arguments ----
# Declare the options accepted by the script. The rest of the script treats an
# option that was not supplied as NA (see the is.na() checks below and in the
# loading / prediction code).
argv <- arg_parser('')
argv <- add_argument(argv, "--learning", help = "the learning data")
argv <- add_argument(argv, "--learning_condition", help = "the learning condition file")
argv <- add_argument(argv, "--predict", help = "the predict data")
argv <- add_argument(argv, "--predict_condition", help = "the predict condition file")
argv <- add_argument(argv, "--test", help = "the test data")
argv <- add_argument(argv, "--test_condition", help = "the test condition file")
argv <- add_argument(argv, "--type", help = "the learning type, one of SVM , LDA , KNN , Bayes")
argv <- add_argument(argv, "--prefix", help = "the prefix")
argv <- parse_args(argv)

# Copy the parsed values into the plain variables used by the rest of the script.
learning <- argv$learning
learning_condition <- argv$learning_condition
predict_data <- argv$predict
predict_condition <- argv$predict_condition
test_data <- argv$test
test_condition <- argv$test_condition
type <- argv$type
prefix <- argv$prefix

# These three inputs are read unconditionally later on, so fail early with a
# readable message instead of a low-level read.delim() error.
required_args <- c(
  learning = learning,
  learning_condition = learning_condition,
  predict = predict_data
)
missing_args <- names(required_args)[is.na(required_args)]
if (length(missing_args) > 0) {
  stop(
    "missing required argument(s): ",
    paste0("--", missing_args, collapse = ", "),
    call. = FALSE
  )
}

# Fall back to SVM when no model type is given.
if (is.na(type)) {
  type <- "SVM"
}

# Reject unknown model types up front: the dispatch further down has no branch
# for them, so the script would otherwise finish without writing any result.
supported_types <- c("SVM", "LDA", "KNN", "Bayes")
if (!(type %in% supported_types)) {
  stop(
    "unsupported --type '", type, "'; expected one of: ",
    paste(supported_types, collapse = ", "),
    call. = FALSE
  )
}
# ---- Input tables ----
# All data tables are tab-delimited with feature IDs in the first column and
# one column per sample; they are transposed below so that each row is a sample.

# Read the `sample` column of a condition file as a character vector.
# `$` is used on purpose so that column lookup behaves as it always has.
read_sample_ids <- function(condition_path) {
  cond <- read.delim(condition_path, header = TRUE, sep = "\t", quote = "")
  if (is.null(cond$sample)) {
    stop("no 'sample' column in '", condition_path, "'", call. = FALSE)
  }
  as.character(cond$sample)
}

# Keep only the requested sample columns of a feature-by-sample table.
# drop = FALSE keeps a data frame even when a single sample is requested;
# without it the column collapses to a bare vector and the sample/feature
# names are lost when the table is transposed.
select_samples <- function(df, samples, data_path, condition_path) {
  absent <- setdiff(samples, colnames(df))
  if (length(absent) > 0) {
    stop(
      "sample(s) listed in '", condition_path,
      "' not found as columns of '", data_path, "': ",
      paste(absent, collapse = ", "),
      " (note: read.delim() makes column names syntactically valid,",
      " e.g. '-' becomes '.')",
      call. = FALSE
    )
  }
  df[, samples, drop = FALSE]
}

# Training set: restrict to the samples in the condition file, transpose to
# samples-in-rows and attach the class label as a factor column named `group`.
learning_df <- read.delim(learning, header = TRUE, sep = "\t", quote = "", row.names = 1)
learning_condition_df <- read.delim(learning_condition, header = TRUE, sep = "\t", quote = "")
if (is.null(learning_condition_df$sample) || is.null(learning_condition_df$group)) {
  stop(
    "'", learning_condition, "' must contain 'sample' and 'group' columns",
    call. = FALSE
  )
}
learning_sample <- as.character(learning_condition_df$sample)
learning_group <- as.character(learning_condition_df$group)
names(learning_group) <- learning_sample
learning_df <- as.data.frame(
  t(select_samples(learning_df, learning_sample, learning, learning_condition))
)
learning_df$group <- learning_group[rownames(learning_df)]
learning_df$group <- factor(learning_df$group)

# Samples to classify: optionally restricted to those in --predict_condition.
predict_df <- read.delim(predict_data, header = TRUE, sep = "\t", quote = "", row.names = 1)
if (!is.na(predict_condition)) {
  predict_sample <- read_sample_ids(predict_condition)
  predict_df <- select_samples(predict_df, predict_sample, predict_data, predict_condition)
}
predict_df <- as.data.frame(t(predict_df))

# Optional test set, handled exactly like the prediction set. `test_df` only
# exists when --test was supplied.
if (!is.na(test_data)) {
  test_df <- read.delim(test_data, header = TRUE, sep = "\t", quote = "", row.names = 1)
  if (!is.na(test_condition)) {
    test_sample <- read_sample_ids(test_condition)
    test_df <- select_samples(test_df, test_sample, test_data, test_condition)
  }
  test_df <- as.data.frame(t(test_df))
}
# ---- Model training and prediction ----
# Train the classifier selected by `type` on `learning_df` (label column
# `group`), then classify `predict_df` and, when --test was given, `test_df`.
# Each result is written to "<prefix>_<type>_<stage>_res.xls" as a
# tab-separated table with the columns `sample` and `group`.

# Write one prediction table and return it.
#   samples: sample identifiers
#   groups:  predicted class for each sample
#   stage:   "test" or "predict"; becomes part of the output file name
write_prediction <- function(samples, groups, stage) {
  res <- data.frame(sample = samples, group = groups)
  write.table(
    res,
    file = paste0(prefix, "_", type, "_", stage, "_res.xls"),
    sep = "\t", quote = FALSE, row.names = FALSE
  )
  res
}

has_test <- !is.na(test_data)

if (type == "SVM") {
  library(e1071) # SVM https://zhuanlan.zhihu.com/p/28504533
  # Grid-search gamma and cost for the radial kernel, then refit with the best pair.
  tuned <- tune.svm(
    group ~ ., data = learning_df,
    gamma = 2^c(-8, -4, 0, 4, 8), cost = 2^c(-8, -4, 0, 4, 8),
    kernel = "radial"
  )
  parameters <- tuned$best.parameters
  gamma <- parameters[1, "gamma"]
  cost <- parameters[1, "cost"]
  svmfit <- svm(group ~ ., data = learning_df, kernel = "radial", cost = cost, gamma = gamma)
  if (has_test) {
    test_pred <- predict(svmfit, test_df)
    # The names of the prediction vector are the sample IDs and its values the
    # predicted classes (the two used to be written under swapped headers here).
    test_res <- write_prediction(names(test_pred), test_pred, "test")
  }
  predict_pred <- predict(svmfit, predict_df)
  predict_res <- write_prediction(names(predict_pred), predict_pred, "predict")
} else if (type == "LDA") {
  library(MASS) # LDA https://zhuanlan.zhihu.com/p/25501130
  # `group` is the last column; drop = FALSE keeps a data frame even when
  # there is only one feature column.
  data <- learning_df[, -ncol(learning_df), drop = FALSE]
  grp <- learning_df[, ncol(learning_df)]
  lda.sol <- lda(data, grp)
  if (has_test) {
    group <- predict(lda.sol, test_df)$class
    test_res <- write_prediction(rownames(test_df), group, "test")
  }
  group <- predict(lda.sol, predict_df)$class
  predict_res <- write_prediction(rownames(predict_df), group, "predict")
} else if (type == "KNN") {
  library(caret) # KNN https://zhuanlan.zhihu.com/p/141238596
  # 10-fold cross-validation over 5 candidate tuning values, on centred and
  # scaled features.
  control <- trainControl(method = "cv", number = 10)
  model <- train(
    group ~ ., learning_df,
    method = "knn", preProcess = c("center", "scale"),
    trControl = control, tuneLength = 5
  )
  if (has_test) {
    group <- predict(model, newdata = test_df)
    test_res <- write_prediction(rownames(test_df), group, "test")
  }
  group <- predict(model, newdata = predict_df)
  predict_res <- write_prediction(rownames(predict_df), group, "predict")
} else if (type == "Bayes") {
  library(klaR) # Bayes https://cloud.tencent.com/developer/article/1553503?ivk_sa=1024320u
  # `group` is the last column; drop = FALSE keeps a data frame even when
  # there is only one feature column.
  data <- learning_df[, -ncol(learning_df), drop = FALSE]
  grp <- learning_df[, ncol(learning_df)]
  model <- NaiveBayes(data, grp)
  if (has_test) {
    group <- predict(model, test_df)$class
    test_res <- write_prediction(rownames(test_df), group, "test")
  }
  group <- predict(model, predict_df)$class
  predict_res <- write_prediction(rownames(predict_df), group, "predict")
} else {
  # Without this branch an unknown type would end the script silently with no output.
  stop(
    "unsupported --type '", type, "'; expected one of: SVM, LDA, KNN, Bayes",
    call. = FALSE
  )
}