#!/TJPROJ6/RNA_SH/personal_dir/zhangxin/miniconda/envs/R_3.6.0/bin/Rscript
# ==== Disease diagnosis model construction ====
#
# /TJPROJ6/RNA_SH/personal_dir/zhangxin/jiaoben/Machine_learning
#
# Usage examples:
#   ./Machine_learning.R --learning leaning.xls --learning_condition leaning_condition.xls --predict leaning.xls --predict_condition predict_condtion.xls --prefix test --type SVM
#   ./Machine_learning.R --learning leaning.xls --learning_condition leaning_condition.xls --predict leaning.xls --predict_condition predict_condtion.xls --prefix test --type LDA
#   ./Machine_learning.R --learning leaning.xls --learning_condition leaning_condition.xls --predict leaning.xls --predict_condition predict_condtion.xls --prefix test --type KNN
#   ./Machine_learning.R --learning leaning.xls --learning_condition leaning_condition.xls --predict leaning.xls --predict_condition predict_condtion.xls --prefix test --type Bayes
#
# Overview:
#   Trains a classifier (SVM, LDA, KNN or naive Bayes) on a learning table,
#   then writes the predicted group of every sample in the predict table (and,
#   optionally, in a test table) to "<prefix>_<type>_predict_res.xls" /
#   "<prefix>_<type>_test_res.xls" as tab-delimited files with the columns
#   `sample` and `group`.
#
# Input layout (as consumed by the code below):
#   * data tables: tab-delimited, header row, first column used as row names,
#     one column per sample; the tables are transposed so that samples become
#     rows and the original rows become features.
#   * condition tables: tab-delimited with a `sample` column; the learning
#     condition table additionally needs a `group` column.
#   NOTE(review): read.delim() is called with its default `check.names`, so
#   sample names in the condition files have to match the column names as R
#   reads them — confirm for names that are not syntactic R names.

suppressMessages({
  library(reshape2)
  library(ggplot2)
  library(argparser)
  #library(e1071)#SVM
})

# ---- Command-line interface ----

argv <- arg_parser('')
argv <- add_argument(argv, "--learning", help = "the learning data")
argv <- add_argument(argv, "--learning_condition", help = "the learning condition file")
argv <- add_argument(argv, "--predict", help = "the predict data")
argv <- add_argument(argv, "--predict_condition", help = "the predict condition file")
argv <- add_argument(argv, "--test", help = "the test data")
argv <- add_argument(argv, "--test_condition", help = "the test condition file")
argv <- add_argument(argv, "--type", help = "the learning type, one of SVM , LDA , KNN , Bayes")
argv <- add_argument(argv, "--prefix", help = "the prefix")
argv <- parse_args(argv)

learning <- argv$learning
learning_condition <- argv$learning_condition
predict_data <- argv$predict
predict_condition <- argv$predict_condition
test_data <- argv$test
test_condition <- argv$test_condition
type <- argv$type
prefix <- argv$prefix

# SVM is the default classifier when --type is not given.
if (is.na(type)) {
  type <- "SVM"
}

# Fail loudly on an unknown classifier instead of silently writing nothing.
supported_types <- c("SVM", "LDA", "KNN", "Bayes")
if (!type %in% supported_types) {
  stop(
    "unsupported --type '", type, "'; expected one of: ",
    paste(supported_types, collapse = ", "),
    call. = FALSE
  )
}

# These three inputs are read unconditionally below, so require them up front.
required_args <- list(
  "--learning" = learning,
  "--learning_condition" = learning_condition,
  "--predict" = predict_data
)
missing_args <- names(required_args)[vapply(required_args, is.na, logical(1))]
if (length(missing_args) > 0) {
  stop(
    "missing required argument(s): ", paste(missing_args, collapse = ", "),
    call. = FALSE
  )
}

# ---- Helpers ----

# Read a tab-delimited data table; the first column becomes the row names.
read_data_table <- function(path) {
  read.delim(path, header = TRUE, sep = "\t", quote = "", row.names = 1)
}

# Read the `sample` column of a tab-delimited condition table as character.
read_sample_names <- function(path) {
  condition <- read.delim(path, header = TRUE, sep = "\t", quote = "")
  as.character(condition$sample)
}

# Keep only the columns named in `wanted`, in that order. `drop = FALSE` keeps
# a data frame even for a single column, so names survive the later transpose.
# `label` / `what` are only used to build the error message.
select_columns <- function(df, wanted, label, what) {
  absent <- setdiff(wanted, colnames(df))
  if (length(absent) > 0) {
    stop(
      what, "(s) not found in the ", label, " data: ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }
  df[, wanted, drop = FALSE]
}

# Load a predict/test table as a samples-in-rows data frame whose columns are
# exactly `features`, in that order. Matching by name matters for LDA: the
# model is fitted without a formula, so its predict method takes newdata
# columns positionally.
load_new_samples <- function(data_path, condition_path, features, label) {
  df <- read_data_table(data_path)
  if (!is.na(condition_path)) {
    df <- select_columns(df, read_sample_names(condition_path), label, "sample")
  }
  df <- as.data.frame(t(df))
  select_columns(df, features, label, "feature")
}

# Write a two-column (sample, group) result table.
write_result <- function(result, suffix) {
  write.table(
    result,
    file = paste0(prefix, "_", type, suffix),
    sep = "\t", quote = FALSE, row.names = FALSE
  )
}

# ---- Learning data ----

learning_condition_df <- read.delim(
  learning_condition,
  header = TRUE, sep = "\t", quote = ""
)
learning_sample <- as.character(learning_condition_df$sample)
learning_group <- as.character(learning_condition_df$group)
names(learning_group) <- learning_sample

learning_df <- select_columns(
  read_data_table(learning), learning_sample, "learning", "sample"
)
learning_df <- as.data.frame(t(learning_df))
# Remember the feature columns before the class label is appended.
feature_names <- colnames(learning_df)
learning_df$group <- factor(learning_group[rownames(learning_df)])

# ---- Data to classify ----

predict_df <- load_new_samples(
  predict_data, predict_condition, feature_names, "predict"
)

has_test <- !is.na(test_data)
if (has_test) {
  test_df <- load_new_samples(test_data, test_condition, feature_names, "test")
}

# ---- Model fitting ----
# Every branch fits its model on the learning data and defines
# `classify(newdata)`, which returns a data frame with the columns `sample`
# and `group` (the predicted class).

if (type == "SVM") {
  library(e1071)#SVM https://zhuanlan.zhihu.com/p/28504533
  # Grid-search gamma and cost, then refit with the best pair.
  tuned <- tune.svm(
    group ~ .,
    data = learning_df,
    gamma = 2^c(-8, -4, 0, 4, 8),
    cost = 2^c(-8, -4, 0, 4, 8),
    kernel = "radial"
  )
  parameters <- tuned$best.parameters
  gamma <- parameters[1, "gamma"]
  cost <- parameters[1, "cost"]
  svmfit <- svm(
    group ~ .,
    data = learning_df,
    kernel = "radial", cost = cost, gamma = gamma
  )
  classify <- function(newdata) {
    predicted <- predict(svmfit, newdata)
    # Sample ids come from the names of the prediction vector; the predicted
    # classes are its values (the test output used to have these swapped).
    data.frame(sample = names(predicted), group = predicted)
  }
} else if (type == "LDA") {
  library(MASS)#LDA https://zhuanlan.zhihu.com/p/25501130
  lda.sol <- lda(learning_df[, feature_names, drop = FALSE], learning_df$group)
  classify <- function(newdata) {
    data.frame(
      sample = rownames(newdata),
      group = predict(lda.sol, newdata)$class
    )
  }
} else if (type == "KNN") {
  library(caret)#KNN https://zhuanlan.zhihu.com/p/141238596
  control <- trainControl(method = 'cv', number = 10)
  model <- train(
    group ~ ., learning_df,
    method = 'knn',
    preProcess = c('center', 'scale'),
    trControl = control,
    tuneLength = 5
  )
  classify <- function(newdata) {
    data.frame(
      sample = rownames(newdata),
      group = predict(model, newdata = newdata)
    )
  }
} else if (type == "Bayes") {
  library(klaR)#Bayes https://cloud.tencent.com/developer/article/1553503?ivk_sa=1024320u
  model <- NaiveBayes(
    learning_df[, feature_names, drop = FALSE],
    learning_df$group
  )
  classify <- function(newdata) {
    data.frame(
      sample = rownames(newdata),
      group = predict(model, newdata)$class
    )
  }
}

# ---- Output ----

# The optional test set is written first, then the predict set.
if (has_test) {
  write_result(classify(test_df), "_test_res.xls")
}
write_result(classify(predict_df), "_predict_res.xls")