...
 
......@@ -6,5 +6,4 @@ vignettes/*_cache
NAMESPACE
Profiling_*
*.yml
.Rhistory
man/
......@@ -35,5 +35,5 @@ Description: The predomics offers access to a novel framework implementing sever
state of the art methods (SOTA) such as RF, ENET and SVM.
License: LICENCE
LazyData: TRUE
RoxygenNote: 6.1.0
RoxygenNote: 6.1.1
Encoding: UTF-8
......@@ -73,7 +73,7 @@ export(mutate)
export(myAssertNotNullNorNa)
export(normModelCoeffs)
export(plotAUC)
export(plotAUC2)
export(plotAUCg)
export(plotAbundanceByCalss)
export(plotComparativeBestCV)
export(plotComparativeCV)
......@@ -122,6 +122,7 @@ import(doSNOW)
import(foreach)
import(ggplot2)
import(gridExtra)
import(pROC)
import(plyr)
import(reshape2)
import(snow)
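The newly imported pROC is what backs the roc()/ggroc() calls in the reworked plotAUCg further down. A minimal standalone sketch of that dependency, with toy data only:
library(pROC)
y <- c(0, 0, 1, 1)                      # toy labels
score <- c(0.1, 0.4, 0.35, 0.8)         # toy scores
r <- roc(y, score, ci = TRUE)           # ROC with AUC confidence interval
signif(as.numeric(r$ci), 3)             # lower bound, AUC, upper bound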
......
......@@ -412,6 +412,17 @@ isClf <- function(obj)
return(res)
}
#' Evaluates whether a learner is SOTA
#'
#' @description Evaluates whether a learner is SOTA or not
#' @export
#' @param obj: a model to test
#' @return TRUE if the object is a SOTA learner
isLearnerSota <- function(obj)
{
res <- isClf(obj) & ((length(grep("sota",obj$learner))==1) | (length(grep("logreg",obj$params$language))==1))
return(res)
}
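The new predicate hinges on grep() over the learner name and the model language; the matching logic in isolation (the strings are illustrative, not actual package values):
length(grep("sota", "sota.svm")) == 1     # TRUE: a SOTA learner name matches
length(grep("sota", "terga")) == 1        # FALSE: a BTR learner name does not
length(grep("logreg", "logreg")) == 1     # TRUE: the logreg language also counts as SOTA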
#' Evaluates whether an object is an experiment
#'
......@@ -458,23 +469,16 @@ isModelSota <- function(obj)
#' @return TRUE if the object is a model sota SVM
isModelSotaSVM <- function(obj)
{
res <- isModel(obj) & (length(grep("sota.svm",obj$learner))==1)
res <- isModel(obj)
if(!res)
{
return(res)
}
res <- (length(grep("sota.svm",obj$learner))==1)
return(res)
}
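The same guard-clause refactoring is applied to all the isModelSota* predicates below: isModel() is checked first and returns early, so obj$learner is never dereferenced on an object that is not a model. The pattern in the abstract, as a hedged sketch (is_something and the pattern string are hypothetical):
is_something <- function(obj)
{
  if(!isModel(obj))   # cheap type check first, return early
  {
    return(FALSE)     # never touch obj$learner on a non-model
  }
  length(grep("some.pattern", obj$learner)) == 1
}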
#' Evaluates whether a learner is SOTA
#'
#' @description Evaluates whether a learner is SOTA or not
#' @export
#' @param obj: a model to test
#' @return TRUE if the object is a SOTA learner
isLearnerSota <- function(obj)
{
res <- isClf(obj) & ((length(grep("sota",obj$learner))==1) | (length(grep("logreg",obj$params$language))==1))
return(res)
}
#' Evaluates whether an object is a model SOTA RF
#'
#' @description Evaluates whether an object is a model of type sota RF
......@@ -483,7 +487,12 @@ isLearnerSota <- function(obj)
#' @return TRUE if the object is a model sota RF
isModelSotaRF <- function(obj)
{
res <- isModel(obj) & (length(grep("sota.rf",obj$learner))==1)
res <- isModel(obj)
if(!res)
{
return(res)
}
res <- (length(grep("sota.rf",obj$learner))==1)
return(res)
}
......@@ -495,7 +504,12 @@ isModelSotaRF <- function(obj)
#' @export
isModelSotaGLMNET <- function(obj)
{
res <- isModel(obj) & (obj$learner == "terda")
res <- isModel(obj)
if(!res)
{
return(res)
}
res <- (obj$learner == "terda")
if(is.null(obj$obj))
{
res <- FALSE
......@@ -506,18 +520,6 @@ isModelSotaGLMNET <- function(obj)
return(res)
}
#' Evaluates whether an object is a model BTR
#'
#' @description Evaluates whether an object is a model of type BTR
#' @export
#' @param obj: a model to test
#' @return TRUE if the object is a model BTR
isModelBTR <- function(obj)
{
res <- isModel(obj) & !isModelSotaSVM(obj) & !isModelSotaRF(obj) & !isModelSotaGLMNET(obj)
return(res)
}
#' Evaluates whether an object is a model BTR Terda
#'
#' @description Evaluates whether an object is a model of type BTR terda
......@@ -526,7 +528,12 @@ isModelBTR <- function(obj)
#' @export
isModelTerda <- function(obj)
{
res <- isModel(obj) & (obj$learner == "terda")
res <- isModel(obj)
if(!res)
{
return(res)
}
res <- (obj$learner == "terda")
if(!obj$language %in% c("ter","terinter","bin","bininter","ratio"))
{
res <- FALSE
......@@ -534,6 +541,18 @@ isModelTerda <- function(obj)
return(res)
}
#' Evaluates whether an object is a model BTR
#'
#' @description Evaluates whether an object is a model of type BTR
#' @export
#' @param obj: a model to test
#' @return TRUE if the object is a model BTR
isModelBTR <- function(obj)
{
res <- isModel(obj) & !isModelSotaSVM(obj) & !isModelSotaRF(obj) & !isModelSotaGLMNET(obj)
return(res)
}
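isModelBTR is thus the complement of the three SOTA predicates over valid models. Note that & evaluates both operands, so this definition relies on the early-return guards now inside each isModelSota* call; the equivalent short-circuit form, purely illustrative:
isModel(obj) && !(isModelSotaSVM(obj) || isModelSotaRF(obj) || isModelSotaGLMNET(obj))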
#' Evaluates the accuracy of a model
#'
......@@ -2449,6 +2468,16 @@ getNBestModels <- function(obj,
{
return(mod)
}
}else # not found
{
if(verbose) print(paste0("getNBestModels: single.best.k mode with k_spar ", k_spar, " not found in the results"))
if(return.population)
{
return(list(NA))
}else
{
return(NA)
}
}
}else
{
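Since getNBestModels can now return NA (or list(NA) when return.population is set) when the requested sparsity is absent, callers should guard against the sentinel. A hedged sketch, where res and the argument usage are hypothetical:
best <- getNBestModels(res, verbose = TRUE)   # hypothetical call
if(all(is.na(unlist(best))))                  # catches both NA and list(NA)
{
  message("no model at the requested sparsity; relax k_spar or the selection constraints")
}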
......@@ -4004,6 +4033,19 @@ getSign <- function(X, y, clf = NULL, parallel.local = FALSE)
if(!is.matrix(X)) X <- as.matrix(X)
# check dimensions
if(ncol(X) != length(y))
{
if(nrow(X) == length(y))
{
# this happens when X holds a single feature: subsetting dropped it to a vector, so transpose it back
X <- t(X)
}else
{
stop("getSign: wrong dimensions, this should not happen ... stopping.")
}
}
if(is.numeric(y)) # if regression (correlation)
{
# for optimization with Pearson, first convert to ranks
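The new dimension guard above covers the single-feature case: in R, subsetting one row of a matrix drops the result to a vector, and as.matrix() then rebuilds it as a column rather than a row. A minimal base-R reproduction:
X <- matrix(rnorm(20), nrow = 2)   # 2 features x 10 samples
y <- rnorm(10)
x1 <- as.matrix(X[1, ])            # drops to a vector, rebuilt as 10 x 1
ncol(x1) == length(y)              # FALSE: dimensions disagree
x1 <- t(x1)                        # 1 x 10: the guard's fix
ncol(x1) == length(y)              # TRUE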
......@@ -4847,8 +4889,8 @@ filterNoSignal <- function(X, side = 1, threshold = "auto", verbose = FALSE)
}
# compute the standard deviation
s <- apply(res, 1, sd)
s.median <- median(s)
s <- apply(res, 1, sd, na.rm = TRUE)
s.median <- median(s, na.rm = TRUE)
if(verbose)
{
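The na.rm fix matters because sd() propagates missing values: a single NA in a feature row would otherwise turn that row's deviation, and in turn the median threshold, into NA. In base R:
sd(c(1, 2, NA))                      # NA: one missing value poisons the estimate
sd(c(1, 2, NA), na.rm = TRUE)        # 0.7071068
median(c(0.5, NA, 2), na.rm = TRUE)  # 1.25: the threshold stays defined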
......@@ -5925,16 +5967,28 @@ mergeMeltBestScoreCV <- function(list.results.digest, k_catalogue = NULL, score=
{
# compute the penalty data
spar <- as.numeric(gsub("k_","",k_catalogue))
k_sparse <- paste0("k_",list.results.digest[[i]]$best$model$eval.sparsity)
mean_score <- rowMeans(emp.data, na.rm = TRUE)
mean_score_penalty <- mean_score - penalty[i] * spar
#plot(mean_score, mean_score_penalty, ylim=c(0,1),xlim=c(0.5,1))
ind <- match(k_sparse, k_catalogue)
best_empirical <- rep(NA,ncol(emp.data))
best_generalization <- rep(NA,ncol(gen.data))
best_empirical <- as.numeric(emp.data[ind,])
best_generalization <- as.numeric(gen.data[ind,])
if(isModel(obj = list.results.digest[[i]]$best$model))
{
k_sparse <- paste0("k_",list.results.digest[[i]]$best$model$eval.sparsity)
#plot(mean_score, mean_score_penalty, ylim=c(0,1),xlim=c(0.5,1))
ind <- match(k_sparse, k_catalogue)
best_empirical <- rep(NA,ncol(emp.data))
best_generalization <- rep(NA,ncol(gen.data))
best_empirical <- as.numeric(emp.data[ind,])
best_generalization <- as.numeric(gen.data[ind,])
}else
{
warning(paste("mergeMeltBestScoreCV: No best model was identified for this learner based on the constrains decided."))
# plot(mean_score, mean_score_penalty, ylim=c(0,1),xlim=c(0.5,1))
# ind <- match(k_sparse, k_catalogue)
best_empirical <- rep(NA,ncol(emp.data))
best_generalization <- rep(NA,ncol(gen.data))
# best_empirical <- as.numeric(emp.data[ind,])
# best_generalization <- as.numeric(gen.data[ind,])
}
# for GLMNET models
if(all(isModelSotaGLMNET(list.results.digest[[i]]$best$model)))
......@@ -6060,10 +6114,10 @@ mergeResults <- function(list.results, sparsity = NULL, penalty = 0.001, best.k
stop("mergeResults: Please provide a vector best.k of the same length as there are experiments.
When the best.k is to be disabled for a given experiment, please provide NA for that one!")
}
if(any(!is.numeric(best.k)))
{
stop("mergeResults: Please provide a list best.k that are numeric!")
}
# if(any(!is.numeric(best.k)))
# {
# stop("mergeResults: Please provide a list best.k that are numeric!")
# }
}else
{
best.k <- rep(NA, e.nb)
......@@ -6304,8 +6358,15 @@ mergeMeltImportanceCV <- function(list.results, filter.cv.prev = 0.5, min.kfold.
ind <- match(feature.selection, rownames(list.results[[i]]$crossVal$fip$mda))
if(any(is.na(ind)))
{
print("mergeMeltImportanceCV: the features specified are not all found in the importance data... skiping")
features <- rownames(list.results[[i]]$crossVal$fip$mda[features, inds.folds[[i]]])
print("mergeMeltImportanceCV: the following features are not found in the importance data... skiping")
print(feature.selection[is.na(ind)])
if(sum(is.na(ind)) == length(feature.selection))
{
warning(paste("mergeMeltBestScoreCV: no feature found, returning NULL"))
return(NULL)
}
# features <- rownames(list.results[[i]]$crossVal$fip$mda[features, inds.folds[[i]]])
features <- feature.selection[!is.na(ind)]
}else
{
features <- feature.selection
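The repaired branch reports the missing names and keeps only the features that actually match; the base-R idiom in isolation, with toy feature names:
wanted    <- c("f1", "f9", "f3")
available <- c("f1", "f2", "f3")
ind <- match(wanted, available)   # NA where a feature is absent
wanted[is.na(ind)]                # "f9": reported as missing
wanted[!is.na(ind)]               # "f1" "f3": kept for the importance data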
......@@ -6354,7 +6415,7 @@ mergeMeltImportanceCV <- function(list.results, filter.cv.prev = 0.5, min.kfold.
imp.data <- imp.data.sc
}
imp.data.melt <- melt(data.frame(feature = rownames(imp.data), imp.data))
imp.data.melt <- melt(data.frame(feature = rownames(imp.data), imp.data), measure.vars = colnames(imp.data))
colnames(imp.data.melt) <- c("feature","folds","value")
res.split[[i]] <- data.frame(imp.data.melt)
res.split.fprev[[i]] <- imp.data.prev
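Pinning measure.vars makes the melt() call explicit about which columns are measures, instead of letting reshape2 infer them from the column classes. A toy sketch of the fixed call:
library(reshape2)
df <- data.frame(feature = c("f1", "f2"), fold.1 = c(0.2, 0.5), fold.2 = c(0.3, 0.4))
melt(df, measure.vars = c("fold.1", "fold.2"))   # columns: feature, variable, value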
......
......@@ -1496,15 +1496,15 @@ plotModel <- function(mod, X, y,
#' @title Plots a model or a population of model objects as barplots of scaled coefficients.
#' @import reshape2
#' @import ggplot2
#' @import plyr
#' @description Plots a model score or a population of models as barplots, one bar per feature, the bar length being the coefficient
#' @param mod: a model to plot
#' @param y: the class to predict
#' @param col.sign: the colors of the coefficients based on their sign (default: -1=deepskyblue1, 1=firebrick1)
#' @param main: the title of the plot (default: "")
#' @export
plotModelScore <- function(mod,
plotModelScore <- function(mod = NULL,
y = NULL, # the class to predict
col.sign = c("deepskyblue1", "firebrick1"),
main = ""
)
......@@ -1530,10 +1530,24 @@ plotModelScore <- function(mod,
return(NULL)
}
# disable this plot for SOTA
if(myAssertNotNullNorNa(mod$score_) | myAssertNotNullNorNa(mod$pos_score_) | myAssertNotNullNorNa(mod$neg_score_))
# check the existence of the model's score
if(!myAssertNotNullNorNa(mod$score_))
{
print("plotModelScore: The score_ attribute is missing. Returning empty handed, please provide it to the model.")
return(NULL)
}
# check the existence of the class vector y
if(!myAssertNotNullNorNa(y))
{
print("plotModelScore: One of either, score_, pos_score_, or neg_score_ attributes are missing. Returning empty handed.")
print("plotModelScore: The parameter y is missing. Returning empty handed, please provide it.")
return(NULL)
}
# check that the model's score matches y in length
if(length(mod$score_) != length(y))
{
print("plotModelScore: The score_ attribute does not match y in length. Returning empty handed, please verify the input data.")
return(NULL)
}
......@@ -1544,45 +1558,47 @@ plotModelScore <- function(mod,
mod$language <- "glmnet"
}
df <- data.frame(pos = mod$pos_score_,
neg = mod$neg_score_
#,
#score = mod$score_
)
df.melt <- melt(df)
# transform in log
# df.melt$value <- df.melt$value + 1e-10
# df.melt$value <- log10(df.melt$value)
df.melt$variable <- factor(df.melt$variable, levels = c("neg","pos"))
df.melt.summarized <- ddply(df.melt, "variable", summarise, grp.mean=mean(value))
(g1 <- ggplot(data = df.melt, aes(x = value, color = variable)) +
geom_density() +
geom_vline(data = df.melt.summarized, aes(xintercept=grp.mean, color=variable), linetype="dashed") +
scale_color_manual(values=col.sign) +
if(mod$objective == "auc")
{
y <- as.factor(y)
df <- data.frame(yhat = mod$score_,
y = y)
main <- paste("Acc =", signif(mod$accuracy_,2),
" | AUC =", signif(mod$auc_,2),
" | Size =", signif(mod$eval.sparsity,2),
" | Lang =", mod$language)
g1 <- ggplot(data = na.omit(df), aes(x = y, y = yhat, color = y)) +
geom_boxplot(aes(alpha = 0.1, color = y, fill = y)) +
geom_jitter(width = 0.2) +
geom_hline(yintercept = mod$intercept_, linetype = "dashed", col = "darkgray") +
scale_color_manual(values = col.sign) +
scale_fill_manual(values = col.sign) +
ggtitle(main) +
ylab("y^") +
theme_bw() +
ylab("score distribution") +
theme(legend.position="bottom", legend.direction="horizontal")
)
(g2 <- ggplot(data = df.melt, aes(x = variable, y = value, color = variable)) +
geom_boxplot(notch = FALSE, outlier.colour = NA, position = position_dodge(width=0.9), alpha = 0.3) +
#geom_boxplot(notch = FALSE, outlier.colour = NA, position = position_dodge(width=0.9), alpha = 0.3) +
geom_point(position=position_jitterdodge(dodge.width=0.9), size = 2, alpha = 0.5) +
scale_color_manual(values=col.sign) +
ylab("score distribution") +
xlab("Ter cumulative") +
theme(aspect.ratio = 1) +
theme(legend.position="none")
}else
{
df <- data.frame(yhat = mod$score_,
y = y)
main <- paste("Rho =", signif(mod$cor_,2),
" | R2 =", signif(mod$rsq_,2),
" | SER =", signif(mod$ser_,2))
g1 <- ggplot(data = na.omit(df), aes(x = y, y = yhat, alpha = 0.9)) +
geom_smooth(method = 'lm') +
geom_point(aes(color = "darkred", size = 4)) +
ggtitle(main) +
ylab("y^") +
theme_bw() +
theme(legend.position="bottom", legend.direction="horizontal")
)
theme(aspect.ratio = 1) +
theme(legend.position="none")
}
return(g1)
return(grid.arrange(g1, g2, ncol=2))
}
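A hedged usage sketch for the reworked plotModelScore, assuming a fitted classification model whose score_, intercept_, accuracy_ and auc_ slots are populated (mod and y are placeholders for real objects):
g <- plotModelScore(mod = mod, y = y)   # boxplot + jitter of y^ by class when objective is auc
if(!is.null(g)) print(g)                # NULL is returned whenever a sanity check fails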
......@@ -1858,37 +1874,84 @@ plotAUC <- function(score, y, main="", ci = TRUE, percent = TRUE)
}
# plotAUCg <- function(mod = NULL, verbose = FALSE, show.intercept = TRUE)
# {
# if(!isModel(mod))
# {
# if(verbose) warning("plotAUCg: please provide a valid model.")
# return(NULL)
# }
#
# # make the roc object
# robj <- roc(y ~ mod$score_, plot=FALSE, ci = TRUE)
# # get the coordinates of the best threshold
# resa = coords(robj, x = "best", input = "threshold", best.method = "youden")
# # build the legend
# legend = c(paste("AUC:",signif(robj$auc,3)),
# paste("CI:",signif(robj$ci,3)[1],"-",signif(robj$ci,3)[3]),
# paste("Threshold:",signif(resa[1],3)))
# # make the plot
# g <- ggroc(robj) +
# annotate(geom = "text", x = 0.25, y = 0.1, label = paste(legend, collapse = "\n"), hjust=0) +
# theme_bw()
#
# # add intercept
# if(show.intercept)
# {
# g <- g + annotate(geom = "text", x = resa["specificity"], y = resa["sensitivity"], label = "+", colour = "red", size = 10)
# }
#
# return(g)
# }
#
#
#' Plot the AUC of a given classifier
#'
#' @description Plots the ROC curve of a given classifier, annotated with the AUC and its confidence interval.
#' @import pROC
#' @param mod: a predomics model object (default = NULL)
#' @param score: the score (y^) of a given model
#' @param y: the class to be predicted
#' @param main: title of the graph
#' @param ci: whether to compute and annotate the confidence interval of the AUC (default: TRUE)
#' @param show.intercept: plot or not the intercept on the graph (default:TRUE)
#' @return a ggplot object
#' @export
plotAUC2 <- function(score, y, main = "", ci = TRUE)
plotAUCg <- function(mod = NULL, score, y, main = "", ci = TRUE, show.intercept = TRUE)
{
if(isModel(mod))
{
print("plotAUCg: using the model object to get the score")
score <- mod$score_
}
if(length(y) != length(score))
{
warning("plotAUCg: the score should be the same length as y. Returning empty-handed !")
return(NULL)
}
# for the ratio models we may have infinite values, which would make the CI estimation fail;
# here is a workaround
ind <- is.infinite(score) | is.na(score) | is.nan(score)
# compute the roc object
rocobj <- roc(y ~ score, plot=FALSE, ci = ci)
rocobj <- roc(y[!ind] ~ score[!ind], plot=FALSE, ci = ci)
if(isModel(mod)) # the confusion matrix is only available when a model is provided
{
tpr <- mod$confusionMatrix_[1,1]/ sum(mod$confusionMatrix_[,1])
fpr <- mod$confusionMatrix_[1,2]/ sum(mod$confusionMatrix_[,2])
}
res.ci <- ci(rocobj)
legend <- paste0("AUC = ",signif(res.ci[2],2),
"\nCI = [", signif(res.ci[1],2),";", signif(res.ci[3],2),"]")
g <- ggroc(rocobj, colour = "black", linetype = 1, size = 1, legacy.axes = TRUE) +
theme_bw() +
annotate("point",
x = fpr,
y = tpr,
colour = "red",
size = 10,
pch = "+") +
annotate("text",
x = 0.9,
x = 0.75,
y = 0.1,
label = paste0("AUC = ",signif(res.ci[1],2), "\nCI = [", signif(res.ci[2],2),";", signif(res.ci[3],2),"]")) +
label = legend, hjust = 0) +
guides(fill=FALSE) +
ggtitle(main) +
theme(
......@@ -1899,6 +1962,30 @@ plotAUC2 <- function(score, y, main = "", ci = TRUE)
strip.background = element_rect(colour = "white", fill = "white", size = 0.3),
strip.text.x = element_text(colour = "darkred", size = 10))
resa = coords(rocobj, x = "best", input = "threshold", best.method = "youden")
# add intercept
if(show.intercept)
{
if(sum(ind)==0 & isModel(mod)) # everything fine: use the model's operating point
{
g <- g + annotate("point",
x = fpr,
y = tpr,
colour = "red",
size = 10,
pch = "+")
}else # some scores were dropped (or no model given): fall back to the Youden-optimal point
{
g <- g + annotate("point",
x = 1-resa["specificity"],
y = resa["sensitivity"],
colour = "red",
size = 10,
pch = "+")
}
}
return(g)
}
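A minimal call sketch for the exported plotAUCg; mod is optional, and the intercept annotation is disabled here since it is most informative with a fitted model (toy data, nothing package-specific):
library(pROC)
set.seed(42)
y <- factor(rep(c(-1, 1), each = 50))
score <- rnorm(100) + (as.numeric(y) - 1)   # noisy but informative toy scores
g <- plotAUCg(score = score, y = y, main = "toy example", show.intercept = FALSE)
print(g)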
......
......@@ -2,7 +2,7 @@
output: pdf_document
---
The PredOmics package contains three novel methods for supervised learning based on ternary coefficients.
The **predomics** package contains three methods for supervised learning based on ternary coefficients.
============================================================
## UPDATES
......