Commit 464f18a1 authored by Edi Prifti's avatar Edi Prifti

- fixing plotAbundanceByClass for regression mode

- getNBestModels make more robust for empty pops
- glmnet needs no na is y
parent 9d42a5cb
This diff is collapsed.
......@@ -4,9 +4,7 @@
desktop.ini
vignettes/*_cache
NAMESPACE
test.txt
Profiling_*
*.yml
testsdb3.RData
man/
.Rhistory
man/
......@@ -2053,6 +2053,7 @@ getNBestModels <- function(obj,
if(verbose) print(paste0("getNBestModels: by k sparsity"))
res <- list()
pop.valids <- c()
# for each k_sparsity
for(i in 1:length(mc))
{
......@@ -2092,9 +2093,20 @@ getNBestModels <- function(obj,
pop <- pop[1:min(n.best, length(pop))]
if(verbose) print(paste0("getNBestModels: the final population contains ", length(pop), " models"))
res[[i]] <- pop
# mark valididity
pop.valids <- c(pop.valids, isPopulation(pop))
} # end for loop
names(res) <- names(mc)
names(pop.valids) <- names(mc)
names(res) <- names(mc)[pop.valids]
mc <- mc[pop.valids]
if(!isModelCollection(mc))
{
warning("digestModelCollection: after treating the mc object no result is available. Returning NULL")
return(NULL)
}
if(single.best)
{
......
......@@ -887,84 +887,160 @@ plotAbundanceByCalss <- function(features, X, y, topdown = TRUE, main = "", plot
X <- as.matrix(X)
}
# get levels
lev <- names(table(y))
# compute p-value of the non parametric abundance test
if(length(features) == 1)
{
dat <- t(as.matrix(X[features, ]))
rownames(dat) <- features
datl1 <- t(as.matrix(X[features, y == lev[1]]))
rownames(datl1) <- features
datl2 <- t(as.matrix(X[features, y == lev[2]]))
rownames(datl2) <- features
}else
mode <- "classification"
if(class(y) == "numeric" & length(table(y)) > 2)
{
dat <- X[features, ]
datl1 <- X[features, y == lev[1]]
datl2 <- X[features, y == lev[2]]
if(ncol(X) == 1)
{
dat <- as.matrix(dat)
datl1 <- as.matrix(datl1)
datl2 <- as.matrix(datl2)
}
cat("... plotAbundanceByClass will not work for a continous y - probably in regression mode. Adapting as a uniclass\n")
mode <- "regression"
}
dat.test <- filterfeaturesK(dat, y, k = nrow(dat), sort = FALSE)
if(plot)
if(mode == "classification")
{
pvals <- dat.test$p
qvals <- rep("",nrow(dat.test))
qvals[dat.test$q<0.05] <- "*"
datl1.reshape <- melt(datl1)
colnames(datl1.reshape) <- c("feature","observation","abundance")
datl1.reshape$class <- rep(lev[1], nrow(datl1.reshape))
# get levels
lev <- names(table(y))
datl2.reshape <- melt(datl2)
colnames(datl2.reshape) <- c("feature","observation","abundance")
datl2.reshape$class <- rep(lev[2], nrow(datl2.reshape))
# compute p-value of the non parametric abundance test
if(length(features) == 1)
{
dat <- t(as.matrix(X[features, ]))
rownames(dat) <- features
datl1 <- t(as.matrix(X[features, y == lev[1]]))
rownames(datl1) <- features
datl2 <- t(as.matrix(X[features, y == lev[2]]))
rownames(datl2) <- features
}else
{
dat <- X[features, ]
datl1 <- X[features, y == lev[1]]
datl2 <- X[features, y == lev[2]]
if(ncol(X) == 1)
{
dat <- as.matrix(dat)
datl1 <- as.matrix(datl1)
datl2 <- as.matrix(datl2)
}
}
dat.reshape <- as.data.frame(t(data.frame(t(datl1.reshape), t(datl2.reshape))))
dat.reshape$abundance <- as.numeric(as.character(dat.reshape$abundance))
dat.test <- filterfeaturesK(dat, y, k = nrow(dat), sort = FALSE)
# fix factor level order
if(topdown)
if(plot)
{
# use the same factor levels as features
dat.reshape$feature <- factor(dat.reshape$feature, levels=rev(features))
pvals <- dat.test$p
qvals <- rep("",nrow(dat.test))
qvals[dat.test$q<0.05] <- "*"
datl1.reshape <- melt(datl1)
colnames(datl1.reshape) <- c("feature","observation","abundance")
datl1.reshape$class <- rep(lev[1], nrow(datl1.reshape))
datl2.reshape <- melt(datl2)
colnames(datl2.reshape) <- c("feature","observation","abundance")
datl2.reshape$class <- rep(lev[2], nrow(datl2.reshape))
dat.reshape <- as.data.frame(t(data.frame(t(datl1.reshape), t(datl2.reshape))))
dat.reshape$abundance <- as.numeric(as.character(dat.reshape$abundance))
# fix factor level order
if(topdown)
{
# use the same factor levels as features
dat.reshape$feature <- factor(dat.reshape$feature, levels=rev(features))
}else
{
# use the same factor levels as features
dat.reshape$feature <- factor(dat.reshape$feature, levels=features)
}
# plot object
p <- ggplot(dat.reshape, aes(x=feature, y = abundance, fill=class, color=class)) +
geom_boxplot() +
#scale_x_continuous(limits = range(dat.reshape$abundance)) +
coord_flip() +
#facet_grid(. ~ class) +
theme_bw() +
scale_color_manual(values = col.pt) +
scale_fill_manual(values = col.bg) +
theme(legend.position="none") +
ggtitle(main)
pad <- max(dat.reshape$abundance) + max(dat.reshape$abundance)*0.1
if(topdown){
p <- p + annotate("text", y = rep(pad,length(qvals)), x = seq(1,length(qvals),1) - 0.3, label = rev(qvals), color="gray", size=7)
}else{
p <- p + annotate("text", y = rep(pad,length(qvals)), x = seq(1,length(qvals),1) - 0.3, label = qvals, color="gray", size=7)
}
return(p)
}else
{
# use the same factor levels as features
dat.reshape$feature <- factor(dat.reshape$feature, levels=features)
return(dat.test)
}
}else # mode regression
{
# get levels
# plot object
p <- ggplot(dat.reshape, aes(x=feature, y = abundance, fill=class, color=class)) +
geom_boxplot() +
#scale_x_continuous(limits = range(dat.reshape$abundance)) +
coord_flip() +
#facet_grid(. ~ class) +
theme_bw() +
scale_color_manual(values = col.pt) +
scale_fill_manual(values = col.bg) +
theme(legend.position="none") +
ggtitle(main)
# compute p-value of the non parametric abundance test
if(length(features) == 1)
{
dat <- t(as.matrix(X[features, ]))
rownames(dat) <- features
}else
{
dat <- X[features, ]
if(ncol(X) == 1)
{
dat <- as.matrix(dat)
}
}
pad <- max(dat.reshape$abundance) + max(dat.reshape$abundance)*0.1
# we can still correlate and compute p-values
dat.test <- filterfeaturesK(dat, y, k = nrow(dat), sort = FALSE)
if(topdown){
p <- p + annotate("text", y = rep(pad,length(qvals)), x = seq(1,length(qvals),1) - 0.3, label = rev(qvals), color="gray", size=7)
}else{
p <- p + annotate("text", y = rep(pad,length(qvals)), x = seq(1,length(qvals),1) - 0.3, label = qvals, color="gray", size=7)
if(plot)
{
pvals <- dat.test$p
qvals <- rep("",nrow(dat.test))
qvals[dat.test$q<0.05] <- "*"
dat.reshape <- melt(dat)
colnames(dat.reshape) <- c("feature","observation","abundance")
dat.reshape$class <- rep("all", nrow(dat.reshape))
# fix factor level order
if(topdown)
{
# use the same factor levels as features
dat.reshape$feature <- factor(dat.reshape$feature, levels=rev(features))
}else
{
# use the same factor levels as features
dat.reshape$feature <- factor(dat.reshape$feature, levels=features)
}
# plot object
p <- ggplot(dat.reshape, aes(x=feature, y = abundance, fill=class, color=class)) +
geom_boxplot() +
#scale_x_continuous(limits = range(dat.reshape$abundance)) +
coord_flip() +
#facet_grid(. ~ class) +
theme_bw() +
scale_color_manual(values = "gray40") +
scale_fill_manual(values = "gray80") +
theme(legend.position="none") +
ggtitle(main)
pad <- max(dat.reshape$abundance) + max(dat.reshape$abundance)*0.1
if(topdown){
p <- p + annotate("text", y = rep(pad,length(qvals)), x = seq(1,length(qvals),1) - 0.3, label = rev(qvals), color="gray", size=7)
}else{
p <- p + annotate("text", y = rep(pad,length(qvals)), x = seq(1,length(qvals),1) - 0.3, label = qvals, color="gray", size=7)
}
return(p)
}else
{
return(dat.test)
}
return(p)
}else
{
return(dat.test)
}
}
......
......@@ -87,8 +87,9 @@ glmnetRR <- function(clf, X, y)
family <- "binomial"
}
system.time(glmmod <- glmnet(x = t(X),
y = y,
ina <- is.na(y)
system.time(glmmod <- glmnet(x = t(X[,!ina]),
y = y[!ina],
alpha = p$alpha,
family = family,
lower.limits = lower.limits,
......@@ -96,7 +97,7 @@ glmnetRR <- function(clf, X, y)
intercept = intercept,
lambda = lambdas,
standardize = TRUE)
)
)
if(clf$params$verbose) print("... ... glmnet object is created")
if(clf$params$plot)
......
......@@ -369,11 +369,19 @@ terga2_fit <- function(X, y, clf)
# Let's keep the best models of each sparsity in a buffer structured as modelCollection
pop2keep.mc <- listOfModels2ModelCollection(pop = pop, nBest = 5)
# keep valid populations and omit those in the mc that are no ok
valid.pops <- unlist(lapply(pop2keep.mc, isPopulation))
if(any(!valid.pops))
{
pop2keep.mc <-pop2keep.mc[valid.pops]
}
# restrain if bigger than the given size but by keeping the best for each sparsity
# sometimes the best models will be in high sparsity setting
if(length(pop) > clf$params$size_pop)
{
nBest <- round(length(pop)/length(clf$params$sparsity))
nBest <- round(length(pop)/length(clf$params$sparsity))
pop.restrict <- listOfModels2ModelCollection(pop = pop, nBest = nBest)
pop <- modelCollectionToPopulation(pop.restrict)
}
......
......@@ -134,23 +134,23 @@ population2 <- function(X, y, clf, featEval = NULL)
}
} # end random generation
if(!isPopulation(pop))
{
return(NULL)
}
# evaluate only the population but only the evalToFit and what it needs to be computed
pop <- evaluatePopulation(X, y, clf, pop, eval.all = FALSE)
pop <- unique(pop) # keep only unique samples
# sort the population by fitness
pop <- sortPopulation(pop, evalToOrder = "fit_")
#pop <- unique(pop) # keep only unique samples
pop2return <- pop # return all of it (it will be trimed later)
if(clf$params$verbose)
{
cat("\t\tThe final population is composed of ",length(pop2return)," individuals","\n")
cat("\t\tThe final population is composed of ",length(pop)," individuals","\n")
}
return(pop2return)
return(pop)
}
......@@ -291,13 +291,14 @@ individual_vec_v3 <- function(clf, signs = NULL)
#' @return the population given as an input with `nbToSelect` bests individuals with `$selected = TRUE`
tag_SelectElite <- function(clf, pop, nbToSelect)
{
pop <- sortPopulation(pop, evalToOrder = "fit_")
if(is.null(pop))
{
warning("resetTags: the population is empty")
cat("... ... resetTags: the population is empty\n")
return(NULL)
}
pop <- sortPopulation(pop, evalToOrder = "fit_")
nbSelected <- 0
for(i in 1:length(pop))
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment