Commit 438093ed authored by Edi Prifti's avatar Edi Prifti

- fixing bugs in regression mode

- adding regression mode in terda (still deactivated at the moment)
- fix getScore for a particular case using y
parent 0b95f336
This diff is collapsed.
......@@ -901,7 +901,7 @@ plotAbundanceByCalss <- function(features, X, y, topdown = TRUE, main = "", plot
mode <- "classification"
if(class(y) == "numeric" & length(table(y)) > 2)
{
cat("... plotAbundanceByClass will not work for a continuous y - probably in regression mode. Adapting as a uniclass\n")
#cat("... plotAbundanceByClass will not work for a continuous y - probably in regression mode. Adapting as a uniclass\n")
mode <- "regression"
}
......
......@@ -594,7 +594,6 @@ runClassifier <- function(X, y, clf, x_test = NULL, y_test = NULL)
# the population of models that is learned in this fold
pop.fold <- modelCollectionToPopulation(mod.collection = clf$models)
# In the case where we are in the whole dataset and x_test is null as is y_test we use X as a whole to learn the feature importance.
# In this case we will have an empirical importance
if(is.null(x_test) | is.null(y_test))
......@@ -618,6 +617,11 @@ runClassifier <- function(X, y, clf, x_test = NULL, y_test = NULL)
clf$fip <- efip.fold
}
if(clf$params$debug)
{
cat("=> DBG: after efip\n")
}
if(isModelCollection(clf$models))
{
# update the final indexes as the input X
......@@ -625,6 +629,12 @@ runClassifier <- function(X, y, clf, x_test = NULL, y_test = NULL)
}
clf$execTime <- as.numeric(Sys.time() - startingTime, units = "mins")
if(clf$params$debug)
{
cat("=> DBG: after end runclassifier\n")
}
return(clf)
}
......@@ -742,6 +752,7 @@ runCrossval <- function(X, y, clf, lfolds = NULL, nfolds = 10, return.all = FALS
x_test = X[,lfolds[[i]]]
}
y_test = y[lfolds[[i]]]
} # end else other folds
# omit some particular cases
......@@ -763,6 +774,9 @@ runCrossval <- function(X, y, clf, lfolds = NULL, nfolds = 10, return.all = FALS
clf$params$current_seed <- clf$params$seed[1] + i # set the current seed
if(clf$params$debug)
{
cat("=> DBG: before runclassifier\n")
runClassifier(X = x_train, y = y_train, clf = clf, x_test = x_test, y_test = y_test)
}else
{
......
......@@ -308,7 +308,7 @@ LPO_best_models <- function(X, y, clf, p=1, lfolds=NULL, return.all=FALSE,nk=20)
mod <- res_train.digest$best_models[[k_sparse.name]]
mod.train <- evaluateModel(mod=mod, X = x_train, y = y_train, clf = clf, eval.all = TRUE, force.re.evaluation = TRUE, mode='train')
mod.test <- list() #evaluateModel(mod=mod, X=x_test, y=y_test, clf=clf, eval.all = TRUE, force.re.evaluation = TRUE, mode='test')
scorelist <- getModelScore(mod = mod, X = as.matrix(x_test), clf, force.re.evaluation = TRUE)
scorelist <- getModelScore(mod = mod, X = as.matrix(x_test), clf = clf, force.re.evaluation = TRUE)
mod$score_ <- scorelist$score_
mod$pos_score_ <- scorelist$pos_score_
mod$neg_score_ <- scorelist$neg_score_
......@@ -366,7 +366,7 @@ LPO_best_models <- function(X, y, clf, p=1, lfolds=NULL, return.all=FALSE,nk=20)
mod <- res_train.digest_2$best_models[[k_sparse.name]]
mod.train <- evaluateModel(mod = mod, X = x_train, y = y_train, clf = clf, eval.all = TRUE, force.re.evaluation = TRUE, mode='train')
mod.test <- list() #evaluateModel(mod=mod, X=x_test, y=y_test, clf=clf, eval.all = TRUE, force.re.evaluation = TRUE, mode='test')
scorelist <- getModelScore(mod = mod, X = as.matrix(x_test), clf, force.re.evaluation = TRUE)
scorelist <- getModelScore(mod = mod, X = as.matrix(x_test), clf = clf, force.re.evaluation = TRUE)
mod$score_ <- scorelist$score_
mod$pos_score_ <- scorelist$pos_score_
mod$neg_score_ <- scorelist$neg_score_
......
......@@ -85,17 +85,17 @@ terBeam <- function(sparsity = 1:5, max.nb.features = 1000,
clf$params$estimate_coefs <- estimate_coefs
clf$params$sparsity <- sparsity
clf$params$max.nb.features <- max.nb.features
# clf$params$maxBeam <- maxBeam
# clf$params$FILENAME <- FILENAME
# clf$params$PREFIX <- PREFIX
# clf$params$maxBeam <- maxBeam
# clf$params$FILENAME <- FILENAME
# clf$params$PREFIX <- PREFIX
clf$params$saveFiles <- saveFiles # It would be interesting to add this in the future
# clf$params$pathSave <- pathSave
# clf$params$size_pop <- size_pop
# clf$params$pathSave <- pathSave
# clf$params$size_pop <- size_pop
clf$params$maxNbOfModels <- maxNbOfModels
clf$params$nbBest <- nbBest
clf$params$nbVeryBest <- nbVeryBest
# print out intermediary results
clf$params$plot <- plot # print out logs.
clf$params$verbose <- verbose # print out logs.
......@@ -296,7 +296,6 @@ terBeam_fit <- function(X, y, clf)
selected.features.pos <- features.pool[pos.ind][1:min(sum(pos.ind), nb.selected.features.neg)]
features.to.keep <- selected.features <- c(selected.features.neg, selected.features.pos)
}else
{
nb.selected.features <- max(which(nbCombinaisons < clf$params$maxNbOfModels))
......@@ -327,8 +326,12 @@ terBeam_fit <- function(X, y, clf)
}
# Evaluate the population
pop <- evaluatePopulation(X = X, y = y, clf = clf, pop = pop, force.re.evaluation = TRUE,
eval.all = TRUE, delete.null.models = TRUE, mode = "train")
pop <- evaluatePopulation(X = X, y = y, clf = clf, pop = pop,
eval.all = TRUE,
force.re.evaluation = TRUE,
estim.feat.importance = FALSE,
mode = "train",
delete.null.models = TRUE)
# Sort the population according to the clf$params$evalToFit attribute
pop <- sortPopulation(pop, evalToOrder = "fit_")
......@@ -360,7 +363,7 @@ terBeam_fit <- function(X, y, clf)
#fullPop[(length(fullPop) +1):(length(fullPop) + length(pop))] <- pop
minsize <- min(length(pop), clf$params$nbVeryBest)
fullPop[(length(fullPop) +1):(length(fullPop) + minsize)] <- pop[1:minsize]
# save population in a file
if(!(clf$params$popSaveFile=="NULL"))
{
......
......@@ -146,9 +146,27 @@ terda <- function(sparsity = 5, nIterations = 5, max.nb.features = 1000, kBest =
terda_fit <- function(X, y, clf)
{
if(clf$params$objective == "cor")
{
cat("... ... terda: setting regression mode.")
clf$params$language <- "regression"
}
# Setting the language environment
switch(clf$params$language,
regression=
{
# unconstrained logistic regression
if(clf$params$verbose){ print("Setting environment for regression not implemented for the moment returning null") }
# if(clf$params$evalToFit != "auc_" & clf$params$evalToFit != "accuracy_")
# {
# clf$params$evalToFit <- "auc_"
# warning("terga1_fit: not a valid evalToFit. Changing to auc_.")
# }
return(NULL)
},
logreg=
{
# unconstrained logistic regression
......
......@@ -27,6 +27,8 @@ glmnetRR <- function(clf, X, y)
p <- clf$params
check.X_y_w(X,y) #sanity check
family <- "binomial"
switch( tolower(p$language) ,
"logreg" = {
intercept = TRUE
......@@ -59,6 +61,13 @@ glmnetRR <- function(clf, X, y)
intercept = FALSE
lambdas = exp(-c(0:p$nblambdas)/(28*sqrt(p$nblambdas/150)))
},
"regression" = {
lower.limits = -1
upper.limits = 1
intercept = FALSE
family <- "gaussian"
lambdas = exp(-c(0:p$nblambdas)/(28*sqrt(p$nblambdas/150)))
},
{
stop('Unknown language! Please verify the input parameters.')
}
......@@ -79,7 +88,7 @@ glmnetRR <- function(clf, X, y)
system.time(glmmod <- glmnet(x = t(X),
y = y,
alpha = p$alpha,
family = "binomial",
family = family,
lower.limits = lower.limits,
upper.limits = upper.limits,
intercept = intercept,
......
......@@ -379,6 +379,11 @@ terga2_fit <- function(X, y, clf)
if(clf$params$verbose) cat(paste("... ... using generated population",length(pop),"\n"))
}
if(clf$params$debug)
{
print(paste("population:", length(pop)))
}
# it may happen that the population is empty, in this case return NULL
if(is.null(pop))
......@@ -429,6 +434,12 @@ terga2_fit <- function(X, y, clf)
}
pop <- cleanPopulation(pop, clf)
if(clf$params$debug)
{
print(paste("population after clean:", length(pop)))
}
pop <- evaluatePopulation(X, y, clf, pop, force.re.evaluation = TRUE, eval.all = FALSE)
# Let's keep the best models of each sparsity in a buffer structured as modelCollection
......@@ -476,6 +487,12 @@ terga2_fit <- function(X, y, clf)
{pop <- evolve2m(X, y, clf, pop, featEval)} # default is v2
)
if(clf$params$debug)
{
print(paste("population after evolver:", length(pop)))
}
if(clf$params$evolver %in% list('v2m') && generation == 0)
{
clf$params$size_pop <- round(clf$params$size_pop*3/2)
......@@ -495,11 +512,18 @@ terga2_fit <- function(X, y, clf)
best <- pop[[1]]
if(clf$params$debug)
{
str(best)
printy(best)
}
trace_evolution[[generation+1]] <- best ## A utiliser pour tester la convergence
evaluation <- populationGet_X(element2get = "fit_", toVec = TRUE, na.rm = TRUE)(pop = trace_evolution)
if(clf$params$debug)
{
print(paste("population end generation:", summary(evaluation)))
}
# # to be updated by the app
# if(clf$params$plot)
# {
......
......@@ -291,7 +291,7 @@ tag_SelectElite <- function(clf, pop, nbToSelect)
if(is.null(pop))
{
cat("... ... resetTags: the population is empty\n")
warning("... ... resetTags: the population is empty\n")
return(NULL)
}
......@@ -609,6 +609,9 @@ evolve1 <- function(X, y, clf, pop, featEval)
evolve2m <- function(X, y, clf, pop, featEval, generation)
{
pop <- sortPopulation(pop, evalToOrder = "fit_")
if(!isPopulation(obj = pop)) return(NULL)
if(clf$params$debug) {print(paste("evolve2m after sort:", length(pop)))}
nb2BeMutated <- ceiling(clf$params$select_perc * clf$params$size_pop/100)
nbByMethod <- lapply(clf$params$select_percByMethod, # On supose qu'il n'y a que 2 methodes de selection,
......@@ -629,6 +632,8 @@ evolve2m <- function(X, y, clf, pop, featEval, generation)
# indexListOfIndividualToBeMutated <- get_IndividualToBeMutated(pop)
if(clf$params$debug) {print(paste("evolve2m after tagging:", length(pop)))}
if(clf$params$parallel.local)
{
newPop <- foreach(i = 1:length(pop)) %dorng%
......@@ -732,10 +737,14 @@ evolve2m <- function(X, y, clf, pop, featEval, generation)
}
} # end for each model in the population size
} # end if no parallel
if(clf$params$debug) {print(paste("evolve2m after crossing/mutation:", length(pop)))}
pop <- sortPopulation(pop, evalToOrder = "fit_")
pop <- pop[1:min(length(pop),round(clf$params$size_pop*2/3))] # get the 2/3 population or the size of the pop if smaller
if(clf$params$debug) {print(paste("evolve2m after selection:", length(pop)))}
# set population size bigger at the first generation
if(generation==0) { clf$params$size_pop <- round(clf$params$size_pop*3/2) }
......@@ -754,6 +763,8 @@ evolve2m <- function(X, y, clf, pop, featEval, generation)
pop <- sortPopulation(pop, evalToOrder = "fit_") # sort
pop <- resetTags(pop) # reset
if(clf$params$debug) {print(paste("evolve2m after merging the new pop:", length(pop)))}
return(pop)
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment