Commit 9cc43acf authored by Edi Prifti

- making checkers more robust

- fixing bugs in getFeaturePrevalence
- adding penalty in selectBestPopulation
- fixing bugs in mergeBestScoreCV
- adding feature selection on mergeMeltImportanceCV
- fixing small issues in plotComparativeEmpricalScore, plotComparativeCV & plotAbundanceByClass
- adding importance in plotModel
parent 3ad50a53
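For context: the leave-one-out changes in the runCrossval hunks below all work around the same base-R pitfall. Subsetting a matrix with a single column index drops its dimensions and returns a bare vector, which breaks downstream code expecting a matrix. A minimal standalone sketch of the behaviour (illustrative toy data, not code from the package):

X <- matrix(rnorm(20), nrow = 4,
            dimnames = list(paste0("f", 1:4), paste0("s", 1:5)))
X[, 2]                # a single index drops dimensions: a plain named vector
as.matrix(X[, 2])     # the coercion used below: back to a 4 x 1 matrix
X[, 2, drop = FALSE]  # equivalent base-R idiom that also keeps the column name

Note that drop = FALSE would also preserve the held-out sample's name, which as.matrix() discards; either form restores the matrix shape the cross-validation code needs.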
@@ -603,13 +603,20 @@ runCrossval <- function(X, y, clf, lfolds = NULL, nfolds = 10, return.all = FALS
# training dataset
x_train = X[,-lfolds[[i]]]
y_train = y[-lfolds[[i]]]
# # testing dataset, needed for checking extreme cases
x_test = X[,lfolds[[i]]]
# # testing dataset needed for checking extreme cases
if(length(lfolds[[i]]) == 1) # for leave-one-out
{
x_test = as.matrix(X[,lfolds[[i]]])
} else
{
x_test = X[,lfolds[[i]]]
}
y_test = y[lfolds[[i]]]
}
# omit some particular cases
if(any(table(y_test)==0) | length(table(y_test))==1 | any(table(y_train)==0) | length(table(y_train))==1)
#if(any(table(y_test)==0) | length(table(y_test))==1 | any(table(y_train)==0) | length(table(y_train))==1)
if(any(table(y_train)==0) | length(table(y_train))==1) # to take into account the leave-one-out case
{
NULL
} else
@@ -661,12 +668,19 @@ runCrossval <- function(X, y, clf, lfolds = NULL, nfolds = 10, return.all = FALS
x_train = X[,-lfolds[[i]]]
y_train = y[-lfolds[[i]]]
# # testing dataset needed for checking extreme cases
x_test = X[,lfolds[[i]]]
if(length(lfolds[[i]]) == 1) # for leave-one-out
{
x_test = as.matrix(X[,lfolds[[i]]])
} else
{
x_test = X[,lfolds[[i]]]
}
y_test = y[lfolds[[i]]]
}
# omit some particular cases
if(any(table(y_test)==0) | length(table(y_test))==1 | any(table(y_train)==0) | length(table(y_train))==1)
#if(any(table(y_test)==0) | length(table(y_test))==1 | any(table(y_train)==0) | length(table(y_train))==1)
if(any(table(y_train)==0) | length(table(y_train))==1) # to take into account the leave-one-out case
{
warning("runCrossval: only one level in the class impossible to compute fitness")
next
@@ -718,7 +732,13 @@ runCrossval <- function(X, y, clf, lfolds = NULL, nfolds = 10, return.all = FALS
x_train = X[,-lfolds[[i]]]
y_train = y[-lfolds[[i]]]
# testing dataset
x_test = X[,lfolds[[i]]]
if(length(lfolds[[i]]) == 1) # for leave-one-out
{
x_test = as.matrix(X[,lfolds[[i]]])
} else
{
x_test = X[,lfolds[[i]]]
}
y_test = y[lfolds[[i]]]
res_train <- res.all[[i]]
......
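The relaxed guard in the first two runCrossval hunks follows from the same leave-one-out case: y_test then holds exactly one sample, so the old condition on y_test was true on every fold and every fold was skipped. A sketch with a toy two-level factor (hypothetical data, not from the package):

y <- factor(c("healthy", "sick", "sick", "healthy"))
i <- 1
y_test  <- y[i]   # leave-one-out test fold: a single sample
y_train <- y[-i]
table(y_test)     # healthy: 1, sick: 0, so any(table(y_test) == 0) is TRUE
# old guard: TRUE on every leave-one-out fold, so all folds were discarded
any(table(y_test) == 0) | length(table(y_test)) == 1
# new guard: only flags a training fold that is missing a class
any(table(y_train) == 0) | length(table(y_train)) == 1

Checking only y_train is enough here: a one-sample test fold can still be scored once the model has been trained on both classes.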
@@ -337,7 +337,8 @@ printRFObject <- function(obj)
tree_func <- function(final_model,
tree_num,
col.class = c("deepskyblue1", "firebrick1"),
main = "")
main = "",
node.text.color = "black")
{
# this function comes from Shirin Glander, with some minor modifications to the visualisation.
# https://shiring.github.io/machine_learning/2017/03/16/rf_plot_ggraph
@@ -373,7 +374,7 @@ tree_func <- function(final_model,
geom_edge_link() +
geom_node_point() +
scale_fill_manual("", values = col.class) +
geom_node_text(aes(label = node_label), na.rm = TRUE, repel = TRUE) +
geom_node_text(aes(label = node_label), na.rm = TRUE, repel = TRUE, colour = node.text.color) +
geom_node_label(aes(label = split), vjust = 2.5, na.rm = TRUE, fill = "white") +
geom_node_label(aes(label = leaf_label, fill = leaf_label), na.rm = TRUE,
repel = TRUE, colour = "white", fontface = "bold", show.legend = FALSE) +
......
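The new node.text.color argument is passed straight through to ggraph's geom_node_text. A hypothetical call, assuming a randomForest fit (the model, data and colours below are illustrative, not part of this commit; iris has three classes, hence three fill colours):

library(randomForest)
rf <- randomForest(Species ~ ., data = iris, ntree = 50)
tree_func(final_model = rf, tree_num = 1,
          col.class = c("deepskyblue1", "firebrick1", "gold"),
          main = "tree 1",
          node.text.color = "grey30") # new argument: colour of the node text labels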
@@ -382,6 +382,7 @@ terBeam_fit <- function(X, y, clf)
}
#fullPop <- sortPopulation(fullPop, evalToOrder = "fit_")
fullPop <- unique(fullPop) # keep only unique models
if(return.perc == 100)
{
......
@@ -382,8 +382,11 @@ myRRSkbest <- function(clf, X, y) {
fval = ter_wb[fidx]
}
# keep only unique models
best_dense_vecs <- unique(best_dense_vecs)
#if(clf$params$verbose) print(best_dense_vecs)
return(models=listOfDenseVecToModelCollection(clf = clf, X = X, y = y, v = best_dense_vecs))
return(models = listOfDenseVecToModelCollection(clf = clf, X = X, y = y, v = best_dense_vecs))
#return( list(clf=clf,models=listOfDenseVecToModelCollection(clf = clf,X = X,y = y,v = best_dense_vecs ) ) )
}
......
@@ -335,6 +335,9 @@ terga1_fit <- function(X, y, clf) {
pop_ordered_mod <- evaluatePopulation(X, y, clf, pop_ordered_mod, force.re.evaluation = TRUE, eval.all = TRUE)
}
# keep only models that are unique
pop_ordered_mod <- unique(pop_ordered_mod)
if(!(clf$params$popSaveFile=="NULL"))
{
#pop2Save <- evaluatePopulation(X, y, clf, pop_ordered_mod, eval.all = TRUE)
......
@@ -485,8 +485,9 @@ terga2_fit <- function(X, y, clf)
# clean the population
pop <- cleanPopulation(pop, clf)
pop <- evaluatePopulation(X, y, clf, pop, force.re.evaluation = TRUE, eval.all = TRUE)
pop <- unique(pop) # keep only unique models
# # At the end we revaluate the population forcing the scores and everything else
# At the end we re-evaluate the population, forcing the scores and everything else
# pop <- evaluatePopulation(X, y, clf, pop, force.re.evaluation = TRUE, eval.all = FALSE)
if(clf$params$final.pop.perc == 100)
{
......
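The deduplication added in terBeam_fit, myRRSkbest, terga1_fit and terga2_fit relies on base R's unique(), which compares list elements as a whole and keeps the first occurrence of each duplicate. A minimal sketch with toy stand-ins (the real elements are the package's model objects):

pop <- list(
  list(indices = c(1, 5, 9), fit_ = 0.81),
  list(indices = c(2, 3),    fit_ = 0.77),
  list(indices = c(1, 5, 9), fit_ = 0.81)  # exact duplicate of the first model
)
pop <- unique(pop)  # drops the exact duplicate
length(pop)         # 2

Only exact duplicates are removed: two models that differ in any field, even just a score, are both kept, which is presumably why the unique() calls sit after the evaluation or cleaning steps above.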