Commit 3ad50a53 authored by Edi Prifti's avatar Edi Prifti

- evaluateFit force.re.evaluation

- pop dense models
- cat.cl getSign
- improve ggplot
- improve plotModel/Populutaion
- improve printModel
-print/plotRF
parent 3e7ff57f
This diff is collapsed.
This diff is collapsed.
......@@ -78,6 +78,12 @@ fit <- function(X,
stop("fit: the number of observations in 'X' is diffrent from the one in 'y' ")
}
if(clf$params$objective == "auc")
{
if(clf$params$verbose) print("... Classification mode, computing factor(y) for speedup and robustness")
y <- factor(as.character(y))
}
# The possibility to select and focus on the top best features of X
if(!is.null(clf$params$max.nb.features))
{
......@@ -87,7 +93,6 @@ fit <- function(X,
max.nb.features <- nrow(X)
}
# set a flag
path.feature.cor <- paste(path, "feature.cor.rda", sep = "")
......@@ -249,12 +254,6 @@ fit <- function(X,
}
}
if(clf$params$objective == "auc")
{
if(clf$params$verbose) print("... Classification mode, computing factor(y) for speedup and robustness")
y <- factor(as.character(y))
}
# save the data step by step to be able to resume
if(clf$experiment$save != "nothing")
{
......
......@@ -246,7 +246,7 @@ sota.rf_fit <- function(X, y, clf) {
estim.feat.importance = TRUE)
if(clf$params$verbose)
{
printModel(mod = mod.res, method = clf$params$print_ind_method, score = "fit_")
try(printModel(mod = mod.res, method = clf$params$print_ind_method, score = "fit_"), silent = TRUE)
}
# Create a resulting model collection object
model_collection[[i]] <- list(mod.res)
......@@ -260,4 +260,138 @@ sota.rf_fit <- function(X, y, clf) {
}
# NOTE to print a random forrest object we used the code written by Rafael Zambrano here in stacks exchange https://stats.stackexchange.com/questions/41443/how-to-actually-plot-a-sample-tree-from-randomforestgettree
#**************************
#return the rules of a tree
#**************************
getConds <- function(tree)
{
#store all conditions into a list
conds <- list()
#start by the terminal nodes and find previous conditions
id.leafs <- which(tree$status==-1)
j<-0
for(i in id.leafs)
{
j <- j+1
prevConds <- prevCond(tree,i)
conds[[j]] <- prevConds$cond
while(prevConds$id > 1)
{
prevConds <- prevCond(tree,prevConds$id)
conds[[j]] <- paste(conds[[j]]," & ",prevConds$cond)
}
if(prevConds$id==1)
{
conds[[j]]<-paste(conds[[j]]," => ",tree$prediction[i])
}
} # end for
return(conds)
}
#**************************
#find the previous conditions in the tree
#**************************
prevCond <- function(tree,i)
{
if(i %in% tree$right_daughter)
{
id <- which(tree$right_daughter==i)
cond <- paste(tree$split_var[id],">",tree$split_point[id])
}
if(i %in% tree$left_daughter)
{
id <- which(tree$left_daughter==i)
cond <- paste(tree$split_var[id],"<",tree$split_point[id])
}
return(list(cond=cond,id=id))
}
#remove spaces in a word
collapse <- function(x)
{
x <- sub(" ","_",x)
return(x)
}
# written by Edi Prifti
printRFObject <- function(obj)
{
tree <- getTree(obj, k=1, labelVar=TRUE)
# tree <- getTree(obj, k=2, labelVar = TRUE)
# reprtree:::as.tree(gTree = tree, rforest = obj,max.depth = 2)
#
#rename the name of the column
colnames(tree) <- sapply(colnames(tree), collapse)
rules <- getConds(tree)
print(unlist(rules))
}
tree_func <- function(final_model,
tree_num,
col.class = c("deepskyblue1", "firebrick1"),
main = "")
{
# this function comes from Shirin Glander with some minor modifications on the visu.
# https://shiring.github.io/machine_learning/2017/03/16/rf_plot_ggraph
require(dplyr)
require(ggraph)
require(igraph)
# get tree by index
tree <- randomForest::getTree(final_model,
k = tree_num,
labelVar = TRUE) %>%
tibble::rownames_to_column() %>%
# make leaf split points to NA, so the 0s won't get plotted
dplyr::mutate(`split point` = ifelse(is.na(prediction), `split point`, NA))
# prepare data frame for graph
graph_frame <- data.frame(from = rep(tree$rowname, 2),
to = c(tree$`left daughter`, tree$`right daughter`))
# convert to graph and delete the last node that we don't want to plot
graph <- graph_from_data_frame(graph_frame) %>%
delete_vertices("0")
# set node labels
V(graph)$node_label <- gsub("_", " ", as.character(tree$`split var`))
V(graph)$leaf_label <- as.character(tree$prediction)
V(graph)$split <- as.character(round(tree$`split point`, digits = 2))
# plot
plot <- ggraph(graph, 'dendrogram') +
theme_bw() +
geom_edge_link() +
geom_node_point() +
scale_fill_manual("", values = col.class) +
geom_node_text(aes(label = node_label), na.rm = TRUE, repel = TRUE) +
geom_node_label(aes(label = split), vjust = 2.5, na.rm = TRUE, fill = "white") +
geom_node_label(aes(label = leaf_label, fill = leaf_label), na.rm = TRUE,
repel = TRUE, colour = "white", fontface = "bold", show.legend = FALSE) +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.background = element_blank(),
plot.background = element_rect(fill = "white"),
panel.border = element_blank(),
axis.line = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
plot.title = element_text(size = 18))
return(plot)
}
#tree_func(final_model = obj, 100)
......@@ -294,7 +294,7 @@ sota.svm_fit <- function(X, y, clf)
if(clf$params$verbose)
{
printModel(mod = mod.res, method = clf$params$print_ind_method, score = "fit_")
try(printModel(mod = mod.res, method = clf$params$print_ind_method, score = "fit_"), silent = TRUE)
}
# Create a resulting model collection object
......
......@@ -350,7 +350,7 @@ terBeam_fit <- function(X, y, clf)
{
if(isModel(pop[[1]]))
{
printModel(mod = pop[[1]], method = clf$params$print_ind_method, score = "fit_")
try(printModel(mod = pop[[1]], method = clf$params$print_ind_method, score = "fit_"), silent = TRUE)
}
}
......
......@@ -74,7 +74,7 @@ generateAllCombinations <- function(X, y, clf, ind.features.to.keep, sparsity, a
if(clf$params$language == "ratio" & clf$params$objective == "auc")
{
ind <- apply(pop_vec,1,min) < length(ind.features.to.keep)/2 & apply(pop_vec,1,max) > length(ind.features.to.keep)/2
ind <- (apply(pop_vec,1,min) < length(ind.features.to.keep)/2) & (apply(pop_vec, 1, max) > length(ind.features.to.keep)/2)
pop_vec <- pop_vec[ind,]
}
......
......@@ -325,7 +325,7 @@ terga1_fit <- function(X, y, clf) {
{
if(isModel(best_individual))
{
cat(paste("gen =",i,"\t", printModel(mod = best_individual, method = clf$params$print_ind_method, score = "fit_"),"\n"))
try(cat(paste("gen =",i,"\t", printModel(mod = best_individual, method = clf$params$print_ind_method, score = "fit_"),"\n")), silent = TRUE)
}
}
......
......@@ -554,7 +554,7 @@ evolve <- function(X, y, clf, pop, seed = NULL)
print(paste("length population:",length(evolved_pop)))
print(paste("length evaluation:",length(evolved_pop.eval)))
print(paste("best index:",best_individual_index))
cat(paste("gen =",i,"\t", printModel(mod = evolved_pop.eval[[which.max(evaluation)]], method = clf$params$print_ind_method, score = "fit_"),"\n"))
try(cat(paste("gen =",i,"\t", printModel(mod = evolved_pop.eval[[which.max(evaluation)]], method = clf$params$print_ind_method, score = "fit_"),"\n")), silent = TRUE)
}
# store the best performance for each generation
......@@ -636,7 +636,7 @@ evolve <- function(X, y, clf, pop, seed = NULL)
print(paste("length population:",length(evolved_pop)))
print(paste("length evaluation:",length(evolved_pop.eval)))
print(paste("best index:",best_individual_index))
cat(paste("gen =",i,"\t", printModel(mod = evolved_pop.eval[[which.max(evaluation)]], method = clf$params$print_ind_method, score = "fit_"),"\n"))
try(cat(paste("gen =",i,"\t", printModel(mod = evolved_pop.eval[[which.max(evaluation)]], method = clf$params$print_ind_method, score = "fit_"),"\n")), silent = TRUE)
}
# 4. Mutate the new population
......@@ -687,7 +687,7 @@ evolve <- function(X, y, clf, pop, seed = NULL)
print(paste("length population:",length(evolved_pop)))
print(paste("length evaluation:",length(evolved_pop.eval)))
print(paste("best index:",best_individual_index))
cat(paste("gen =",i,"\t", printModel(mod = evolved_pop.eval[[which.max(evaluation)]], method = clf$params$print_ind_method, score = "fit_"),"\n"))
try(cat(paste("gen =",i,"\t", printModel(mod = evolved_pop.eval[[which.max(evaluation)]], method = clf$params$print_ind_method, score = "fit_"),"\n")), silent = TRUE)
}
# print best
......@@ -697,7 +697,7 @@ evolve <- function(X, y, clf, pop, seed = NULL)
best_individual <- evolved_pop.eval[[best_individual_index]]
if(isModel(best_individual))
{
cat(paste("gen =",i,"\t", printModel(mod = best_individual, method = clf$params$print_ind_method, score = "fit_"),"\n"))
try(cat(paste("gen =",i,"\t", printModel(mod = best_individual, method = clf$params$print_ind_method, score = "fit_"),"\n")), silent = TRUE)
}
}
......
......@@ -386,7 +386,7 @@ terga2_fit <- function(X, y, clf)
{
if(!is.null(x))
{
cat("\t", printModel(mod = x[[1]], method = clf$params$print_ind_method, score = "fit_"), "\n")
try(cat("\t", printModel(mod = x[[1]], method = clf$params$print_ind_method, score = "fit_"), "\n"), silent = TRUE)
} else
{
cat("No individual found for this sparsity")
......
......@@ -564,7 +564,7 @@ evolve1 <- function(X, y, clf, pop, featEval)
{
if(isModel(best_individual))
{
print(printModel(mod = best_individual, method = clf$params$print_ind_method, score = "fit_"))
try(print(printModel(mod = best_individual, method = clf$params$print_ind_method, score = "fit_")), silent = TRUE)
}
}
......
#**************************
#return the rules of a tree
#**************************
getConds <- function(tree)
{
#store all conditions into a list
conds <- list()
#start by the terminal nodes and find previous conditions
id.leafs <- which(tree$status==-1)
j<-0
for(i in id.leafs)
{
j <- j+1
prevConds <- prevCond(tree,i)
conds[[j]] <- prevConds$cond
while(prevConds$id > 1)
{
prevConds <- prevCond(tree,prevConds$id)
conds[[j]] <- paste(conds[[j]]," & ",prevConds$cond)
}
if(prevConds$id==1)
{
conds[[j]]<-paste(conds[[j]]," => ",tree$prediction[i])
}
} # end for
return(conds)
}
#**************************
#find the previous conditions in the tree
#**************************
prevCond <- function(tree,i)
{
if(i %in% tree$right_daughter)
{
id <- which(tree$right_daughter==i)
cond <- paste(tree$split_var[id],">",tree$split_point[id])
}
if(i %in% tree$left_daughter)
{
id <- which(tree$left_daughter==i)
cond <- paste(tree$split_var[id],"<",tree$split_point[id])
}
return(list(cond=cond,id=id))
}
#remove spaces in a word
collapse <- function(x)
{
x <- sub(" ","_",x)
return(x)
}
data(iris)
require(randomForest)
mod.rf <- randomForest(Species ~ ., data=iris)
tree <- getTree(mod.rf, k=1, labelVar=TRUE)
#rename the name of the column
colnames(tree) <- sapply(colnames(tree),collapse)
rules <- getConds(tree)
print(unlist(rules))
# tree <- getTree(mod$obj, k=1, labelVar=TRUE)
# #rename the name of the column
# colnames(tree) <- sapply(colnames(tree),collapse)
# rules <- getConds(tree)
# print(unlist(rules))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment