Commit 7035effb authored by Edi Prifti's avatar Edi Prifti

clean space

parent d0cf5b60
length(v)
v
v.prop.melt
class(v)
for(i in 1:length(v))
{
v.prop.melt <- data.frame(v.prop.melt,
t(data.frame(names(v[[i]]),
v[[i]],
rep(names(v)[i], length(v[[i]]))
)
)
)
}
v.prop.melt
v.prop.melt
names(v[[i]])
v[[i]]
rep(names(v)[i]
, length(v[[i]])
)
v.prop.melt <- data.frame(t(v.prop.melt));
v.prop.melt
library(predomics)
#pdf(file="evolution_generations_terga1_ratio.pdf")
#tmp <- fit(X[,-lfolds$Fold01], y[-lfolds$Fold01],
tmp <- fit(X, y,
clf,
cross.validate = TRUE,
parallelize.folds = TRUE,
return.all = FALSE,
log.file = par.log,
path = config[[1]]$path)
v.prop.melt$name
prepare.for.graph
v.prop.melt[v.prop.melt$group=="-1",]$score <- v.prop.melt[v.prop.melt$group=="-1",]$score *-1
v.prop.melt[v.prop.melt$group=="-1",]$score
v.prop.melt$score
y
library(predomics)
#pdf(file="evolution_generations_terga1_ratio.pdf")
#tmp <- fit(X[,-lfolds$Fold01], y[-lfolds$Fold01],
tmp <- fit(X, y,
clf,
cross.validate = TRUE,
parallelize.folds = TRUE,
return.all = FALSE,
log.file = par.log,
path = config[[1]]$path)
is.null(prev.enrichment$chisq.q)
ggplot(v.prop.melt, aes(x=feature, y=prevalence, fill=group)) +
geom_bar(data=subset(v.prop.melt, group %in% c("all")), stat="identity", position="identity")
ggplot(v.prop.melt, aes(x=feature, y=prevalence, fill=group)) +
geom_bar(data=subset(v.prop.melt, group %in% c("all")), stat="identity", position="identity") +
coord_flip()
ggplot(v.prop.melt, aes(x=feature, y=prevalence, fill=group)) +
geom_bar(data=subset(v.prop.melt, group %in% c("all")), stat="identity", position="identity") +
coord_flip() +
#geom_point(data = subset(v.prop.melt, group %in% c("-1", "1")), aes(x=feature, y=prevalence, color=group, shape=group)) +
#scale_color_manual("Dataset", values = c("all"="gray90", "-1"=col.pt[1], "1"=col.pt[2])) +
#scale_fill_manual("Dataset", values = c("all"="gray90", "-1"=col.bg[1], "1"=col.bg[2])) +
#scale_shape_manual(values=c(25,24)) +
theme_bw()
ggplot(v.prop.melt, aes(x=feature, y=prevalence, fill=group)) +
geom_bar(data=subset(v.prop.melt, group %in% c("all")), stat="identity", position="identity") +
coord_flip() +
#geom_point(data = subset(v.prop.melt, group %in% c("-1", "1")), aes(x=feature, y=prevalence, color=group, shape=group)) +
#scale_color_manual("Dataset", values = c("all"="gray90", "-1"=col.pt[1], "1"=col.pt[2])) +
#scale_fill_manual("Dataset", values = c("all"="gray90", "-1"=col.bg[1], "1"=col.bg[2])) +
#scale_shape_manual(values=c(25,24)) +
theme_bw() +
theme(legend.position="none", axis.text=element_text(size=9))
......@@ -510,3 +441,72 @@ parallelize.folds = TRUE,
return.all = FALSE,
log.file = par.log,
path = config[[1]]$path)
library(predomics)
# NOTE(review): setwd() hard-codes a machine-specific path — fine for a one-off
# analysis script, but not portable.
setwd("/data/projects/predomics_testing/analyses/2.db_segata/2.db_cirrhose_stage1/bug_species")
#++++++++++++++++++++++++++++++++++++
# terga2
#++++++++++++++++++++++++++++++++++++
# load data and config
library(predomics)
load("db.rda")
load("config.rda")
# NOTE(review): attach() puts config[[1]]'s elements (sparsity.ter,
# max.nb.features, evalToFit, k, par.log, ...) on the search path; the bare
# names used below come from there. Prefer with()/explicit access in new code.
attach(config[[1]])
sparsity <- sparsity.ter
algorithm <- "ratio"
family <- "terga2"
# Build the terga2 classifier. Several settings are deliberately overridden
# for this debug run (sparsity 1:5, single core, only 5 generations, fixed
# seed[4]); the original values are kept as trailing comments.
clf <- terga2(#sparsity = sparsity,
sparsity = 1:5,
max.nb.features = max.nb.features,
evalToFit = evalToFit,
objective = objective,
size_pop = terga2.population,
nCores = 1,#nCores,
#nCores = nCores,
#nb_gen = terga2.nbgen,
nb_gen = 5,
#seed = seed,
seed = seed[4],
k_penalty = k_penalty,
language = algorithm,
evolveMethod = "v2m",
evolve.k1 = evolve.k1,
select_perc = select_perc,
select_percByMethod = list(50,50),
experiment.id = paste0(family,".",algorithm,"."),
experiment.save = experiment.save,
debug = FALSE)
printClassifier(clf)
#pdf(file="evolution_generations_terga1_ratio.pdf")
#tmp <- fit(X[,-lfolds$Fold01], y[-lfolds$Fold01],
# Fit on the full data with k-fold cross-validation (k comes from the
# attached config), folds run in parallel, progress logged to par.log.
tmp <- fit(X, y,
clf,
cross.validate = TRUE,
nfolds = k,
parallelize.folds = TRUE,
return.all = FALSE,
log.file = par.log,
path = config[[1]]$path)
mod
mod$fit_
#pdf(file="evolution_generations_terga1_ratio.pdf")
#tmp <- fit(X[,-lfolds$Fold01], y[-lfolds$Fold01],
tmp <- fit(X, y,
clf,
cross.validate = TRUE,
nfolds = k,
parallelize.folds = TRUE,
return.all = FALSE,
log.file = par.log,
path = config[[1]]$path)
force.re.evaluation
!myAssertNotNullNorNa(mod$score_)
myAssertNotNullNorNa(mod$score_)
mod$score_
scorelist
boxplot(scorelist)
!myAssertNotNullNorNa(mod$score_)
is.null(mod$eval.sparsity)
is.null(mod$score_)
mod$score_
myAssertNotNullNorNa(mod$score_)
getModelScore(mod, X, clf)
......@@ -6214,7 +6214,7 @@ mergeMeltImportanceCV <- function(list.results, filter.cv.prev = 0.5, min.kfold.
}
}else
{
g <- g + facet_grid(. ~ method, scale = "free")
g <- g + facet_grid(. ~ method, scales = "free")
}
# return the graph
......
This diff is collapsed.
#######################################################
# R Code ©Jean-daniel Zucker #
# mardi 26 janvier 2016 #
#######################################################
#
# #' Analysis of combinations of the dataset
# #'
# #' This method computes and displays the AUC value and the learning rule. The first column of the dataset must be the class.
# #' The vector SIGNES of dataset is required.
# #' @import gtools (for combinations)
# #' @import caTools
# #' @param data A dataframe. The first column must be the class. ( 1 or - 1)
# #' @param sparsity The combination \code{sparsity} of the number attributes of dataset.
# #' @param pathSave The path where all the results will be saved. If \code{pathSave} is absent, so \code{pathSave} will be the Current Working Directory - \code{getwd()}
# #' @return The AUC, the rule of learning, ...
# #' @export
# #' @examples
# #' library(PredOmics)
# #' # For now, I can not test with big dataset with iter (Because there are infinite recursion!!)
# library(MASS)
# library(gplots)
# library(caTools)
# library(gtools)
# library(combinat) # TO DO JUST EXTRACT THE FUNCTION COMBINATIONS
# UTILISTER individual(X=X,y=y,clf=clf,ind=c(1,2,3)) dans tools
# UTILISER evaluatePopulation permet d'evaluer une population de modeles
# et renvoie un vecteur d'AUC.
# UTILISER evaluateAUC score i.e. valeur du modele et la classe y.
# momo = individual(X=X,y=y,clf=clf,ind=c(1,2,3))
# score = computeModelScore(X,momo)
# evaluateAUC(score,y)
#######################################################
# Function terBeam2models #
# Returns a population of best_pop models #
#######################################################
#' Convert terBeam best feature combinations into a population of models
#'
#' Each row of \code{best_pop} holds, in its first \code{k} columns, the names
#' of the features making up one candidate model; the names are mapped back to
#' row indices of \code{X} and turned into a model object via
#' \code{individual()}.
#'
#' @param X data matrix with features in rows (rownames are feature names).
#' @param y response vector, forwarded to \code{individual()}.
#' @param clf classifier settings object; \code{clf$params$verbose} toggles
#'   the progress message.
#' @param best_pop data.frame/matrix of best combinations, one model per row.
#' @param k model size: number of leading columns of \code{best_pop} to use.
#' @return A list of model objects, one per row of \code{best_pop}.
terBeam2models <- function(X, y, clf, best_pop, k) {
  # Preallocate; seq_len() also fixes the empty case (the original
  # 1:nrow(best_pop) yields c(1, 0) when best_pop has zero rows).
  pop <- vector("list", nrow(best_pop))
  for (i in seq_len(nrow(best_pop))) {
    # map feature names back to row indices of X (NA if a name is absent)
    ind <- match(as.character(best_pop[i, 1:k]), rownames(X))
    pop[[i]] <- individual(X, y, clf, ind = ind)
  }
  if (clf$params$verbose) {
    print(
      paste0(
        "## The terBeam2models is called for k= ", k
      )
    )
  }
  return(pop)
}
# Function taken from gtools
#' Enumerate combinations of r items chosen from n
#'
#' Returns a matrix whose rows are all combinations (order ignored) of r
#' elements taken from v (defaults to 1:n), optionally allowing repeated
#' elements. Behaviour and error messages match gtools::combinations.
#'
#' @param n number of available items.
#' @param r size of each combination.
#' @param v source vector of items (must have length >= n).
#' @param set if TRUE, v is first reduced to its sorted unique values.
#' @param repeats.allowed if TRUE, an item may appear more than once per row.
#' @return a matrix with one combination per row.
combinations <- function (n, r, v = 1:n, set = TRUE, repeats.allowed = FALSE)
{
  # ---- argument validation (error messages kept identical) ----
  if (mode(n) != "numeric" || length(n) != 1 || n < 1 || (n%%1) != 0)
    stop("bad value of n")
  if (mode(r) != "numeric" || length(r) != 1 || r < 1 || (r%%1) != 0)
    stop("bad value of r")
  if (!is.atomic(v) || length(v) < n)
    stop("v is either non-atomic or too short")
  if ((r > n) & repeats.allowed == FALSE)
    stop("r > n and repeats.allowed=FALSE")
  if (set) {
    v <- unique(sort(v))
    if (length(v) < n)
      stop("too few different elements")
  }
  # zero-length vector of the same mode as v, returned when r == 0
  empty <- vector(mode(v), 0)
  if (repeats.allowed) {
    # multiset case: either pick v[1] again, or drop it for good
    build <- function(n, r, v) {
      if (r == 0) return(empty)
      if (r == 1) return(matrix(v, n, 1))
      if (n == 1) return(matrix(v, 1, r))
      rbind(cbind(v[1], build(n, r - 1, v)),
            build(n - 1, r, v[-1]))
    }
  } else {
    # plain case: pick v[1] exactly once, or skip it entirely
    build <- function(n, r, v) {
      if (r == 0) return(empty)
      if (r == 1) return(matrix(v, n, 1))
      if (r == n) return(matrix(v, 1, n))
      rbind(cbind(v[1], build(n - 1, r - 1, v[-1])),
            build(n - 1, r, v[-1]))
    }
  }
  build(n, r, v[1:n])
}
\ No newline at end of file
X = DATAMETA1[,2:10]
y = DATAMETA1[1]
load("../../../predomics_testing/data/pasolli_2016/ibd_known_species.rda")
load("~/Research/workspace_r/predomics_testing/data/pasolli_2016/ibd_known_species.rda")
load("../../../predomics_testing/analyses/2.db_ibd_k_species/1.sota_all_models/results.sota.svm.lin.c1_spar_5_to_338.rda")
mod <- res.sota.cv.svm.lin.c1$classifier$models$k_5[[1]]
plotModel(mod = computeCoeffSVMLin(X, y, clf=clf.sota.svm.lin.c1, mod=mod), X, y)
computeCoeffSVMLin(X, y, clf=NULL, mod=NULL)
# p = length(X)
#
# mod <- ksvm(as.matrix(X),y,type="C-svc",kernel='vanilladot',C=1)
#
# M <- diag(p)
# M <- rbind(M,rep(0,9))
#
# D <- predict(mod,M,type='decision')
#
# intercpt = -D[length(D)]
#
# D <- D + intercpt
# w <- D[1:length(D)-1]
#
# print(c('coefs = ',D))
# print(c('intercept=',intercpt))
#
# xtest <- seq(1,p)
#
# print(c('prediction compute with our weights: ',w %*% xtest - intercpt))
# print(c('prediction compute with our weights: ',predict(mod,t(xtest),type='decision')))
This diff is collapsed.
#**************************
#return the rules of a tree
#**************************
#' Walk every terminal node of `tree` (a randomForest::getTree(...,
#' labelVar=TRUE) frame, where status == -1 marks a leaf) back to the root,
#' concatenating the split conditions met on the way, then append the leaf's
#' prediction. Relies on prevCond() for the single parent-step lookup.
#' @return a list with one rule string per leaf.
getConds <- function(tree)
{
  leaf.ids <- which(tree$status == -1)
  rules <- vector("list", length(leaf.ids))
  for (k in seq_along(leaf.ids)) {
    leaf <- leaf.ids[k]
    step <- prevCond(tree, leaf)
    rule <- step$cond
    # climb towards the root, AND-ing each parent's condition
    while (step$id > 1) {
      step <- prevCond(tree, step$id)
      rule <- paste(rule, " & ", step$cond)
    }
    if (step$id == 1) {
      # reached the root: attach the predicted class of this leaf
      rule <- paste(rule, " => ", tree$prediction[leaf])
    }
    rules[[k]] <- rule
  }
  return(rules)
}
#**************************
#find the previous conditions in the tree
#**************************
#' One step up the tree: given node index i, locate its parent split and
#' return the condition string under which node i is reached, plus the
#' parent's row id. Right children yield ">", left children "<"; if i were
#' (impossibly) in both columns, the left-daughter result wins, as before.
#' NOTE(review): if i appears in neither column, cond/id stay unset and
#' return() errors — same behaviour as the original.
prevCond <- function(tree,i)
{
  from.right <- i %in% tree$right_daughter
  from.left <- i %in% tree$left_daughter
  if (from.right) {
    id <- which(tree$right_daughter == i)
    cond <- paste(tree$split_var[id], ">", tree$split_point[id])
  }
  if (from.left) {
    id <- which(tree$left_daughter == i)
    cond <- paste(tree$split_var[id], "<", tree$split_point[id])
  }
  return(list(cond = cond, id = id))
}
#remove spaces in a word
#' Replace every space in x with an underscore.
#'
#' Bug fix: the original used sub(), which replaces only the FIRST space,
#' so a name with several spaces was only partially sanitized. gsub() with
#' fixed = TRUE replaces them all and avoids regex interpretation.
#' Vectorized over x, like the original.
#' @param x character vector (e.g. column names from getTree()).
#' @return x with all spaces replaced by "_".
collapse <- function(x)
{
  x <- gsub(" ", "_", x, fixed = TRUE)
  return(x)
}
# Demo: extract human-readable decision rules from one tree of a random forest
data(iris)
# NOTE(review): require() only warns on failure; library() would error early
require(randomForest)
mod.rf <- randomForest(Species ~ ., data=iris)
# pull the first tree as a data.frame with variable names as labels
tree <- getTree(mod.rf, k=1, labelVar=TRUE)
#rename the name of the column
# replace spaces in column names ("left daughter", ...) so prevCond() can use them
colnames(tree) <- sapply(colnames(tree),collapse)
rules <- getConds(tree)
print(unlist(rules))
# tree <- getTree(mod$obj, k=1, labelVar=TRUE)
# #rename the name of the column
# colnames(tree) <- sapply(colnames(tree),collapse)
# rules <- getConds(tree)
# print(unlist(rules))
# #######################################################
# # mainGlobal.R #
# # R Code ©Cheveleyre/Prifti/Zucker/Hanczar #
# # mardi 29 septebre 2016 #
# #######################################################
#
# #DQDA et DLDA
library(sparsediscrim)
# initialize the functions
# The DQDA classifier is a modification to the well-known QDA classifier, where the off-diagonal elements of each class covariance matrix are assumed to be zero – the features are assumed to be uncorrelated. Under multivariate normality, the assumption uncorrelated features is equivalent to the assumption of independent features. The feature-independence assumption is a notable attribute of the Naive Bayes classifier family. The benefit of these classifiers is that they are fast and have much fewer parameters to estimate, especially when the number of features is quite large.
#' Build a classifier-settings object for the SOTA DLDA learner
#'
#' Creates the `clf` list (learner name + parameter list) used by the
#' predomics fit machinery for diagonal discriminant analysis.
#' NOTE(review): the svm-style arguments (scaled, kernel, C, nu, ...) are
#' accepted for interface compatibility but are not stored in clf$params,
#' exactly as in the original.
#'
#' Bug fix: the original body referenced `prior`, `formula`, `data`, `x`,
#' `object` and `newdata`, none of which existed in the signature or any
#' enclosing scope, so every call failed with "object '...' not found".
#' They are now explicit parameters with NULL defaults, appended at the end
#' of the signature so existing positional callers are unaffected.
#'
#' @return a list with $learner (character) and $params (list of settings).
stoa.dlda <- function(sparsity = c(1:30), # when sparsity == 0 it means that we can not fix it.
                      scaled = TRUE,
                      type = NULL,
                      kernel ="rbfdot",
                      kpar = "automatic",
                      C = 1,
                      nu = 0.2,
                      epsilon = 0.1,
                      prob.model = FALSE,
                      class.weights = NULL,
                      # cross = 0, done by the fit function of predomics
                      fit = TRUE,
                      cache = 40,
                      tol = 0.001,
                      shrinking = TRUE,
                      na.action = na.omit,
                      seed = "NULL",
                      verbose = TRUE,
                      experiment.id = NULL,
                      experiment.description = NULL,
                      experiment.save = "minimal",
                      prior = NULL,
                      formula = NULL,
                      data = NULL,
                      x = NULL,
                      object = NULL,
                      newdata = NULL) {
  clf <- list() # create a classifier object
  clf$learner <- "stoa.dlda" # name of the method
  clf$params <- list() # parameter list
  clf$params$parallel <- FALSE
  clf$params$sparsity <- sparsity # number of non zero variables in the model
  clf$params$prior <- prior # vector with prior probabilities for each class. If NULL (default), then equal probabilities are used.
  clf$params$formula <- formula # a formula of the form groups ~ x1 + x2 + ...
  clf$params$data <- data # data frame from which formula variables are preferentially taken
  clf$params$x <- x # object to print
  clf$params$object <- object # trained DQDA object
  clf$params$newdata <- newdata # matrix of observations to predict, one per row
  return(clf)
}
#
#
# n <- nrow(iris)
# train <- sample(seq_len(n), n / 2)
# dqda_out <- dqda(Species ~ ., data = iris[train, ])
# predicted <- predict(dqda_out, iris[-train, -5])$class
#
#
#
# ##library(MASS)
# # # again, the second column are predicted probabilities for versicolor
# # iris.lda = lda(Y ~ petal.length+petal.width+sepal.length+sepal.width)
# # plot(iris.lda, dimen=1, type="both") # fit from lda
# # iris.qda = qda(Y ~ petal.length+petal.width+sepal.length+sepal.width)
# # #plot(iris.qda, dimen=1, type="both") # fit from qda
#
# # Pour Visualiser
# ##library(klaR)
# # partimat(Species ~ ., data = iris, method = "lda",plot.matrix = TRUE, imageplot = FALSE)
# # partimat(Species ~ ., data = iris, method = "qda",plot.matrix = TRUE, imageplot = FALSE)
# # partimat(Species ~ ., data = iris, method = "rda",plot.matrix = TRUE, imageplot = FALSE)
# # partimat(Species ~ ., data = iris, method = "naiveBayes",plot.matrix = TRUE, imageplot = FALSE)
# # partimat(Species ~ ., data = iris, method = "sknn",plot.matrix = TRUE, imageplot = FALSE)
# # partimat(Species ~ ., data = iris, method = "svmlight",plot.matrix = TRUE, imageplot = FALSE)
# # partimat(Species ~ ., data = iris, method = "rpart",plot.matrix = TRUE, imageplot = FALSE)
#
#
# #DQDA et DLDA
# ##library(sparsediscrim)
#
# n <- nrow(iris)
# train <- sample(seq_len(n), n / 2)
# dqda_out <- dqda(Species ~ ., data = iris[train, ])
# predicted <- predict(dqda_out, iris[-train, -5])$class
#
# dlda_out <- dlda(Species ~ ., data = iris[train, ])
# predicted <- predict(dqda_out, iris[-train, -5])$class
#
# dqda_out2 <- dqda(x = iris[train, -5], y = iris[train, 5])
# predicted2 <- predict(dqda_out2, iris[-train, -5])$class
# all.equal(predicted, predicted2)
#
#
#
# # Pour les Randomforest
# ##library(ROCR)
# ##library(randomForest)
# ##library(caret)
# # data(iris)
# # # il faut se mettre biclasse on enleve les setosa !!!
# # iris <- iris[(iris$Species != "setosa"),]
# # iris$Species <- factor(iris$Species)
# #
# # #Avec un modèle linéaire
# # fit <- glm(Species~.,iris,family=binomial)
# # #Prédiction
# # train.predict <- predict(fit,newdata = iris,type="response")
# # #Perf
# # par(mfrow=c(2,2))
# # plot(performance(prediction(train.predict,iris$Species),"tpr","fpr"),col = "red")
# # auc1 <- performance(prediction(train.predict,iris$Species),"auc")@y.values[[1]]
# # auc1
# # legend("bottomright",legend=c(paste("Logistic Regression (AUC=",formatC(auc1,digits=4,format="f"),")",sep="")),col=c("red"), lty=1)
# # #Perf Valid Croisée
# # ctrl <- trainControl(method = "cv",classProbs = TRUE,summaryFunction = twoClassSummary)
# # set.seed(1)
# # cvEstimate <- train(Species ~ ., data = iris,method = "glm",metric = "ROC",trControl = ctrl)
# #
# # #Avec un modèle de forêts aléatoire
# # fit <- randomForest(Species ~ ., data=iris, ntree=50)
# # # variable importance
# # varImpPlot(fit)
# # #Prédiction
# # train.predict <- predict(fit,iris,type="prob")[,2]
# # #Perf
# # plot(performance(prediction(train.predict,iris$Species),"tpr","fpr"),col = "red")
# # aucforest <- performance(prediction(train.predict,iris$Species),"auc")@y.values[[1]]
# # aucforest
# # legend("bottomright",legend=c(paste("Random Forests (AUC=",formatC(auc1,digits=4,format="f"),")",sep="")), col=c("red"), lty=1)
\ No newline at end of file
# mainGlobal.R #
# R Code ©Cheveleyre/Prifti/Zucker #
# mardi 3 mai 2016 #
#######################################################
##library(MASS)
# # again, the second column are predicted probabilities for versicolor
# iris.lda = lda(Y ~ petal.length+petal.width+sepal.length+sepal.width)
# plot(iris.lda, dimen=1, type="both") # fit from lda
# iris.qda = qda(Y ~ petal.length+petal.width+sepal.length+sepal.width)
# #plot(iris.qda, dimen=1, type="both") # fit from qda
# Pour Visualiser
##library(klaR)
# partimat(Species ~ ., data = iris, method = "lda",plot.matrix = TRUE, imageplot = FALSE)
# partimat(Species ~ ., data = iris, method = "qda",plot.matrix = TRUE, imageplot = FALSE)
# partimat(Species ~ ., data = iris, method = "rda",plot.matrix = TRUE, imageplot = FALSE)
# partimat(Species ~ ., data = iris, method = "naiveBayes",plot.matrix = TRUE, imageplot = FALSE)
# partimat(Species ~ ., data = iris, method = "sknn",plot.matrix = TRUE, imageplot = FALSE)
# partimat(Species ~ ., data = iris, method = "svmlight",plot.matrix = TRUE, imageplot = FALSE)
# partimat(Species ~ ., data = iris, method = "rpart",plot.matrix = TRUE, imageplot = FALSE)
#DQDA et DLDA
##library(sparsediscrim)
# n <- nrow(iris)
# train <- sample(seq_len(n), n / 2)
# dqda_out <- dqda(Species ~ ., data = iris[train, ])
# predicted <- predict(dqda_out, iris[-train, -5])$class
#
# dlda_out <- dlda(Species ~ ., data = iris[train, ])
# predicted <- predict(dqda_out, iris[-train, -5])$class
#
#
# dqda_out2 <- dqda(x = iris[train, -5], y = iris[train, 5])
# predicted2 <- predict(dqda_out2, iris[-train, -5])$class
# all.equal(predicted, predicted2)
# Pour les Randomforest
##library(ROCR)
##library(randomForest)
##library(caret)
# data(iris)
# # il faut se mettre biclasse on enleve les setosa !!!
# iris <- iris[(iris$Species != "setosa"),]
# iris$Species <- factor(iris$Species)
#
# #Avec un modèle linéaire
# fit <- glm(Species~.,iris,family=binomial)
# #Prédiction
# train.predict <- predict(fit,newdata = iris,type="response")
# #Perf
# par(mfrow=c(2,2))
# plot(performance(prediction(train.predict,iris$Species),"tpr","fpr"),col = "red")
# auc1 <- performance(prediction(train.predict,iris$Species),"auc")@y.values[[1]]
# auc1
# legend("bottomright",legend=c(paste("Logistic Regression (AUC=",formatC(auc1,digits=4,format="f"),")",sep="")),col=c("red"), lty=1)
# #Perf Valid Croisée
# ctrl <- trainControl(method = "cv",classProbs = TRUE,summaryFunction = twoClassSummary)
# set.seed(1)
# cvEstimate <- train(Species ~ ., data = iris,method = "glm",metric = "ROC",trControl = ctrl)
#
# #Avec un modèle de forêts aléatoire
# fit <- randomForest(Species ~ ., data=iris, ntree=50)
# # variable importance
# varImpPlot(fit)
# #Prédiction
# train.predict <- predict(fit,iris,type="prob")[,2]
# #Perf
# plot(performance(prediction(train.predict,iris$Species),"tpr","fpr"),col = "red")
# aucforest <- performance(prediction(train.predict,iris$Species),"auc")@y.values[[1]]
# aucforest
# legend("bottomright",legend=c(paste("Random Forests (AUC=",formatC(auc1,digits=4,format="f"),")",sep="")), col=c("red"), lty=1)
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# =====================================================================================
# create the databases
# =====================================================================================
# Each section below loads a raw .rda, wraps the species abundance table and
# the response vector into a list(X, y), saves it compressed, then clears the
# workspace before the next dataset.
# NOTE(review): rm(list=ls()) wipes everything between sections, so each
# section is fully self-contained; dims in trailing comments are from the
# original run.
# (1) cirrhosis stage 1
load("/data/projects/predomics_testing/data/segata_2017/data.bugs/qinn_bug_stage1.rda")
cir_train <- list()
cir_train$X <- as.data.frame(data.qinn.bug.stage1.freq.species); dim(cir_train$X) # 1045 181
cir_train$y <- data.qinn.bug.y
save(cir_train, file="cir_train.rda", compress = TRUE, compression_level = 9)
rm(list=ls()); gc()
# (2) cirrhosis stage 2
load("/data/projects/predomics_testing/data/segata_2017/data.bugs/qinn_bug_stage2.rda")
cir_test <- list()
cir_test$X <- as.data.frame(data.qinn.bug.stage2.freq.species); dim(cir_test$X) # 1045 56
cir_test$y <- data.qinn.bug.y
save(cir_test, file="cir_test.rda", compress = TRUE, compression_level = 9)
rm(list=ls()); gc()
# (3) ibd
load("/data/projects/predomics_testing/data/segata_2017/data.bugs/nielsen_bug.rda")
ibd <- list()
ibd$X <- as.data.frame(data.nielsen.bug.freq.species); dim(ibd$X) # 1045 396
ibd$y <- data.nielsen.bug.y
save(ibd, file="ibd.rda", compress = TRUE, compression_level = 9)
rm(list=ls()); gc()
# (4) obesity
load("/data/projects/predomics_testing/data/segata_2017/data.bugs/lechat_bug.rda")
obesity <- list()
obesity$X <- as.data.frame(data.lechat.bug.freq.species); dim(obesity$X) # 1045 292
obesity$y <- data.lechat.bug.y
save(obesity, file="obesity.rda", compress = TRUE, compression_level = 9)
rm(list=ls()); gc()
# (5) t2d
load("/data/projects/predomics_testing/data/segata_2017/data.bugs/qinj_bug.rda")
t2d <- list()
t2d$X <- as.data.frame(data.qinj.bug.freq.species); dim(t2d$X) # 1045 344
t2d$y <- data.qinj.bug.y
save(t2d, file="t2d.rda", compress = TRUE, compression_level = 9)
rm(list=ls()); gc()
# (6) t2dw
load("/data/projects/predomics_testing/data/segata_2017/data.bugs/karlsson_bug.rda")
t2dw <- list()
t2dw$X <- as.data.frame(data.karlsson.bug.freq.species); dim(t2dw$X) # 1045 344
t2dw$y <- data.karlsson.bug.y
save(t2dw, file="t2dw.rda", compress = TRUE, compression_level = 9)
rm(list=ls()); gc()
# (7) cirrhosis stage 1 counts — same dataset as (1) but raw counts, not frequencies
load("/data/projects/predomics_testing/data/segata_2017/data.bugs/qinn_bug_stage1.rda")
cir_train_count <- list()
cir_train_count$X <- as.data.frame(data.qinn.bug.stage1.counts.species); dim(cir_train_count$X) # 1045 181
cir_train_count$y <- data.qinn.bug.y
save(cir_train_count, file="cir_train_count.rda", compress = TRUE, compression_level = 9)
rm(list=ls()); gc()
cir_train
cir_test
ibd
obesity
t2d
t2dw
\ No newline at end of file
File added
File added
File added
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Welcome to Overleaf --- just edit your LaTeX on the left,
% and we'll compile it for you on the right. If you give
% someone the link to this page, they can edit at the same
% time. See the help menu above for more info. Enjoy!
%
% Note: you can export the pdf to see the result at full
% resolution.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\documentclass{article}
\usepackage[latin1]{inputenc}
\usepackage{tikz}
\usetikzlibrary{shapes,arrows}
\usetikzlibrary{positioning,calc}
%%%<
\usepackage{verbatim}
\usepackage[active,tightpage]{preview}
\PreviewEnvironment{tikzpicture}
\setlength\PreviewBorder{5pt}%
%%%>
\begin{comment}
:Title: Simple flow chart
:Tags: Diagrams
With PGF/TikZ you can draw flow charts with relative ease. This flow chart from [1]_
outlines an algorithm for identifying the parameters of an autonomous underwater vehicle model.
Note that relative node
placement has been used to avoid placing nodes explicitly. This feature was
introduced in PGF/TikZ >= 1.09.
.. [1] Bossley, K.; Brown, M. & Harris, C. Neurofuzzy identification of an autonomous underwater vehicle `International Journal of Systems Science`, 1999, 30, 901-913
\end{comment}
\begin{document}
\pagestyle{empty}
% Define block styles
\tikzstyle{populations} = [rectangle, draw, fill=orange,
text width=5em, text badly centered, node distance=3cm, inner sep=5pt]
\tikzstyle{func} = [ellipse, draw, fill=red!60,
text width=7em, text badly centered, node distance=2cm, inner sep=3pt]
\tikzstyle{merge} = [diamond, draw, fill=blue!60,
text width=7em, text badly centered, node distance=5cm, inner sep=0pt]
\tikzstyle{line} = [draw, -latex']
\begin{tikzpicture}[node distance = 5.5cm, auto, remember picture,
inner/.style={circle, draw=red!30,fill=pink!20,thick,inner sep=3pt},
outer/.style={draw=green,fill=green!20,thick,inner sep=8pt}]
% Place nodes
\node [populations, inner sep = 15pt] (pop) {Initial population};
\node [outer, label=above:tag select, right of = pop] (tagselect) {