From 17fe5e055f35ce22117b8c320acf586826eaaeb9 Mon Sep 17 00:00:00 2001 From: Pascal Kerschke Date: Wed, 10 Aug 2016 01:54:17 +0200 Subject: [PATCH 01/17] initial version of expression-learners --- R/RLearner_classif_randomForest.R | 6 ++--- R/Task_operators.R | 15 +++++++++++++ R/evaluateLearner.R | 36 ++++++++++++++++++++++++++++++ R/setHyperPars.R | 2 +- R/train.R | 2 ++ R/tuneParams.R | 2 ++ tests/testthat/test_base_Learner.R | 33 +++++++++++++++++++++++++++ 7 files changed, 92 insertions(+), 4 deletions(-) create mode 100644 R/evaluateLearner.R diff --git a/R/RLearner_classif_randomForest.R b/R/RLearner_classif_randomForest.R index f5b27a8c00..ff606bfc89 100644 --- a/R/RLearner_classif_randomForest.R +++ b/R/RLearner_classif_randomForest.R @@ -5,10 +5,10 @@ makeRLearner.classif.randomForest = function() { package = "randomForest", par.set = makeParamSet( makeIntegerLearnerParam(id = "ntree", default = 500L, lower = 1L), - makeIntegerLearnerParam(id = "mtry", lower = 1L), + makeIntegerLearnerParam(id = "mtry", lower = 1L, default = expression(floor(sqrt(p)))), makeLogicalLearnerParam(id = "replace", default = TRUE), - makeNumericVectorLearnerParam(id = "classwt", lower = 0), - makeNumericVectorLearnerParam(id = "cutoff", lower = 0, upper = 1), + makeNumericVectorLearnerParam(id = "classwt", lower = 0, len = expression(k)), + makeNumericVectorLearnerParam(id = "cutoff", lower = 0, upper = 1, len = expression(k)), makeUntypedLearnerParam(id = "strata", tunable = FALSE), makeIntegerVectorLearnerParam(id = "sampsize", lower = 1L), makeIntegerLearnerParam(id = "nodesize", default = 1L, lower = 1L), diff --git a/R/Task_operators.R b/R/Task_operators.R index 2352c6bf53..b44835cb10 100644 --- a/R/Task_operators.R +++ b/R/Task_operators.R @@ -453,3 +453,18 @@ getTaskFactorLevels = function(task) { getTaskWeights = function(task) { task$weights } + + +# returns a dictionary, which contains the task itself (task), the number of +# features (p), the number of observations (n), the task type (type) and in +# case of classification tasks the number of class levels (k) +makeTaskDictionary = function(task) { + dict = list() + dict$task = task + dict$p = getTaskNFeats(task) + dict$n = getTaskSize(task) + dict$type = getTaskType(task) + if (dict$type == "classif") + dict$k = length(getTaskClassLevels(task)) + return(dict) +} diff --git a/R/evaluateLearner.R b/R/evaluateLearner.R new file mode 100644 index 0000000000..99c20ec1b6 --- /dev/null +++ b/R/evaluateLearner.R @@ -0,0 +1,36 @@ +#' @title Evaluates expressions within a learner according to the task. +#' +#' @description Updates the learner by evaluating its expressions based on a specific task. +#' @template arg_learner +#' @template arg_task +#' @return [\code{\link{Learner}}]. +#' @example +#' ## one can evaluate hyperparameters +#' task = makeClassifTask(data = iris, target = "Species") +#' lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) +#' lrn2 = evaluateLearner(lrn = lrn1, task = task) +#' +#' lrn1$par.vals$minsplit +#' lrn2$par.vals$minsplit +#' +#' ## alternatively, one can evaluate entire parameter sets +#' task = makeClassifTask(data = iris, target = "Species") +#' lrn1 = makeLearner("classif.randomForest") +#' lrn2 = evaluateLearner(lrn = lrn1, task = task) +#' +#' ## focus on the parameters 'mtry', 'classwt' and 'cutoff' +#' lrn1$par.set +#' lrn2$par.set +#' @export +evaluateLearner = function(lrn, task) { + dict = makeTaskDictionary(task) + if (!is.null(dict)) { + if (ParamHelpers::hasExpression(lrn$par.set)) { + ParamHelpers::checkParamSet(lrn$par.set, dict = dict) + lrn$par.set = ParamHelpers::evaluateParamSet(par.set = lrn$par.set, dict = dict) + } + if (length(lrn$par.vals) > 0 && any(vlapply(lrn$par.vals, is.expression))) + lrn$par.vals = lapply(lrn$par.vals, function(expr) eval(expr, envir = dict)) + } + return(lrn) +} diff --git a/R/setHyperPars.R b/R/setHyperPars.R index 4875f0f1af..8e75988be5 100644 --- a/R/setHyperPars.R +++ b/R/setHyperPars.R @@ -74,7 +74,7 @@ setHyperPars2.Learner = function(learner, par.vals) { learner$par.set$pars[[n]] = makeUntypedLearnerParam(id = n) learner$par.vals[[n]] = p } else { - if (on.par.out.of.bounds != "quiet" && !isFeasible(pd, p)) { + if (on.par.out.of.bounds != "quiet" && !isFeasible(pd, p) && !is.expression(p)) { msg = sprintf("%s is not feasible for parameter '%s'!", convertToShortString(p), pd$id) if (on.par.out.of.bounds == "stop") { stop(msg) diff --git a/R/train.R b/R/train.R index fd0d459d4f..71586f0292 100644 --- a/R/train.R +++ b/R/train.R @@ -30,6 +30,8 @@ #' print(mod) train = function(learner, task, subset, weights = NULL) { learner = checkLearner(learner) + if (ParamHelpers::hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) + learner = evaluateLearner(lrn = learner, task = task) assertClass(task, classes = "Task") if (missing(subset)) { subset = seq_len(getTaskSize(task)) diff --git a/R/tuneParams.R b/R/tuneParams.R index 1b859af82b..ad255be9eb 100644 --- a/R/tuneParams.R +++ b/R/tuneParams.R @@ -78,6 +78,8 @@ #' @seealso \code{\link{generateHyperParsEffectData}} tuneParams = function(learner, task, resampling, measures, par.set, control, show.info = getMlrOption("show.info")) { learner = checkLearner(learner) + if (ParamHelpers::hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) + learner = evaluateLearner(lrn = learner, task = task) assertClass(task, classes = "Task") measures = checkMeasures(measures, learner) assertClass(par.set, classes = "ParamSet") diff --git a/tests/testthat/test_base_Learner.R b/tests/testthat/test_base_Learner.R index 32754effb1..86402dadba 100644 --- a/tests/testthat/test_base_Learner.R +++ b/tests/testthat/test_base_Learner.R @@ -22,3 +22,36 @@ test_that("Learner", { expect_error(makeLearner("classif.lda", predict.threshold = 1, "'prob' must hold")) }) + + +test_that("allow expressions", { + ## expressions within parameter sets + lrn1 = makeLearner("classif.randomForest") + lrn2 = evaluateLearner(lrn = lrn1, task = binaryclass.task) + x1 = lrn1$par.set$pars$mtry$default + x2 = lrn2$par.set$pars$mtry$default + expect_true(is.expression(x1)) + expect_true(!is.expression(x2)) + expect_equal(x2, floor(sqrt(ncol(binaryclass.df)))) + + x1 = lrn1$par.set$pars$classwt$len + x2 = lrn2$par.set$pars$classwt$len + expect_true(is.expression(x1)) + expect_true(!is.expression(x2)) + expect_equal(x2, 2) + + x1 = lrn1$par.set$pars$cutoff$len + x2 = lrn2$par.set$pars$cutoff$len + expect_true(is.expression(x1)) + expect_true(!is.expression(x2)) + expect_equal(x2, 2) + + ## expressions within hyperparameters + lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) + lrn2 = evaluateLearner(lrn = lrn1, task = binaryclass.task) + x1 = lrn1$par.vals$minsplit + x2 = lrn2$par.vals$minsplit + expect_true(is.expression(x1)) + expect_true(!is.expression(x2)) + expect_equal(lrn2$par.vals$minsplit, 2 * getTaskNFeats(binaryclass.task)) +}) From b56128d7c9af8db18d1b074d0dd137f9192f1e0c Mon Sep 17 00:00:00 2001 From: Pascal Kerschke Date: Wed, 10 Aug 2016 11:25:01 +0200 Subject: [PATCH 02/17] allow expressions within tuning param sets --- R/evaluateLearner.R | 55 +++++++++++++++++----- R/train.R | 2 +- R/tuneParams.R | 8 ++-- tests/testthat/test_base_Learner.R | 33 ------------- tests/testthat/test_base_evaluateLearner.R | 48 +++++++++++++++++++ 5 files changed, 98 insertions(+), 48 deletions(-) create mode 100644 tests/testthat/test_base_evaluateLearner.R diff --git a/R/evaluateLearner.R b/R/evaluateLearner.R index 99c20ec1b6..015c1a8580 100644 --- a/R/evaluateLearner.R +++ b/R/evaluateLearner.R @@ -1,11 +1,16 @@ -#' @title Evaluates expressions within a learner according to the task. +#' @title Evaluates expressions within a learner or parameter set according to the task. #' -#' @description Updates the learner by evaluating its expressions based on a specific task. +#' @description Updates learners and/or parameter sets by evaluating their expressions +#' based on a specific task. #' @template arg_learner +#' @param par.set [\code{\link[ParamHelpers]{ParamSet}}]\cr +#' Parameter set of (hyper)parameters and their constraints. +#' Dependent parameters with a \code{requires} field must use \code{quote} and not +#' \code{expression} to define it. #' @template arg_task -#' @return [\code{\link{Learner}}]. +#' @return [\code{\link{Learner}} | \code{\link[ParamHelpers]{ParamSet}}]. #' @example -#' ## one can evaluate hyperparameters +#' ## (1) evaluation of a learner's hyperparameters #' task = makeClassifTask(data = iris, target = "Species") #' lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) #' lrn2 = evaluateLearner(lrn = lrn1, task = task) @@ -13,7 +18,7 @@ #' lrn1$par.vals$minsplit #' lrn2$par.vals$minsplit #' -#' ## alternatively, one can evaluate entire parameter sets +#' ## (2) evaluation of a learner's entire parameter set #' task = makeClassifTask(data = iris, target = "Species") #' lrn1 = makeLearner("classif.randomForest") #' lrn2 = evaluateLearner(lrn = lrn1, task = task) @@ -21,16 +26,44 @@ #' ## focus on the parameters 'mtry', 'classwt' and 'cutoff' #' lrn1$par.set #' lrn2$par.set +#' +#' ## (3) evaluation of a parameter set +#' task = makeClassifTask(data = iris, target = "Species") +#' ps1 = makeParamSet( +#' makeNumericParam("C", lower = expression(a), upper = expression(b), trafo = function(x) 2^x), +#' makeDiscreteParam("sigma", values = 2^c(-1, 1)), +#' makeDiscreteParam("kernel", values = expression(list(e, f))) +#' ) +#' ps2 = evaluateParset(par.set = ps1, task = task, +#' dict = list(a = -2, b = 3, e = "rbfdot", f = "laplacedot")) #' @export -evaluateLearner = function(lrn, task) { - dict = makeTaskDictionary(task) +evaluateLearner = function(lrn, task, dict = NULL) { + task.dict = makeTaskDictionary(task) + dict = insert(task.dict, dict) if (!is.null(dict)) { - if (ParamHelpers::hasExpression(lrn$par.set)) { - ParamHelpers::checkParamSet(lrn$par.set, dict = dict) - lrn$par.set = ParamHelpers::evaluateParamSet(par.set = lrn$par.set, dict = dict) - } + lrn$par.set = evaluateParset(lrn$par.set, task = task, dict = dict) if (length(lrn$par.vals) > 0 && any(vlapply(lrn$par.vals, is.expression))) lrn$par.vals = lapply(lrn$par.vals, function(expr) eval(expr, envir = dict)) } return(lrn) } + +#' @export +evaluateParset = function(par.set, task, dict = NULL) { + task.dict = makeTaskDictionary(task) + dict = insert(task.dict, dict) + if (!is.null(dict)) { + if (ParamHelpers::hasExpression(par.set)) { + ParamHelpers::checkParamSet(par.set, dict = dict) + par.set = ParamHelpers::evaluateParamSet(par.set = par.set, dict = dict) + ## assure that the value names are also shown if the values list was unnamed + par.set$pars = lapply(par.set$pars, function(x) { + if (is.null(x$values) || !is.null(names(x$values))) + return(x) + names(x$values) = unlist(lapply(x$values, function(vals) vals)) + return(x) + }) + } + } + return(par.set) +} diff --git a/R/train.R b/R/train.R index 71586f0292..1bd6325519 100644 --- a/R/train.R +++ b/R/train.R @@ -28,7 +28,7 @@ #' learner = makeLearner("classif.rpart", minsplit = 7, predict.type = "prob") #' mod = train(learner, task, subset = training.set) #' print(mod) -train = function(learner, task, subset, weights = NULL) { +train = function(learner, task, subset, weights = NULL, dict = NULL) { learner = checkLearner(learner) if (ParamHelpers::hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) learner = evaluateLearner(lrn = learner, task = task) diff --git a/R/tuneParams.R b/R/tuneParams.R index ad255be9eb..69f5d15e48 100644 --- a/R/tuneParams.R +++ b/R/tuneParams.R @@ -76,13 +76,15 @@ #' print(head(as.data.frame(res$opt.path))) #' } #' @seealso \code{\link{generateHyperParsEffectData}} -tuneParams = function(learner, task, resampling, measures, par.set, control, show.info = getMlrOption("show.info")) { +tuneParams = function(learner, task, resampling, measures, par.set, control, show.info = getMlrOption("show.info"), dict = NULL) { learner = checkLearner(learner) - if (ParamHelpers::hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) - learner = evaluateLearner(lrn = learner, task = task) assertClass(task, classes = "Task") + if (ParamHelpers::hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) + learner = evaluateLearner(lrn = learner, task = task, dict = dict) measures = checkMeasures(measures, learner) assertClass(par.set, classes = "ParamSet") + if (ParamHelpers::hasExpression(par.set)) + par.set = evaluateParset(par.set = par.set, task = task, dict = dict) assertClass(control, classes = "TuneControl") if (!inherits(resampling, "ResampleDesc") && !inherits(resampling, "ResampleInstance")) stop("Argument resampling must be of class ResampleDesc or ResampleInstance!") diff --git a/tests/testthat/test_base_Learner.R b/tests/testthat/test_base_Learner.R index 86402dadba..32754effb1 100644 --- a/tests/testthat/test_base_Learner.R +++ b/tests/testthat/test_base_Learner.R @@ -22,36 +22,3 @@ test_that("Learner", { expect_error(makeLearner("classif.lda", predict.threshold = 1, "'prob' must hold")) }) - - -test_that("allow expressions", { - ## expressions within parameter sets - lrn1 = makeLearner("classif.randomForest") - lrn2 = evaluateLearner(lrn = lrn1, task = binaryclass.task) - x1 = lrn1$par.set$pars$mtry$default - x2 = lrn2$par.set$pars$mtry$default - expect_true(is.expression(x1)) - expect_true(!is.expression(x2)) - expect_equal(x2, floor(sqrt(ncol(binaryclass.df)))) - - x1 = lrn1$par.set$pars$classwt$len - x2 = lrn2$par.set$pars$classwt$len - expect_true(is.expression(x1)) - expect_true(!is.expression(x2)) - expect_equal(x2, 2) - - x1 = lrn1$par.set$pars$cutoff$len - x2 = lrn2$par.set$pars$cutoff$len - expect_true(is.expression(x1)) - expect_true(!is.expression(x2)) - expect_equal(x2, 2) - - ## expressions within hyperparameters - lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) - lrn2 = evaluateLearner(lrn = lrn1, task = binaryclass.task) - x1 = lrn1$par.vals$minsplit - x2 = lrn2$par.vals$minsplit - expect_true(is.expression(x1)) - expect_true(!is.expression(x2)) - expect_equal(lrn2$par.vals$minsplit, 2 * getTaskNFeats(binaryclass.task)) -}) diff --git a/tests/testthat/test_base_evaluateLearner.R b/tests/testthat/test_base_evaluateLearner.R new file mode 100644 index 0000000000..b9bf9c268a --- /dev/null +++ b/tests/testthat/test_base_evaluateLearner.R @@ -0,0 +1,48 @@ +context("evaluate expressions") + +test_that("expressions in learners", { + ## expressions within parameter sets + lrn1 = makeLearner("classif.randomForest") + lrn2 = evaluateLearner(lrn = lrn1, task = binaryclass.task) + x1 = lrn1$par.set$pars$mtry$default + x2 = lrn2$par.set$pars$mtry$default + expect_true(is.expression(x1)) + expect_true(!is.expression(x2)) + expect_equal(x2, floor(sqrt(ncol(binaryclass.df)))) + + x1 = lrn1$par.set$pars$classwt$len + x2 = lrn2$par.set$pars$classwt$len + expect_true(is.expression(x1)) + expect_true(!is.expression(x2)) + expect_equal(x2, 2) + + x1 = lrn1$par.set$pars$cutoff$len + x2 = lrn2$par.set$pars$cutoff$len + expect_true(is.expression(x1)) + expect_true(!is.expression(x2)) + expect_equal(x2, 2) + + ## expressions within hyperparameters + lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) + lrn2 = evaluateLearner(lrn = lrn1, task = binaryclass.task) + x1 = lrn1$par.vals$minsplit + x2 = lrn2$par.vals$minsplit + expect_true(is.expression(x1)) + expect_true(!is.expression(x2)) + expect_equal(lrn2$par.vals$minsplit, 2 * getTaskNFeats(binaryclass.task)) +}) + +test_that("expressions in parameter sets", { + ps1 = makeParamSet( + makeNumericParam("C", lower = expression(a), upper = expression(b), trafo = function(x) 2^x), + makeDiscreteParam("sigma", values = 2^c(-1, 1)), + makeDiscreteParam("kernel", values = expression(list(e, f))) + ) + ps2 = evaluateParset(par.set = ps1, task = binaryclass.task, + dict = list(a = -2L, b = 3L, e = "rbfdot", f = "laplacedot")) + + ## expressions within parameter sets + expect_equal(ps2$pars$C$lower, -2L) + expect_equal(ps2$pars$C$upper, 3L) + expect_equal(ps2$pars$kernel$values, list(rbfdot = "rbfdot", laplacedot = "laplacedot")) +}) From 62d6bdb3c7bbd17638915c50eedca8f309833bdc Mon Sep 17 00:00:00 2001 From: Pascal Kerschke Date: Wed, 10 Aug 2016 12:58:54 +0200 Subject: [PATCH 03/17] updated documentation of expression-related files --- NAMESPACE | 2 ++ R/evaluateLearner.R | 3 ++- R/tuneParams.R | 1 + man-roxygen/arg_dict.R | 3 +++ man/evaluateLearner.Rd | 61 ++++++++++++++++++++++++++++++++++++++++++ man/train.Rd | 2 +- man/tuneParams.Rd | 6 ++++- 7 files changed, 75 insertions(+), 3 deletions(-) create mode 100644 man-roxygen/arg_dict.R create mode 100644 man/evaluateLearner.Rd diff --git a/NAMESPACE b/NAMESPACE index 8c4c655660..790375e1e9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -757,6 +757,8 @@ export(dropFeatures) export(dunn) export(estimateRelativeOverfitting) export(estimateResidualVariance) +export(evaluateLearner) +export(evaluateParset) export(expvar) export(f1) export(fdr) diff --git a/R/evaluateLearner.R b/R/evaluateLearner.R index 015c1a8580..3572f9058e 100644 --- a/R/evaluateLearner.R +++ b/R/evaluateLearner.R @@ -8,8 +8,9 @@ #' Dependent parameters with a \code{requires} field must use \code{quote} and not #' \code{expression} to define it. #' @template arg_task +#' @template arg_dict #' @return [\code{\link{Learner}} | \code{\link[ParamHelpers]{ParamSet}}]. -#' @example +#' @examples #' ## (1) evaluation of a learner's hyperparameters #' task = makeClassifTask(data = iris, target = "Species") #' lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) diff --git a/R/tuneParams.R b/R/tuneParams.R index 69f5d15e48..3d26008653 100644 --- a/R/tuneParams.R +++ b/R/tuneParams.R @@ -26,6 +26,7 @@ #' @param control [\code{\link{TuneControl}}]\cr #' Control object for search method. Also selects the optimization algorithm for tuning. #' @template arg_showinfo +#' @template arg_dict #' @return [\code{\link{TuneResult}}]. #' @family tune #' @note If you would like to include results from the training data set, make diff --git a/man-roxygen/arg_dict.R b/man-roxygen/arg_dict.R new file mode 100644 index 0000000000..dc2b0483eb --- /dev/null +++ b/man-roxygen/arg_dict.R @@ -0,0 +1,3 @@ +#' @param dict [\code{environment} | \code{list} | \code{NULL}]\cr +#' Environment or list which will be used for evaluating the variables +#' of expressions within a parameter set. The default is \code{NULL}. diff --git a/man/evaluateLearner.Rd b/man/evaluateLearner.Rd new file mode 100644 index 0000000000..d619d6202d --- /dev/null +++ b/man/evaluateLearner.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/evaluateLearner.R +\name{evaluateLearner} +\alias{evaluateLearner} +\title{Evaluates expressions within a learner or parameter set according to the task.} +\usage{ +evaluateLearner(lrn, task, dict = NULL) +} +\arguments{ +\item{task}{[\code{\link{Task}}]\cr +The task.} + +\item{dict}{[\code{environment} | \code{list} | \code{NULL}]\cr +Environment or list which will be used for evaluating the variables +of expressions within a parameter set. The default is \code{NULL}.} + +\item{learner}{[\code{\link{Learner}} | \code{character(1)}]\cr +The learner. +If you pass a string the learner will be created via \code{\link{makeLearner}}.} + +\item{par.set}{[\code{\link[ParamHelpers]{ParamSet}}]\cr +Parameter set of (hyper)parameters and their constraints. +Dependent parameters with a \code{requires} field must use \code{quote} and not +\code{expression} to define it.} +} +\value{ +[\code{\link{Learner}} | \code{\link[ParamHelpers]{ParamSet}}]. +} +\description{ +Updates learners and/or parameter sets by evaluating their expressions +based on a specific task. +} +\examples{ +## (1) evaluation of a learner's hyperparameters +task = makeClassifTask(data = iris, target = "Species") +lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) +lrn2 = evaluateLearner(lrn = lrn1, task = task) + +lrn1$par.vals$minsplit +lrn2$par.vals$minsplit + +## (2) evaluation of a learner's entire parameter set +task = makeClassifTask(data = iris, target = "Species") +lrn1 = makeLearner("classif.randomForest") +lrn2 = evaluateLearner(lrn = lrn1, task = task) + +## focus on the parameters 'mtry', 'classwt' and 'cutoff' +lrn1$par.set +lrn2$par.set + +## (3) evaluation of a parameter set +task = makeClassifTask(data = iris, target = "Species") +ps1 = makeParamSet( + makeNumericParam("C", lower = expression(a), upper = expression(b), trafo = function(x) 2^x), + makeDiscreteParam("sigma", values = 2^c(-1, 1)), + makeDiscreteParam("kernel", values = expression(list(e, f))) +) +ps2 = evaluateParset(par.set = ps1, task = task, + dict = list(a = -2, b = 3, e = "rbfdot", f = "laplacedot")) +} + diff --git a/man/train.Rd b/man/train.Rd index a3bec073f3..b292d2edde 100644 --- a/man/train.Rd +++ b/man/train.Rd @@ -4,7 +4,7 @@ \alias{train} \title{Train a learning algorithm.} \usage{ -train(learner, task, subset, weights = NULL) +train(learner, task, subset, weights = NULL, dict = NULL) } \arguments{ \item{learner}{[\code{\link{Learner}} | \code{character(1)}]\cr diff --git a/man/tuneParams.Rd b/man/tuneParams.Rd index 009644958d..c608cc55f8 100644 --- a/man/tuneParams.Rd +++ b/man/tuneParams.Rd @@ -5,7 +5,7 @@ \title{Hyperparameter tuning.} \usage{ tuneParams(learner, task, resampling, measures, par.set, control, - show.info = getMlrOption("show.info")) + show.info = getMlrOption("show.info"), dict = NULL) } \arguments{ \item{learner}{[\code{\link{Learner}} | \code{character(1)}]\cr @@ -37,6 +37,10 @@ Control object for search method. Also selects the optimization algorithm for tu \item{show.info}{[\code{logical(1)}]\cr Print verbose output on console? Default is set via \code{\link{configureMlr}}.} + +\item{dict}{[\code{environment} | \code{list} | \code{NULL}]\cr +Environment or list which will be used for evaluating the variables +of expressions within a parameter set. The default is \code{NULL}.} } \value{ [\code{\link{TuneResult}}]. From 8b20d6fd0aa090e753a9882802593f0c0c453fea Mon Sep 17 00:00:00 2001 From: Pascal Kerschke Date: Wed, 10 Aug 2016 17:07:09 +0200 Subject: [PATCH 04/17] fixing naming issues --- R/evaluateLearner.R | 16 ++++++++-------- R/train.R | 3 ++- R/tuneParams.R | 2 +- man/evaluateLearner.Rd | 10 +++++----- man/train.Rd | 4 ++++ tests/testthat/test_base_evaluateLearner.R | 4 ++-- 6 files changed, 22 insertions(+), 17 deletions(-) diff --git a/R/evaluateLearner.R b/R/evaluateLearner.R index 3572f9058e..85602ea41e 100644 --- a/R/evaluateLearner.R +++ b/R/evaluateLearner.R @@ -38,24 +38,24 @@ #' ps2 = evaluateParset(par.set = ps1, task = task, #' dict = list(a = -2, b = 3, e = "rbfdot", f = "laplacedot")) #' @export -evaluateLearner = function(lrn, task, dict = NULL) { +evaluateLearner = function(learner, task, dict = NULL) { task.dict = makeTaskDictionary(task) dict = insert(task.dict, dict) if (!is.null(dict)) { - lrn$par.set = evaluateParset(lrn$par.set, task = task, dict = dict) - if (length(lrn$par.vals) > 0 && any(vlapply(lrn$par.vals, is.expression))) - lrn$par.vals = lapply(lrn$par.vals, function(expr) eval(expr, envir = dict)) + learner$par.set = evaluateParset(learner$par.set, task = task, dict = dict) + if (length(learner$par.vals) > 0 && any(vlapply(learner$par.vals, is.expression))) + learner$par.vals = lapply(learner$par.vals, function(expr) eval(expr, envir = dict)) } - return(lrn) + return(learner) } #' @export evaluateParset = function(par.set, task, dict = NULL) { - task.dict = makeTaskDictionary(task) + task.dict = makeTaskDictionary(task = task) dict = insert(task.dict, dict) if (!is.null(dict)) { - if (ParamHelpers::hasExpression(par.set)) { - ParamHelpers::checkParamSet(par.set, dict = dict) + if (ParamHelpers::hasExpression(par = par.set)) { + ParamHelpers::checkParamSet(par.set = par.set, dict = dict) par.set = ParamHelpers::evaluateParamSet(par.set = par.set, dict = dict) ## assure that the value names are also shown if the values list was unnamed par.set$pars = lapply(par.set$pars, function(x) { diff --git a/R/train.R b/R/train.R index 1bd6325519..48fd0a7a67 100644 --- a/R/train.R +++ b/R/train.R @@ -11,6 +11,7 @@ #' If given, must be of same length as \code{subset} and in corresponding order. #' By default \code{NULL} which means no weights are used unless specified in the task (\code{\link{Task}}). #' Weights from the task will be overwritten. +#' @template arg_dict #' @return [\code{\link{WrappedModel}}]. #' @export #' @seealso \code{\link{predict.WrappedModel}} @@ -31,7 +32,7 @@ train = function(learner, task, subset, weights = NULL, dict = NULL) { learner = checkLearner(learner) if (ParamHelpers::hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) - learner = evaluateLearner(lrn = learner, task = task) + learner = evaluateLearner(learner = learner, task = task) assertClass(task, classes = "Task") if (missing(subset)) { subset = seq_len(getTaskSize(task)) diff --git a/R/tuneParams.R b/R/tuneParams.R index 3d26008653..cff0102295 100644 --- a/R/tuneParams.R +++ b/R/tuneParams.R @@ -81,7 +81,7 @@ tuneParams = function(learner, task, resampling, measures, par.set, control, sho learner = checkLearner(learner) assertClass(task, classes = "Task") if (ParamHelpers::hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) - learner = evaluateLearner(lrn = learner, task = task, dict = dict) + learner = evaluateLearner(learner = learner, task = task, dict = dict) measures = checkMeasures(measures, learner) assertClass(par.set, classes = "ParamSet") if (ParamHelpers::hasExpression(par.set)) diff --git a/man/evaluateLearner.Rd b/man/evaluateLearner.Rd index d619d6202d..72a9b2072a 100644 --- a/man/evaluateLearner.Rd +++ b/man/evaluateLearner.Rd @@ -4,9 +4,13 @@ \alias{evaluateLearner} \title{Evaluates expressions within a learner or parameter set according to the task.} \usage{ -evaluateLearner(lrn, task, dict = NULL) +evaluateLearner(learner, task, dict = NULL) } \arguments{ +\item{learner}{[\code{\link{Learner}} | \code{character(1)}]\cr +The learner. +If you pass a string the learner will be created via \code{\link{makeLearner}}.} + \item{task}{[\code{\link{Task}}]\cr The task.} @@ -14,10 +18,6 @@ The task.} Environment or list which will be used for evaluating the variables of expressions within a parameter set. The default is \code{NULL}.} -\item{learner}{[\code{\link{Learner}} | \code{character(1)}]\cr -The learner. -If you pass a string the learner will be created via \code{\link{makeLearner}}.} - \item{par.set}{[\code{\link[ParamHelpers]{ParamSet}}]\cr Parameter set of (hyper)parameters and their constraints. Dependent parameters with a \code{requires} field must use \code{quote} and not diff --git a/man/train.Rd b/man/train.Rd index b292d2edde..a3a1f954f5 100644 --- a/man/train.Rd +++ b/man/train.Rd @@ -23,6 +23,10 @@ Optional, non-negative case weight vector to be used during fitting. If given, must be of same length as \code{subset} and in corresponding order. By default \code{NULL} which means no weights are used unless specified in the task (\code{\link{Task}}). Weights from the task will be overwritten.} + +\item{dict}{[\code{environment} | \code{list} | \code{NULL}]\cr +Environment or list which will be used for evaluating the variables +of expressions within a parameter set. The default is \code{NULL}.} } \value{ [\code{\link{WrappedModel}}]. diff --git a/tests/testthat/test_base_evaluateLearner.R b/tests/testthat/test_base_evaluateLearner.R index b9bf9c268a..e93c33b0b1 100644 --- a/tests/testthat/test_base_evaluateLearner.R +++ b/tests/testthat/test_base_evaluateLearner.R @@ -3,7 +3,7 @@ context("evaluate expressions") test_that("expressions in learners", { ## expressions within parameter sets lrn1 = makeLearner("classif.randomForest") - lrn2 = evaluateLearner(lrn = lrn1, task = binaryclass.task) + lrn2 = evaluateLearner(learner = lrn1, task = binaryclass.task) x1 = lrn1$par.set$pars$mtry$default x2 = lrn2$par.set$pars$mtry$default expect_true(is.expression(x1)) @@ -24,7 +24,7 @@ test_that("expressions in learners", { ## expressions within hyperparameters lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) - lrn2 = evaluateLearner(lrn = lrn1, task = binaryclass.task) + lrn2 = evaluateLearner(learner = lrn1, task = binaryclass.task) x1 = lrn1$par.vals$minsplit x2 = lrn2$par.vals$minsplit expect_true(is.expression(x1)) From f85ad8737cdcb0665beecdae8eecc4ce2fbe518d Mon Sep 17 00:00:00 2001 From: Pascal Kerschke Date: Wed, 10 Aug 2016 20:38:33 +0200 Subject: [PATCH 05/17] further doc fixes --- R/evaluateLearner.R | 7 +++++-- man/evaluateLearner.Rd | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/R/evaluateLearner.R b/R/evaluateLearner.R index 85602ea41e..11ab6bd882 100644 --- a/R/evaluateLearner.R +++ b/R/evaluateLearner.R @@ -10,11 +10,13 @@ #' @template arg_task #' @template arg_dict #' @return [\code{\link{Learner}} | \code{\link[ParamHelpers]{ParamSet}}]. +#' @name evaluateLearner +#' @rdname evaluateLearner #' @examples #' ## (1) evaluation of a learner's hyperparameters #' task = makeClassifTask(data = iris, target = "Species") #' lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) -#' lrn2 = evaluateLearner(lrn = lrn1, task = task) +#' lrn2 = evaluateLearner(learner = lrn1, task = task) #' #' lrn1$par.vals$minsplit #' lrn2$par.vals$minsplit @@ -22,7 +24,7 @@ #' ## (2) evaluation of a learner's entire parameter set #' task = makeClassifTask(data = iris, target = "Species") #' lrn1 = makeLearner("classif.randomForest") -#' lrn2 = evaluateLearner(lrn = lrn1, task = task) +#' lrn2 = evaluateLearner(learner = lrn1, task = task) #' #' ## focus on the parameters 'mtry', 'classwt' and 'cutoff' #' lrn1$par.set @@ -49,6 +51,7 @@ evaluateLearner = function(learner, task, dict = NULL) { return(learner) } +#' @rdname evaluateLearner #' @export evaluateParset = function(par.set, task, dict = NULL) { task.dict = makeTaskDictionary(task = task) diff --git a/man/evaluateLearner.Rd b/man/evaluateLearner.Rd index 72a9b2072a..c985211ccc 100644 --- a/man/evaluateLearner.Rd +++ b/man/evaluateLearner.Rd @@ -2,9 +2,12 @@ % Please edit documentation in R/evaluateLearner.R \name{evaluateLearner} \alias{evaluateLearner} +\alias{evaluateParset} \title{Evaluates expressions within a learner or parameter set according to the task.} \usage{ evaluateLearner(learner, task, dict = NULL) + +evaluateParset(par.set, task, dict = NULL) } \arguments{ \item{learner}{[\code{\link{Learner}} | \code{character(1)}]\cr @@ -34,7 +37,7 @@ based on a specific task. ## (1) evaluation of a learner's hyperparameters task = makeClassifTask(data = iris, target = "Species") lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) -lrn2 = evaluateLearner(lrn = lrn1, task = task) +lrn2 = evaluateLearner(learner = lrn1, task = task) lrn1$par.vals$minsplit lrn2$par.vals$minsplit @@ -42,7 +45,7 @@ lrn2$par.vals$minsplit ## (2) evaluation of a learner's entire parameter set task = makeClassifTask(data = iris, target = "Species") lrn1 = makeLearner("classif.randomForest") -lrn2 = evaluateLearner(lrn = lrn1, task = task) +lrn2 = evaluateLearner(learner = lrn1, task = task) ## focus on the parameters 'mtry', 'classwt' and 'cutoff' lrn1$par.set From a13c4eb832b11eaca28068d112bce96d59a1e359 Mon Sep 17 00:00:00 2001 From: Pascal Kerschke Date: Thu, 11 Aug 2016 18:03:58 +0200 Subject: [PATCH 06/17] remove dict argument --- R/evaluateLearner.R | 47 +++++++++------------- R/train.R | 3 +- R/tuneParams.R | 7 ++-- tests/testthat/test_base_evaluateLearner.R | 14 +++---- 4 files changed, 29 insertions(+), 42 deletions(-) diff --git a/R/evaluateLearner.R b/R/evaluateLearner.R index 11ab6bd882..9a93ebfdf9 100644 --- a/R/evaluateLearner.R +++ b/R/evaluateLearner.R @@ -8,7 +8,6 @@ #' Dependent parameters with a \code{requires} field must use \code{quote} and not #' \code{expression} to define it. #' @template arg_task -#' @template arg_dict #' @return [\code{\link{Learner}} | \code{\link[ParamHelpers]{ParamSet}}]. #' @name evaluateLearner #' @rdname evaluateLearner @@ -33,41 +32,33 @@ #' ## (3) evaluation of a parameter set #' task = makeClassifTask(data = iris, target = "Species") #' ps1 = makeParamSet( -#' makeNumericParam("C", lower = expression(a), upper = expression(b), trafo = function(x) 2^x), -#' makeDiscreteParam("sigma", values = 2^c(-1, 1)), -#' makeDiscreteParam("kernel", values = expression(list(e, f))) +#' makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x), +#' makeDiscreteParam("sigma", values = expression(list(k, p))) #' ) -#' ps2 = evaluateParset(par.set = ps1, task = task, -#' dict = list(a = -2, b = 3, e = "rbfdot", f = "laplacedot")) +#' ps2 = evaluateParset(par.set = ps1, task = task) #' @export -evaluateLearner = function(learner, task, dict = NULL) { - task.dict = makeTaskDictionary(task) - dict = insert(task.dict, dict) - if (!is.null(dict)) { - learner$par.set = evaluateParset(learner$par.set, task = task, dict = dict) - if (length(learner$par.vals) > 0 && any(vlapply(learner$par.vals, is.expression))) - learner$par.vals = lapply(learner$par.vals, function(expr) eval(expr, envir = dict)) - } +evaluateLearner = function(learner, task) { + dict = makeTaskDictionary(task) + learner$par.set = evaluateParset(learner$par.set, task = task) + if (length(learner$par.vals) > 0 && any(vlapply(learner$par.vals, is.expression))) + learner$par.vals = lapply(learner$par.vals, function(expr) eval(expr, envir = dict)) return(learner) } #' @rdname evaluateLearner #' @export -evaluateParset = function(par.set, task, dict = NULL) { - task.dict = makeTaskDictionary(task = task) - dict = insert(task.dict, dict) - if (!is.null(dict)) { - if (ParamHelpers::hasExpression(par = par.set)) { - ParamHelpers::checkParamSet(par.set = par.set, dict = dict) - par.set = ParamHelpers::evaluateParamSet(par.set = par.set, dict = dict) - ## assure that the value names are also shown if the values list was unnamed - par.set$pars = lapply(par.set$pars, function(x) { - if (is.null(x$values) || !is.null(names(x$values))) - return(x) - names(x$values) = unlist(lapply(x$values, function(vals) vals)) +evaluateParset = function(par.set, task) { + dict = makeTaskDictionary(task = task) + if (ParamHelpers::hasExpression(par = par.set)) { + ParamHelpers::checkParamSet(par.set = par.set, dict = dict) + par.set = ParamHelpers::evaluateParamSet(par.set = par.set, dict = dict) + ## assure that the value names are also shown if the values list was unnamed + par.set$pars = lapply(par.set$pars, function(x) { + if (is.null(x$values) || !is.null(names(x$values))) return(x) - }) - } + names(x$values) = unlist(lapply(x$values, function(vals) vals)) + return(x) + }) } return(par.set) } diff --git a/R/train.R b/R/train.R index 48fd0a7a67..a42981370a 100644 --- a/R/train.R +++ b/R/train.R @@ -11,7 +11,6 @@ #' If given, must be of same length as \code{subset} and in corresponding order. #' By default \code{NULL} which means no weights are used unless specified in the task (\code{\link{Task}}). #' Weights from the task will be overwritten. -#' @template arg_dict #' @return [\code{\link{WrappedModel}}]. #' @export #' @seealso \code{\link{predict.WrappedModel}} @@ -29,7 +28,7 @@ #' learner = makeLearner("classif.rpart", minsplit = 7, predict.type = "prob") #' mod = train(learner, task, subset = training.set) #' print(mod) -train = function(learner, task, subset, weights = NULL, dict = NULL) { +train = function(learner, task, subset, weights = NULL) { learner = checkLearner(learner) if (ParamHelpers::hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) learner = evaluateLearner(learner = learner, task = task) diff --git a/R/tuneParams.R b/R/tuneParams.R index cff0102295..ac54878268 100644 --- a/R/tuneParams.R +++ b/R/tuneParams.R @@ -26,7 +26,6 @@ #' @param control [\code{\link{TuneControl}}]\cr #' Control object for search method. Also selects the optimization algorithm for tuning. #' @template arg_showinfo -#' @template arg_dict #' @return [\code{\link{TuneResult}}]. #' @family tune #' @note If you would like to include results from the training data set, make @@ -77,15 +76,15 @@ #' print(head(as.data.frame(res$opt.path))) #' } #' @seealso \code{\link{generateHyperParsEffectData}} -tuneParams = function(learner, task, resampling, measures, par.set, control, show.info = getMlrOption("show.info"), dict = NULL) { +tuneParams = function(learner, task, resampling, measures, par.set, control, show.info = getMlrOption("show.info")) { learner = checkLearner(learner) assertClass(task, classes = "Task") if (ParamHelpers::hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) - learner = evaluateLearner(learner = learner, task = task, dict = dict) + learner = evaluateLearner(learner = learner, task = task) measures = checkMeasures(measures, learner) assertClass(par.set, classes = "ParamSet") if (ParamHelpers::hasExpression(par.set)) - par.set = evaluateParset(par.set = par.set, task = task, dict = dict) + par.set = evaluateParset(par.set = par.set, task = task) assertClass(control, classes = "TuneControl") if (!inherits(resampling, "ResampleDesc") && !inherits(resampling, "ResampleInstance")) stop("Argument resampling must be of class ResampleDesc or ResampleInstance!") diff --git a/tests/testthat/test_base_evaluateLearner.R b/tests/testthat/test_base_evaluateLearner.R index e93c33b0b1..cc30920067 100644 --- a/tests/testthat/test_base_evaluateLearner.R +++ b/tests/testthat/test_base_evaluateLearner.R @@ -34,15 +34,13 @@ test_that("expressions in learners", { test_that("expressions in parameter sets", { ps1 = makeParamSet( - makeNumericParam("C", lower = expression(a), upper = expression(b), trafo = function(x) 2^x), - makeDiscreteParam("sigma", values = 2^c(-1, 1)), - makeDiscreteParam("kernel", values = expression(list(e, f))) + makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x), + makeDiscreteParam("sigma", values = expression(list(p, k))) ) - ps2 = evaluateParset(par.set = ps1, task = binaryclass.task, - dict = list(a = -2L, b = 3L, e = "rbfdot", f = "laplacedot")) + ps2 = evaluateParset(par.set = ps1, task = binaryclass.task) ## expressions within parameter sets - expect_equal(ps2$pars$C$lower, -2L) - expect_equal(ps2$pars$C$upper, 3L) - expect_equal(ps2$pars$kernel$values, list(rbfdot = "rbfdot", laplacedot = "laplacedot")) + expect_equal(ps2$pars$C$lower, 2L) + expect_equal(ps2$pars$C$upper, 208L) + expect_equal(ps2$pars$sigma$values, list("60" = 60, "2" = 2)) }) From eb9ec14c5b326ace821c17ecb90cf8037c3671f7 Mon Sep 17 00:00:00 2001 From: Pascal Kerschke Date: Thu, 11 Aug 2016 18:12:20 +0200 Subject: [PATCH 07/17] updating man-pages --- man/evaluateLearner.Rd | 16 +++++----------- man/train.Rd | 6 +----- man/tuneParams.Rd | 6 +----- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/man/evaluateLearner.Rd b/man/evaluateLearner.Rd index c985211ccc..44c69c2a85 100644 --- a/man/evaluateLearner.Rd +++ b/man/evaluateLearner.Rd @@ -5,9 +5,9 @@ \alias{evaluateParset} \title{Evaluates expressions within a learner or parameter set according to the task.} \usage{ -evaluateLearner(learner, task, dict = NULL) +evaluateLearner(learner, task) -evaluateParset(par.set, task, dict = NULL) +evaluateParset(par.set, task) } \arguments{ \item{learner}{[\code{\link{Learner}} | \code{character(1)}]\cr @@ -17,10 +17,6 @@ If you pass a string the learner will be created via \code{\link{makeLearner}}.} \item{task}{[\code{\link{Task}}]\cr The task.} -\item{dict}{[\code{environment} | \code{list} | \code{NULL}]\cr -Environment or list which will be used for evaluating the variables -of expressions within a parameter set. The default is \code{NULL}.} - \item{par.set}{[\code{\link[ParamHelpers]{ParamSet}}]\cr Parameter set of (hyper)parameters and their constraints. Dependent parameters with a \code{requires} field must use \code{quote} and not @@ -54,11 +50,9 @@ lrn2$par.set ## (3) evaluation of a parameter set task = makeClassifTask(data = iris, target = "Species") ps1 = makeParamSet( - makeNumericParam("C", lower = expression(a), upper = expression(b), trafo = function(x) 2^x), - makeDiscreteParam("sigma", values = 2^c(-1, 1)), - makeDiscreteParam("kernel", values = expression(list(e, f))) + makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x), + makeDiscreteParam("sigma", values = expression(list(k, p))) ) -ps2 = evaluateParset(par.set = ps1, task = task, - dict = list(a = -2, b = 3, e = "rbfdot", f = "laplacedot")) +ps2 = evaluateParset(par.set = ps1, task = task) } diff --git a/man/train.Rd b/man/train.Rd index a3a1f954f5..a3bec073f3 100644 --- a/man/train.Rd +++ b/man/train.Rd @@ -4,7 +4,7 @@ \alias{train} \title{Train a learning algorithm.} \usage{ -train(learner, task, subset, weights = NULL, dict = NULL) +train(learner, task, subset, weights = NULL) } \arguments{ \item{learner}{[\code{\link{Learner}} | \code{character(1)}]\cr @@ -23,10 +23,6 @@ Optional, non-negative case weight vector to be used during fitting. If given, must be of same length as \code{subset} and in corresponding order. By default \code{NULL} which means no weights are used unless specified in the task (\code{\link{Task}}). Weights from the task will be overwritten.} - -\item{dict}{[\code{environment} | \code{list} | \code{NULL}]\cr -Environment or list which will be used for evaluating the variables -of expressions within a parameter set. The default is \code{NULL}.} } \value{ [\code{\link{WrappedModel}}]. diff --git a/man/tuneParams.Rd b/man/tuneParams.Rd index c608cc55f8..009644958d 100644 --- a/man/tuneParams.Rd +++ b/man/tuneParams.Rd @@ -5,7 +5,7 @@ \title{Hyperparameter tuning.} \usage{ tuneParams(learner, task, resampling, measures, par.set, control, - show.info = getMlrOption("show.info"), dict = NULL) + show.info = getMlrOption("show.info")) } \arguments{ \item{learner}{[\code{\link{Learner}} | \code{character(1)}]\cr @@ -37,10 +37,6 @@ Control object for search method. Also selects the optimization algorithm for tu \item{show.info}{[\code{logical(1)}]\cr Print verbose output on console? Default is set via \code{\link{configureMlr}}.} - -\item{dict}{[\code{environment} | \code{list} | \code{NULL}]\cr -Environment or list which will be used for evaluating the variables -of expressions within a parameter set. The default is \code{NULL}.} } \value{ [\code{\link{TuneResult}}]. From eaea99e1ec7c7efaf34e2ddefdf5cc1ae382532f Mon Sep 17 00:00:00 2001 From: Pascal Kerschke Date: Thu, 11 Aug 2016 18:16:42 +0200 Subject: [PATCH 08/17] rm dict_template --- man-roxygen/arg_dict.R | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 man-roxygen/arg_dict.R diff --git a/man-roxygen/arg_dict.R b/man-roxygen/arg_dict.R deleted file mode 100644 index dc2b0483eb..0000000000 --- a/man-roxygen/arg_dict.R +++ /dev/null @@ -1,3 +0,0 @@ -#' @param dict [\code{environment} | \code{list} | \code{NULL}]\cr -#' Environment or list which will be used for evaluating the variables -#' of expressions within a parameter set. The default is \code{NULL}. From 7227a9c02feee230c86db12f31109b42aed1aeb5 Mon Sep 17 00:00:00 2001 From: Pascal Kerschke Date: Fri, 12 Aug 2016 18:47:49 +0200 Subject: [PATCH 09/17] better documentation of expression-related functions --- R/Task_operators.R | 1 + R/evaluateLearner.R | 22 +++++++++++++---- R/makeLearner.R | 12 ++++++++++ R/setHyperPars.R | 13 ++++++++++ man/evaluateLearner.Rd | 22 +++++++++++++---- man/makeLearner.Rd | 14 +++++++++++ man/setHyperPars.Rd | 15 ++++++++++++ tests/testthat/helper_mock_learners.R | 19 +++++++++++++++ tests/testthat/test_base_evaluateLearner.R | 28 +++++++++++++++------- 9 files changed, 128 insertions(+), 18 deletions(-) diff --git a/R/Task_operators.R b/R/Task_operators.R index b44835cb10..7c2e135450 100644 --- a/R/Task_operators.R +++ b/R/Task_operators.R @@ -466,5 +466,6 @@ makeTaskDictionary = function(task) { dict$type = getTaskType(task) if (dict$type == "classif") dict$k = length(getTaskClassLevels(task)) + # dict$keys = setdiff(c(names(task$task.desc), names(task), "data", names(task$env$data)), names(dict)) return(dict) } diff --git a/R/evaluateLearner.R b/R/evaluateLearner.R index 9a93ebfdf9..a17f0143ee 100644 --- a/R/evaluateLearner.R +++ b/R/evaluateLearner.R @@ -1,7 +1,7 @@ #' @title Evaluates expressions within a learner or parameter set according to the task. #' #' @description Updates learners and/or parameter sets by evaluating their expressions -#' based on a specific task. +#' based on a specific task. An overview of the possible expressions can be found in the details. #' @template arg_learner #' @param par.set [\code{\link[ParamHelpers]{ParamSet}}]\cr #' Parameter set of (hyper)parameters and their constraints. @@ -11,14 +11,26 @@ #' @return [\code{\link{Learner}} | \code{\link[ParamHelpers]{ParamSet}}]. #' @name evaluateLearner #' @rdname evaluateLearner +#' @details The expressions can be based on any information provided by the task. For convenience, +#' the most often used keys are available directly +#' \itemize{ +#' \item{\code{task}:} the task itself, allowing to access any of its elements +#' \item{\code{p}:} the number of features in the task +#' \item{\code{n}:} the number of observations in the task +#' \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" +#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) +#' } +#' However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could +#' access the "blocking" via \code{task$task.desc$has.blocking}. #' @examples #' ## (1) evaluation of a learner's hyperparameters #' task = makeClassifTask(data = iris, target = "Species") -#' lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) +#' lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p), +#' minbucket = expression(3L + 4L * task$task.desc$has.blocking)) #' lrn2 = evaluateLearner(learner = lrn1, task = task) #' -#' lrn1$par.vals$minsplit -#' lrn2$par.vals$minsplit +#' lrn1$par.vals +#' lrn2$par.vals #' #' ## (2) evaluation of a learner's entire parameter set #' task = makeClassifTask(data = iris, target = "Species") @@ -38,7 +50,7 @@ #' ps2 = evaluateParset(par.set = ps1, task = task) #' @export evaluateLearner = function(learner, task) { - dict = makeTaskDictionary(task) + dict = makeTaskDictionary(task = task) learner$par.set = evaluateParset(learner$par.set, task = task) if (length(learner$par.vals) > 0 && any(vlapply(learner$par.vals, is.expression))) learner$par.vals = lapply(learner$par.vals, function(expr) eval(expr, envir = dict)) diff --git a/R/makeLearner.R b/R/makeLearner.R index d0e65345da..64ef3645fb 100644 --- a/R/makeLearner.R +++ b/R/makeLearner.R @@ -43,11 +43,23 @@ #' @return [\code{\link{Learner}}]. #' @family learner #' @export +#' @details Note that learners can also contain task dependent expressions, which can be based on any +#' information provided by the task. For convenience, the most often used keys are available directly +#' \itemize{ +#' \item{\code{task}:} the task itself, allowing to access any of its elements +#' \item{\code{p}:} the number of features in the task +#' \item{\code{n}:} the number of observations in the task +#' \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" +#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) +#' } +#' However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could +#' access the "blocking" via \code{task$task.desc$has.blocking}. #' @aliases Learner #' @seealso [\code{\link{resample}}], [\code{\link{predict.WrappedModel}}] #' @examples #' makeLearner("classif.rpart") #' makeLearner("classif.lda", predict.type = "prob") +#' makeLearner("classif.rpart", minsplit = expression(k)) #' lrn = makeLearner("classif.lda", method = "t", nu = 10) #' print(lrn$par.vals) makeLearner = function(cl, id = cl, predict.type = "response", predict.threshold = NULL, diff --git a/R/setHyperPars.R b/R/setHyperPars.R index 8e75988be5..4fd73416dd 100644 --- a/R/setHyperPars.R +++ b/R/setHyperPars.R @@ -11,14 +11,27 @@ #' @note If a named (hyper)parameter can't be found for the given learner, the 3 #' closest (hyper)parameter names will be output in case the user mistyped. #' @export +#' @details Note that learners can also contain task dependent expressions, which can be based on any +#' information provided by the task. For convenience, the most often used keys are available directly +#' \itemize{ +#' \item{\code{task}:} the task itself, allowing to access any of its elements +#' \item{\code{p}:} the number of features in the task +#' \item{\code{n}:} the number of observations in the task +#' \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" +#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) +#' } +#' However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could +#' access the "blocking" via \code{task$task.desc$has.blocking}. #' @family learner #' @importFrom utils adist #' @examples #' cl1 = makeLearner("classif.ksvm", sigma = 1) #' cl2 = setHyperPars(cl1, sigma = 10, par.vals = list(C = 2)) +#' cl3 = setHyperPars(cl2, C = expression(round(n / p))) #' print(cl1) #' # note the now set and altered hyperparameters: #' print(cl2) +#' print(cl3) setHyperPars = function(learner, ..., par.vals = list()) { args = list(...) assertClass(learner, classes = "Learner") diff --git a/man/evaluateLearner.Rd b/man/evaluateLearner.Rd index 44c69c2a85..e883d77841 100644 --- a/man/evaluateLearner.Rd +++ b/man/evaluateLearner.Rd @@ -27,16 +27,30 @@ Dependent parameters with a \code{requires} field must use \code{quote} and not } \description{ Updates learners and/or parameter sets by evaluating their expressions -based on a specific task. +based on a specific task. An overview of the possible expressions can be found in the details. +} +\details{ +The expressions can be based on any information provided by the task. For convenience, +the most often used keys are available directly +\itemize{ + \item{\code{task}:} the task itself, allowing to access any of its elements + \item{\code{p}:} the number of features in the task + \item{\code{n}:} the number of observations in the task + \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" + \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) +} +However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could +access the "blocking" via \code{task$task.desc$has.blocking}. } \examples{ ## (1) evaluation of a learner's hyperparameters task = makeClassifTask(data = iris, target = "Species") -lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) +lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p), + minbucket = expression(3L + 4L * task$task.desc$has.blocking)) lrn2 = evaluateLearner(learner = lrn1, task = task) -lrn1$par.vals$minsplit -lrn2$par.vals$minsplit +lrn1$par.vals +lrn2$par.vals ## (2) evaluation of a learner's entire parameter set task = makeClassifTask(data = iris, target = "Species") diff --git a/man/makeLearner.Rd b/man/makeLearner.Rd index 50f619d53d..de87c79e74 100644 --- a/man/makeLearner.Rd +++ b/man/makeLearner.Rd @@ -72,9 +72,23 @@ value selects the label. The threshold used to assign the label can later be cha To see all possible properties of a learner, go to: \code{\link{LearnerProperties}}. } +\details{ +Note that learners can also contain task dependent expressions, which can be based on any +information provided by the task. For convenience, the most often used keys are available directly +\itemize{ + \item{\code{task}:} the task itself, allowing to access any of its elements + \item{\code{p}:} the number of features in the task + \item{\code{n}:} the number of observations in the task + \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" + \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) +} +However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could +access the "blocking" via \code{task$task.desc$has.blocking}. +} \examples{ makeLearner("classif.rpart") makeLearner("classif.lda", predict.type = "prob") +makeLearner("classif.rpart", minsplit = expression(k)) lrn = makeLearner("classif.lda", method = "t", nu = 10) print(lrn$par.vals) } diff --git a/man/setHyperPars.Rd b/man/setHyperPars.Rd index f54ef06cb7..ba49d48215 100644 --- a/man/setHyperPars.Rd +++ b/man/setHyperPars.Rd @@ -25,6 +25,19 @@ Optional list of named (hyper)parameter settings. The arguments in \description{ Set the hyperparameters of a learner object. } +\details{ +Note that learners can also contain task dependent expressions, which can be based on any +information provided by the task. For convenience, the most often used keys are available directly +\itemize{ + \item{\code{task}:} the task itself, allowing to access any of its elements + \item{\code{p}:} the number of features in the task + \item{\code{n}:} the number of observations in the task + \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" + \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) +} +However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could +access the "blocking" via \code{task$task.desc$has.blocking}. +} \note{ If a named (hyper)parameter can't be found for the given learner, the 3 closest (hyper)parameter names will be output in case the user mistyped. @@ -32,9 +45,11 @@ closest (hyper)parameter names will be output in case the user mistyped. \examples{ cl1 = makeLearner("classif.ksvm", sigma = 1) cl2 = setHyperPars(cl1, sigma = 10, par.vals = list(C = 2)) +cl3 = setHyperPars(cl2, C = expression(round(n / p))) print(cl1) # note the now set and altered hyperparameters: print(cl2) +print(cl3) } \seealso{ Other learner: \code{\link{LearnerProperties}}, diff --git a/tests/testthat/helper_mock_learners.R b/tests/testthat/helper_mock_learners.R index 0d952aa0b3..ddadeb7bfa 100644 --- a/tests/testthat/helper_mock_learners.R +++ b/tests/testthat/helper_mock_learners.R @@ -117,4 +117,23 @@ registerS3method("predictLearner", "regr.__mlrmocklearners__6", predictLearner.r +# contains expressions in the parameter set and in the hyper params +makeRLearner.classif.__mlrmocklearners__7 = function() { + makeRLearnerClassif( + cl = "classif.__mlrmocklearners__7", + package = character(0L), + par.set = makeParamSet( + makeIntegerLearnerParam(id = "minsplit", default = 20L, lower = 1L), + makeIntegerLearnerParam(id = "mtry", lower = 1L, default = expression(floor(sqrt(p)))), + makeLogicalLearnerParam(id = "importance", default = expression(task$task.desc$has.blocking)), + makeNumericVectorLearnerParam(id = "classwt", lower = 0, len = expression(k)), + keys = c("task.desc", "has.blocking") + ), + par.vals = list(minsplit = expression(ceiling(0.1 * (n + p)))), + properties = c("twoclass", "multiclass", "numerics", "factors", "ordered", "prob"), + name = "Mock Learner 7", + short.name = "mock7" + ) +} +registerS3method("makeRLearner", "classif.__mlrmocklearners__7", makeRLearner.classif.__mlrmocklearners__7) diff --git a/tests/testthat/test_base_evaluateLearner.R b/tests/testthat/test_base_evaluateLearner.R index cc30920067..45d6fb1e2c 100644 --- a/tests/testthat/test_base_evaluateLearner.R +++ b/tests/testthat/test_base_evaluateLearner.R @@ -1,28 +1,38 @@ context("evaluate expressions") test_that("expressions in learners", { - ## expressions within parameter sets - lrn1 = makeLearner("classif.randomForest") + ## expressions within 'pre-defined' learners + ## (1) expressions within default of parameter sets + lrn1 = makeLearner("classif.__mlrmocklearners__7") lrn2 = evaluateLearner(learner = lrn1, task = binaryclass.task) x1 = lrn1$par.set$pars$mtry$default x2 = lrn2$par.set$pars$mtry$default expect_true(is.expression(x1)) expect_true(!is.expression(x2)) expect_equal(x2, floor(sqrt(ncol(binaryclass.df)))) - + + ## (2) expressions within length of parameter sets x1 = lrn1$par.set$pars$classwt$len x2 = lrn2$par.set$pars$classwt$len expect_true(is.expression(x1)) expect_true(!is.expression(x2)) - expect_equal(x2, 2) - - x1 = lrn1$par.set$pars$cutoff$len - x2 = lrn2$par.set$pars$cutoff$len + expect_equal(x2, 2L) + + ## (3) expressions that go deeper into the task + x1 = lrn1$par.set$pars$importance$default + x2 = lrn2$par.set$pars$importance$default expect_true(is.expression(x1)) expect_true(!is.expression(x2)) - expect_equal(x2, 2) + expect_equal(x2, binaryclass.task$task.desc$has.blocking) - ## expressions within hyperparameters + ## (4) expressions within hyperparameters + x1 = lrn1$par.vals$minsplit + x2 = lrn2$par.vals$minsplit + expect_true(is.expression(x1)) + expect_true(!is.expression(x2)) + expect_equal(x2, ceiling(0.1 * sum(dim(binaryclass.df)))) + + ## manually constructed expressions within hyperparams lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) lrn2 = evaluateLearner(learner = lrn1, task = binaryclass.task) x1 = lrn1$par.vals$minsplit From 00d92f8023f6470ff5455439c6990870479340ba Mon Sep 17 00:00:00 2001 From: Pascal Kerschke Date: Sat, 13 Aug 2016 03:10:36 +0200 Subject: [PATCH 10/17] removed ParamHelpers:: as it is not necessary --- R/evaluateLearner.R | 6 +++--- R/train.R | 2 +- R/tuneParams.R | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/evaluateLearner.R b/R/evaluateLearner.R index a17f0143ee..a7a637b5f8 100644 --- a/R/evaluateLearner.R +++ b/R/evaluateLearner.R @@ -61,9 +61,9 @@ evaluateLearner = function(learner, task) { #' @export evaluateParset = function(par.set, task) { dict = makeTaskDictionary(task = task) - if (ParamHelpers::hasExpression(par = par.set)) { - ParamHelpers::checkParamSet(par.set = par.set, dict = dict) - par.set = ParamHelpers::evaluateParamSet(par.set = par.set, dict = dict) + if (hasExpression(par = par.set)) { + checkParamSet(par.set = par.set, dict = dict) + par.set = evaluateParamSet(par.set = par.set, dict = dict) ## assure that the value names are also shown if the values list was unnamed par.set$pars = lapply(par.set$pars, function(x) { if (is.null(x$values) || !is.null(names(x$values))) diff --git a/R/train.R b/R/train.R index a42981370a..bd7f0624e0 100644 --- a/R/train.R +++ b/R/train.R @@ -30,7 +30,7 @@ #' print(mod) train = function(learner, task, subset, weights = NULL) { learner = checkLearner(learner) - if (ParamHelpers::hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) + if (hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) learner = evaluateLearner(learner = learner, task = task) assertClass(task, classes = "Task") if (missing(subset)) { diff --git a/R/tuneParams.R b/R/tuneParams.R index ac54878268..aca12ee348 100644 --- a/R/tuneParams.R +++ b/R/tuneParams.R @@ -79,11 +79,11 @@ tuneParams = function(learner, task, resampling, measures, par.set, control, show.info = getMlrOption("show.info")) { learner = checkLearner(learner) assertClass(task, classes = "Task") - if (ParamHelpers::hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) + if (hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) learner = evaluateLearner(learner = learner, task = task) measures = checkMeasures(measures, learner) assertClass(par.set, classes = "ParamSet") - if (ParamHelpers::hasExpression(par.set)) + if (hasExpression(par.set)) par.set = evaluateParset(par.set = par.set, task = task) assertClass(control, classes = "TuneControl") if (!inherits(resampling, "ResampleDesc") && !inherits(resampling, "ResampleInstance")) From e0b4728150d49291d9383c3fa103264ce6944e80 Mon Sep 17 00:00:00 2001 From: Michel Lang Date: Mon, 15 Aug 2016 15:23:53 +0200 Subject: [PATCH 11/17] cleanup; added test --- DESCRIPTION | 3 +- NAMESPACE | 1 + R/Learner_properties.R | 4 ++ R/Task_operators.R | 16 +++--- R/evaluateLearner.R | 61 ++++++++++++---------- R/makeLearner.R | 12 +---- R/setHyperPars.R | 13 +---- R/train.R | 2 +- R/tuneParams.R | 2 +- man/evaluateLearner.Rd | 30 +++++------ man/makeLearner.Rd | 14 +---- man/setHyperPars.Rd | 15 +----- tests/testthat/test_base_evaluateLearner.R | 17 +++++- 13 files changed, 85 insertions(+), 105 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 91a0a91d68..ca4c361a6c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,10 +33,11 @@ Encoding: UTF-8 Roxygen: list(wrap = FALSE) Depends: R (>= 3.0.2), - ParamHelpers (>= 1.8), + ParamHelpers (>= 1.9) Imports: BBmisc (>= 1.10), ggplot2, + ParamHelpers (>= 1.9), stats, stringi, checkmate (>= 1.8.1), diff --git a/NAMESPACE b/NAMESPACE index 790375e1e9..444a4de9ab 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -86,6 +86,7 @@ S3method(getTaskTargetNames,TaskDescUnsupervised) S3method(getTaskTargets,CostSensTask) S3method(getTaskTargets,SupervisedTask) S3method(getTaskTargets,UnsupervisedTask) +S3method(hasExpression,Learner) S3method(impute,Task) S3method(impute,data.frame) S3method(isFailureModel,BaseWrapperModel) diff --git a/R/Learner_properties.R b/R/Learner_properties.R index 29cb7ad3dc..9a81a53041 100644 --- a/R/Learner_properties.R +++ b/R/Learner_properties.R @@ -76,3 +76,7 @@ getSupportedLearnerProperties = function(type = NA_character_) { p[[type]] } +#' @export +hasExpression.Learner = function(par) { + any(hasExpression(par$par.set)) || any(vlapply(par$par.vals, is.expression)) +} diff --git a/R/Task_operators.R b/R/Task_operators.R index 7c2e135450..1dd8da4f44 100644 --- a/R/Task_operators.R +++ b/R/Task_operators.R @@ -254,7 +254,7 @@ getTaskData = function(task, subset, features, target.extra = FALSE, recode.targ checkTask(task, "Task") if (missing(subset)) { - subset = NULL + subset = NULL } else { assert(checkIntegerish(subset), checkLogical(subset)) if (is.logical(subset)) @@ -266,7 +266,7 @@ getTaskData = function(task, subset, features, target.extra = FALSE, recode.targ assertLogical(target.extra) task.features = getTaskFeatureNames(task) - + # if supplied check if the input is right and always convert 'features' # to character vec if (!missing(features)) { @@ -459,13 +459,13 @@ getTaskWeights = function(task) { # features (p), the number of observations (n), the task type (type) and in # case of classification tasks the number of class levels (k) makeTaskDictionary = function(task) { - dict = list() - dict$task = task - dict$p = getTaskNFeats(task) - dict$n = getTaskSize(task) - dict$type = getTaskType(task) + dict = list( + task = task, + p = getTaskNFeats(task), + n = getTaskSize(task), + type = getTaskType(task) + ) if (dict$type == "classif") dict$k = length(getTaskClassLevels(task)) - # dict$keys = setdiff(c(names(task$task.desc), names(task), "data", names(task$env$data)), names(dict)) return(dict) } diff --git a/R/evaluateLearner.R b/R/evaluateLearner.R index a7a637b5f8..dffecb3547 100644 --- a/R/evaluateLearner.R +++ b/R/evaluateLearner.R @@ -1,7 +1,22 @@ #' @title Evaluates expressions within a learner or parameter set according to the task. #' -#' @description Updates learners and/or parameter sets by evaluating their expressions -#' based on a specific task. An overview of the possible expressions can be found in the details. +#' @description +#' A \code{\link{Learner}} or \code{\link[ParamHelpers]{ParamSet}} can contain an unevaluated \code{\link[base]{expression}} +#' as value for a hyperparameter. +#' E.g., these expressions are used if the default value dependents on the task size or an upper limit for a parameter +#' is given by the number of features in a task. +#' The provided functions evaluate such expressions in an environment (dictionary) which holds the following information: +#' \itemize{ +#' \item{\code{task}:} the task itself, allowing to access any of its elements. +#' \item{\code{p}:} the number of features in the task +#' \item{\code{n}:} the number of observations in the task +#' \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" +#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) +#' } +#' Usually the evaluation of the expression is performed automatically, e.g. in \code{\link{train}} or +#' \code{\link{tuneParams}}. +#' Therefore calling \code{evaluateParamSet} or \code{evaluateLearner} manually should not be necessary. +#' #' @template arg_learner #' @param par.set [\code{\link[ParamHelpers]{ParamSet}}]\cr #' Parameter set of (hyper)parameters and their constraints. @@ -11,48 +26,37 @@ #' @return [\code{\link{Learner}} | \code{\link[ParamHelpers]{ParamSet}}]. #' @name evaluateLearner #' @rdname evaluateLearner -#' @details The expressions can be based on any information provided by the task. For convenience, -#' the most often used keys are available directly -#' \itemize{ -#' \item{\code{task}:} the task itself, allowing to access any of its elements -#' \item{\code{p}:} the number of features in the task -#' \item{\code{n}:} the number of observations in the task -#' \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" -#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) -#' } -#' However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could -#' access the "blocking" via \code{task$task.desc$has.blocking}. +#' @export #' @examples #' ## (1) evaluation of a learner's hyperparameters #' task = makeClassifTask(data = iris, target = "Species") #' lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p), #' minbucket = expression(3L + 4L * task$task.desc$has.blocking)) #' lrn2 = evaluateLearner(learner = lrn1, task = task) -#' -#' lrn1$par.vals -#' lrn2$par.vals -#' +#' +#' getHyperPars(lrn1) +#' getHyperPars(lrn2) +#' #' ## (2) evaluation of a learner's entire parameter set #' task = makeClassifTask(data = iris, target = "Species") #' lrn1 = makeLearner("classif.randomForest") #' lrn2 = evaluateLearner(learner = lrn1, task = task) -#' -#' ## focus on the parameters 'mtry', 'classwt' and 'cutoff' -#' lrn1$par.set -#' lrn2$par.set -#' +#' +#' ## Note the values for parameters 'mtry', 'classwt' and 'cutoff' +#' getParamSet(lrn1) +#' getParamSet(lrn2) +#' #' ## (3) evaluation of a parameter set #' task = makeClassifTask(data = iris, target = "Species") #' ps1 = makeParamSet( #' makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x), #' makeDiscreteParam("sigma", values = expression(list(k, p))) #' ) -#' ps2 = evaluateParset(par.set = ps1, task = task) -#' @export +#' evaluateParset(par.set = ps1, task = task) evaluateLearner = function(learner, task) { dict = makeTaskDictionary(task = task) learner$par.set = evaluateParset(learner$par.set, task = task) - if (length(learner$par.vals) > 0 && any(vlapply(learner$par.vals, is.expression))) + if (any(vlapply(learner$par.vals, is.expression))) learner$par.vals = lapply(learner$par.vals, function(expr) eval(expr, envir = dict)) return(learner) } @@ -60,15 +64,14 @@ evaluateLearner = function(learner, task) { #' @rdname evaluateLearner #' @export evaluateParset = function(par.set, task) { - dict = makeTaskDictionary(task = task) if (hasExpression(par = par.set)) { + dict = makeTaskDictionary(task = task) checkParamSet(par.set = par.set, dict = dict) par.set = evaluateParamSet(par.set = par.set, dict = dict) ## assure that the value names are also shown if the values list was unnamed par.set$pars = lapply(par.set$pars, function(x) { - if (is.null(x$values) || !is.null(names(x$values))) - return(x) - names(x$values) = unlist(lapply(x$values, function(vals) vals)) + if (!is.null(x$values) && is.null(names(x$values))) + names(x$values) = unlist(x$values) return(x) }) } diff --git a/R/makeLearner.R b/R/makeLearner.R index 64ef3645fb..51212a1d4d 100644 --- a/R/makeLearner.R +++ b/R/makeLearner.R @@ -43,17 +43,7 @@ #' @return [\code{\link{Learner}}]. #' @family learner #' @export -#' @details Note that learners can also contain task dependent expressions, which can be based on any -#' information provided by the task. For convenience, the most often used keys are available directly -#' \itemize{ -#' \item{\code{task}:} the task itself, allowing to access any of its elements -#' \item{\code{p}:} the number of features in the task -#' \item{\code{n}:} the number of observations in the task -#' \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" -#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) -#' } -#' However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could -#' access the "blocking" via \code{task$task.desc$has.blocking}. +#' @note Learners can contain task dependent expressions, see \code{\link{evaluateLearner}} for more information. #' @aliases Learner #' @seealso [\code{\link{resample}}], [\code{\link{predict.WrappedModel}}] #' @examples diff --git a/R/setHyperPars.R b/R/setHyperPars.R index 4fd73416dd..2476960dde 100644 --- a/R/setHyperPars.R +++ b/R/setHyperPars.R @@ -11,17 +11,7 @@ #' @note If a named (hyper)parameter can't be found for the given learner, the 3 #' closest (hyper)parameter names will be output in case the user mistyped. #' @export -#' @details Note that learners can also contain task dependent expressions, which can be based on any -#' information provided by the task. For convenience, the most often used keys are available directly -#' \itemize{ -#' \item{\code{task}:} the task itself, allowing to access any of its elements -#' \item{\code{p}:} the number of features in the task -#' \item{\code{n}:} the number of observations in the task -#' \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" -#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) -#' } -#' However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could -#' access the "blocking" via \code{task$task.desc$has.blocking}. +#' @note Learners can contain task dependent expressions, see \code{\link{evaluateLearner}} for more information. #' @family learner #' @importFrom utils adist #' @examples @@ -29,7 +19,6 @@ #' cl2 = setHyperPars(cl1, sigma = 10, par.vals = list(C = 2)) #' cl3 = setHyperPars(cl2, C = expression(round(n / p))) #' print(cl1) -#' # note the now set and altered hyperparameters: #' print(cl2) #' print(cl3) setHyperPars = function(learner, ..., par.vals = list()) { diff --git a/R/train.R b/R/train.R index bd7f0624e0..125300e6bd 100644 --- a/R/train.R +++ b/R/train.R @@ -30,7 +30,7 @@ #' print(mod) train = function(learner, task, subset, weights = NULL) { learner = checkLearner(learner) - if (hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) + if (hasExpression(learner)) learner = evaluateLearner(learner = learner, task = task) assertClass(task, classes = "Task") if (missing(subset)) { diff --git a/R/tuneParams.R b/R/tuneParams.R index aca12ee348..54821665ef 100644 --- a/R/tuneParams.R +++ b/R/tuneParams.R @@ -79,7 +79,7 @@ tuneParams = function(learner, task, resampling, measures, par.set, control, show.info = getMlrOption("show.info")) { learner = checkLearner(learner) assertClass(task, classes = "Task") - if (hasExpression(learner$par.set) || any(vlapply(learner$par.vals, is.expression))) + if (hasExpression(learner)) learner = evaluateLearner(learner = learner, task = task) measures = checkMeasures(measures, learner) assertClass(par.set, classes = "ParamSet") diff --git a/man/evaluateLearner.Rd b/man/evaluateLearner.Rd index e883d77841..a6593f7c7c 100644 --- a/man/evaluateLearner.Rd +++ b/man/evaluateLearner.Rd @@ -26,21 +26,21 @@ Dependent parameters with a \code{requires} field must use \code{quote} and not [\code{\link{Learner}} | \code{\link[ParamHelpers]{ParamSet}}]. } \description{ -Updates learners and/or parameter sets by evaluating their expressions -based on a specific task. An overview of the possible expressions can be found in the details. -} -\details{ -The expressions can be based on any information provided by the task. For convenience, -the most often used keys are available directly +A \code{\link{Learner}} or \code{\link[ParamHelpers]{ParamSet}} can contain an unevaluated \code{\link[base]{expression}} +as value for a hyperparameter. +E.g., these expressions are used if the default value dependents on the task size or an upper limit for a parameter +is given by the number of features in a task. +The provided functions evaluate such expressions in an environment (dictionary) which holds the following information: \itemize{ - \item{\code{task}:} the task itself, allowing to access any of its elements + \item{\code{task}:} the task itself, allowing to access any of its elements. \item{\code{p}:} the number of features in the task \item{\code{n}:} the number of observations in the task \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) } -However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could -access the "blocking" via \code{task$task.desc$has.blocking}. +Usually the evaluation of the expression is performed automatically, e.g. in \code{\link{train}} or +\code{\link{tuneParams}}. +Therefore calling \code{evaluateParamSet} or \code{evaluateLearner} manually should not be necessary. } \examples{ ## (1) evaluation of a learner's hyperparameters @@ -49,17 +49,17 @@ lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p), minbucket = expression(3L + 4L * task$task.desc$has.blocking)) lrn2 = evaluateLearner(learner = lrn1, task = task) -lrn1$par.vals -lrn2$par.vals +getHyperPars(lrn1) +getHyperPars(lrn2) ## (2) evaluation of a learner's entire parameter set task = makeClassifTask(data = iris, target = "Species") lrn1 = makeLearner("classif.randomForest") lrn2 = evaluateLearner(learner = lrn1, task = task) -## focus on the parameters 'mtry', 'classwt' and 'cutoff' -lrn1$par.set -lrn2$par.set +## Note the values for parameters 'mtry', 'classwt' and 'cutoff' +getParamSet(lrn1) +getParamSet(lrn2) ## (3) evaluation of a parameter set task = makeClassifTask(data = iris, target = "Species") @@ -67,6 +67,6 @@ ps1 = makeParamSet( makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x), makeDiscreteParam("sigma", values = expression(list(k, p))) ) -ps2 = evaluateParset(par.set = ps1, task = task) +evaluateParset(par.set = ps1, task = task) } diff --git a/man/makeLearner.Rd b/man/makeLearner.Rd index de87c79e74..3b8af3aa51 100644 --- a/man/makeLearner.Rd +++ b/man/makeLearner.Rd @@ -72,18 +72,8 @@ value selects the label. The threshold used to assign the label can later be cha To see all possible properties of a learner, go to: \code{\link{LearnerProperties}}. } -\details{ -Note that learners can also contain task dependent expressions, which can be based on any -information provided by the task. For convenience, the most often used keys are available directly -\itemize{ - \item{\code{task}:} the task itself, allowing to access any of its elements - \item{\code{p}:} the number of features in the task - \item{\code{n}:} the number of observations in the task - \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" - \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) -} -However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could -access the "blocking" via \code{task$task.desc$has.blocking}. +\note{ +Learners can contain task dependent expressions, see \code{\link{evaluateLearner}} for more information. } \examples{ makeLearner("classif.rpart") diff --git a/man/setHyperPars.Rd b/man/setHyperPars.Rd index ba49d48215..ea709c96eb 100644 --- a/man/setHyperPars.Rd +++ b/man/setHyperPars.Rd @@ -25,22 +25,11 @@ Optional list of named (hyper)parameter settings. The arguments in \description{ Set the hyperparameters of a learner object. } -\details{ -Note that learners can also contain task dependent expressions, which can be based on any -information provided by the task. For convenience, the most often used keys are available directly -\itemize{ - \item{\code{task}:} the task itself, allowing to access any of its elements - \item{\code{p}:} the number of features in the task - \item{\code{n}:} the number of observations in the task - \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" - \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) -} -However, if one wants to access any other parts of the \code{task}, one can do so. For instance, one could -access the "blocking" via \code{task$task.desc$has.blocking}. -} \note{ If a named (hyper)parameter can't be found for the given learner, the 3 closest (hyper)parameter names will be output in case the user mistyped. + +Learners can contain task dependent expressions, see \code{\link{evaluateLearner}} for more information. } \examples{ cl1 = makeLearner("classif.ksvm", sigma = 1) diff --git a/tests/testthat/test_base_evaluateLearner.R b/tests/testthat/test_base_evaluateLearner.R index 45d6fb1e2c..7058ca660d 100644 --- a/tests/testthat/test_base_evaluateLearner.R +++ b/tests/testthat/test_base_evaluateLearner.R @@ -24,7 +24,7 @@ test_that("expressions in learners", { expect_true(is.expression(x1)) expect_true(!is.expression(x2)) expect_equal(x2, binaryclass.task$task.desc$has.blocking) - + ## (4) expressions within hyperparameters x1 = lrn1$par.vals$minsplit x2 = lrn2$par.vals$minsplit @@ -48,9 +48,22 @@ test_that("expressions in parameter sets", { makeDiscreteParam("sigma", values = expression(list(p, k))) ) ps2 = evaluateParset(par.set = ps1, task = binaryclass.task) - + ## expressions within parameter sets expect_equal(ps2$pars$C$lower, 2L) expect_equal(ps2$pars$C$upper, 208L) expect_equal(ps2$pars$sigma$values, list("60" = 60, "2" = 2)) }) + +test_that("tuning works with expressions", { + task = multiclass.small.task + lrn = makeLearner("classif.rpart") + lrn = makeFilterWrapper(lrn, fw.method = "kruskal.test") + ps = makeParamSet( + makeIntegerParam("fw.abs", lower = 1, upper = expression(ceiling(n/2))) + ) + ctrl = makeTuneControlRandom(maxit = 5) + res = tuneParams(lrn, task = task, resampling = hout, par.set = ps, control = ctrl) + res = as.data.frame(res$opt.path) + expect_integer(res$fw.abs, lower = 1, upper = ceiling(getTaskSize(task)/2), any.missing = FALSE) +}) From 43e2f4f30d7ec7fb9af56f79c9ef4abab22c86eb Mon Sep 17 00:00:00 2001 From: Michel Lang Date: Mon, 15 Aug 2016 15:56:16 +0200 Subject: [PATCH 12/17] removed duplicated PH dep --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ca4c361a6c..57368d2093 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,7 +37,6 @@ Depends: Imports: BBmisc (>= 1.10), ggplot2, - ParamHelpers (>= 1.9), stats, stringi, checkmate (>= 1.8.1), From 8450107aa2e964cb749cd3a2c5a34db3657f7cd7 Mon Sep 17 00:00:00 2001 From: Pascal Kerschke Date: Fri, 19 Aug 2016 15:20:15 +0200 Subject: [PATCH 13/17] fixes as requested in PR#1126 --- NAMESPACE | 4 +- R/Learner_properties.R | 7 +- R/Task_operators.R | 20 ++++- R/evaluateLearner.R | 79 ------------------ R/evaluateParamExpressions.R | 77 ++++++++++++++++++ R/makeLearner.R | 2 +- R/setHyperPars.R | 2 +- R/train.R | 6 +- R/tuneParams.R | 25 ++++-- man/LearnerProperties.Rd | 6 ++ man/evaluateLearner.Rd | 72 ----------------- man/evaluateParamExpressions.Learner.Rd | 80 +++++++++++++++++++ man/getTaskClassLevels.Rd | 1 + man/getTaskCosts.Rd | 1 + man/getTaskData.Rd | 1 + man/getTaskDescription.Rd | 1 + man/getTaskDictionary.Rd | 38 +++++++++ man/getTaskFeatureNames.Rd | 1 + man/getTaskFormula.Rd | 1 + man/getTaskId.Rd | 1 + man/getTaskNFeats.Rd | 1 + man/getTaskSize.Rd | 1 + man/getTaskTargetNames.Rd | 1 + man/getTaskTargets.Rd | 1 + man/getTaskType.Rd | 1 + man/makeLearner.Rd | 2 +- man/makeTuneWrapper.Rd | 3 +- man/setHyperPars.Rd | 3 +- man/subsetTask.Rd | 1 + man/tuneParams.Rd | 17 +++- ...R => test_base_evaluateParamExpressions.R} | 13 +-- 31 files changed, 290 insertions(+), 179 deletions(-) delete mode 100644 R/evaluateLearner.R create mode 100644 R/evaluateParamExpressions.R delete mode 100644 man/evaluateLearner.Rd create mode 100644 man/evaluateParamExpressions.Learner.Rd create mode 100644 man/getTaskDictionary.Rd rename tests/testthat/{test_base_evaluateLearner.R => test_base_evaluateParamExpressions.R} (84%) diff --git a/NAMESPACE b/NAMESPACE index 444a4de9ab..ab4d17ab1d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ S3method(downsample,Task) S3method(estimateRelativeOverfitting,ResampleDesc) S3method(estimateResidualVariance,Learner) S3method(estimateResidualVariance,WrappedModel) +S3method(evaluateParamExpressions,Learner) S3method(generateCalibrationData,BenchmarkResult) S3method(generateCalibrationData,Prediction) S3method(generateCalibrationData,ResampleResult) @@ -758,8 +759,6 @@ export(dropFeatures) export(dunn) export(estimateRelativeOverfitting) export(estimateResidualVariance) -export(evaluateLearner) -export(evaluateParset) export(expvar) export(f1) export(fdr) @@ -822,6 +821,7 @@ export(getTaskClassLevels) export(getTaskCosts) export(getTaskData) export(getTaskDescription) +export(getTaskDictionary) export(getTaskFeatureNames) export(getTaskFormula) export(getTaskId) diff --git a/R/Learner_properties.R b/R/Learner_properties.R index 9a81a53041..1ae7bff7c5 100644 --- a/R/Learner_properties.R +++ b/R/Learner_properties.R @@ -76,7 +76,10 @@ getSupportedLearnerProperties = function(type = NA_character_) { p[[type]] } +#' @param obj [\code{\link{Learner}} | \code{character(1)}]\cr +#' Same as \code{learner} above. +#' @rdname LearnerProperties #' @export -hasExpression.Learner = function(par) { - any(hasExpression(par$par.set)) || any(vlapply(par$par.vals, is.expression)) +hasExpression.Learner = function(obj) { + any(hasExpression(obj$par.set)) || any(vlapply(obj$par.vals, is.expression)) } diff --git a/R/Task_operators.R b/R/Task_operators.R index 1dd8da4f44..1ea33a94b7 100644 --- a/R/Task_operators.R +++ b/R/Task_operators.R @@ -455,10 +455,22 @@ getTaskWeights = function(task) { } -# returns a dictionary, which contains the task itself (task), the number of -# features (p), the number of observations (n), the task type (type) and in -# case of classification tasks the number of class levels (k) -makeTaskDictionary = function(task) { +#' @title Create a dictionary based on the task. +#' +#' @description Returns a dictionary, which contains the \link{Task} itself +#' (\code{task}), the number of features (\code{p}), the number of +#' observations (\code{n}), the task type (\code{type}) and in case of +#' classification tasks, the number of class levels (\code{k}). +#' +#' @template arg_task +#' @return [\code{\link[base]{list}}]. Used for evaluating the expressions +#' within a parameter, parameter set or list of parameters. +#' @family task +#' @export +#' @examples +#' task = makeClassifTask(data = iris, target = "Species") +#' getTaskDictionary(task) +getTaskDictionary = function(task) { dict = list( task = task, p = getTaskNFeats(task), diff --git a/R/evaluateLearner.R b/R/evaluateLearner.R deleted file mode 100644 index dffecb3547..0000000000 --- a/R/evaluateLearner.R +++ /dev/null @@ -1,79 +0,0 @@ -#' @title Evaluates expressions within a learner or parameter set according to the task. -#' -#' @description -#' A \code{\link{Learner}} or \code{\link[ParamHelpers]{ParamSet}} can contain an unevaluated \code{\link[base]{expression}} -#' as value for a hyperparameter. -#' E.g., these expressions are used if the default value dependents on the task size or an upper limit for a parameter -#' is given by the number of features in a task. -#' The provided functions evaluate such expressions in an environment (dictionary) which holds the following information: -#' \itemize{ -#' \item{\code{task}:} the task itself, allowing to access any of its elements. -#' \item{\code{p}:} the number of features in the task -#' \item{\code{n}:} the number of observations in the task -#' \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" -#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) -#' } -#' Usually the evaluation of the expression is performed automatically, e.g. in \code{\link{train}} or -#' \code{\link{tuneParams}}. -#' Therefore calling \code{evaluateParamSet} or \code{evaluateLearner} manually should not be necessary. -#' -#' @template arg_learner -#' @param par.set [\code{\link[ParamHelpers]{ParamSet}}]\cr -#' Parameter set of (hyper)parameters and their constraints. -#' Dependent parameters with a \code{requires} field must use \code{quote} and not -#' \code{expression} to define it. -#' @template arg_task -#' @return [\code{\link{Learner}} | \code{\link[ParamHelpers]{ParamSet}}]. -#' @name evaluateLearner -#' @rdname evaluateLearner -#' @export -#' @examples -#' ## (1) evaluation of a learner's hyperparameters -#' task = makeClassifTask(data = iris, target = "Species") -#' lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p), -#' minbucket = expression(3L + 4L * task$task.desc$has.blocking)) -#' lrn2 = evaluateLearner(learner = lrn1, task = task) -#' -#' getHyperPars(lrn1) -#' getHyperPars(lrn2) -#' -#' ## (2) evaluation of a learner's entire parameter set -#' task = makeClassifTask(data = iris, target = "Species") -#' lrn1 = makeLearner("classif.randomForest") -#' lrn2 = evaluateLearner(learner = lrn1, task = task) -#' -#' ## Note the values for parameters 'mtry', 'classwt' and 'cutoff' -#' getParamSet(lrn1) -#' getParamSet(lrn2) -#' -#' ## (3) evaluation of a parameter set -#' task = makeClassifTask(data = iris, target = "Species") -#' ps1 = makeParamSet( -#' makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x), -#' makeDiscreteParam("sigma", values = expression(list(k, p))) -#' ) -#' evaluateParset(par.set = ps1, task = task) -evaluateLearner = function(learner, task) { - dict = makeTaskDictionary(task = task) - learner$par.set = evaluateParset(learner$par.set, task = task) - if (any(vlapply(learner$par.vals, is.expression))) - learner$par.vals = lapply(learner$par.vals, function(expr) eval(expr, envir = dict)) - return(learner) -} - -#' @rdname evaluateLearner -#' @export -evaluateParset = function(par.set, task) { - if (hasExpression(par = par.set)) { - dict = makeTaskDictionary(task = task) - checkParamSet(par.set = par.set, dict = dict) - par.set = evaluateParamSet(par.set = par.set, dict = dict) - ## assure that the value names are also shown if the values list was unnamed - par.set$pars = lapply(par.set$pars, function(x) { - if (!is.null(x$values) && is.null(names(x$values))) - names(x$values) = unlist(x$values) - return(x) - }) - } - return(par.set) -} diff --git a/R/evaluateParamExpressions.R b/R/evaluateParamExpressions.R new file mode 100644 index 0000000000..d02ffcdeed --- /dev/null +++ b/R/evaluateParamExpressions.R @@ -0,0 +1,77 @@ +#' @title Evaluates expressions within a learner or parameter set. +#' +#' @description +#' A \code{\link{Learner}} can contain unevaluated \code{\link[base]{expression}s} +#' as value for a hyperparameter. E.g., these expressions are used if the default +#' value depends on the task size or an upper limit for a parameter is given by +#' the number of features in a task. \code{evaluateParamExpressions} allows to +#' evaluate these expressions using a given dictionary, which holds the following +#' information: +#' \itemize{ +#' \item{\code{task}:} the task itself, allowing to access any of its elements. +#' \item{\code{p}:} the number of features in the task +#' \item{\code{n}:} the number of observations in the task +#' \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" +#' \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) +#' } +#' Usually the evaluation of the expression is performed automatically, e.g. in +#' \code{\link{train}} or \code{\link{tuneParams}}. Therefore calling +#' \code{evaluateParamExpressions} manually should not be necessary. +#' It is also possible to directly evaluate the expressions of a +#' \code{\link[ParamHelpers]{ParamSet}}, \code{\link[base]{list}} of +#' \code{\link[ParamHelpers]{Param}s} or single \code{\link[ParamHelpers]{Param}s}. +#' For further information on these, please refer to the documentation of the +#' \code{ParamHelpers} package. +#' +#' @param obj [\code{\link{Learner}}]\cr +#' The learner. If you pass a string the learner will be created via +#' \code{\link{makeLearner}}. Expressions within \code{length}, \code{lower} +#' or \code{upper} boundaries, \code{default} or \code{value} will be +#' evaluated using the provided dictionary (\code{dict}). +#' @param dict [\code{environment} | \code{list} | \code{NULL}]\cr +#' Environment or list which will be used for evaluating the variables +#' of expressions within a parameter, parameter set or list of parameters. +#' The default is \code{NULL}. +#' @return [\code{\link{Learner}}]. +#' @export +#' @examples +#' ## (1) evaluation of a learner's hyperparameters +#' task = makeClassifTask(data = iris, target = "Species") +#' dict = getTaskDictionary(task = task) +#' lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p), +#' minbucket = expression(3L + 4L * task$task.desc$has.blocking)) +#' lrn2 = evaluateParamExpressions(obj = lrn1, dict = dict) +#' +#' getHyperPars(lrn1) +#' getHyperPars(lrn2) +#' +#' ## (2) evaluation of a learner's entire parameter set +#' task = makeClassifTask(data = iris, target = "Species") +#' dict = getTaskDictionary(task = task) +#' lrn1 = makeLearner("classif.randomForest") +#' lrn2 = evaluateParamExpressions(obj = lrn1, dict = dict) +#' +#' ## Note the values for parameters 'mtry', 'classwt' and 'cutoff' +#' lrn1$par.set +#' lrn2$par.set +#' +#' ## (3) evaluation of a parameter set +#' task = makeClassifTask(data = iris, target = "Species") +#' dict = getTaskDictionary(task = task) +#' ps1 = makeParamSet( +#' makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x), +#' makeDiscreteParam("sigma", values = expression(list(k, p))) +#' ) +#' ps2 = evaluateParamExpressions(obj = ps1, dict = dict) +#' +#' ps1 +#' ps2 +evaluateParamExpressions.Learner = function(obj, dict = NULL) { + obj = checkLearner(obj) + if (hasExpression(obj)) { + assertList(dict, null.ok = TRUE) + obj$par.set = evaluateParamExpressions(obj = obj$par.set, dict = dict) + obj$par.vals = evaluateParamExpressions(obj = obj$par.vals, dict = dict) + } + return(obj) +} diff --git a/R/makeLearner.R b/R/makeLearner.R index 51212a1d4d..15a8ca801e 100644 --- a/R/makeLearner.R +++ b/R/makeLearner.R @@ -43,7 +43,7 @@ #' @return [\code{\link{Learner}}]. #' @family learner #' @export -#' @note Learners can contain task dependent expressions, see \code{\link{evaluateLearner}} for more information. +#' @note Learners can contain task dependent expressions, see \code{\link{evaluateParamExpressions}} for more information. #' @aliases Learner #' @seealso [\code{\link{resample}}], [\code{\link{predict.WrappedModel}}] #' @examples diff --git a/R/setHyperPars.R b/R/setHyperPars.R index 2476960dde..352967c150 100644 --- a/R/setHyperPars.R +++ b/R/setHyperPars.R @@ -11,7 +11,7 @@ #' @note If a named (hyper)parameter can't be found for the given learner, the 3 #' closest (hyper)parameter names will be output in case the user mistyped. #' @export -#' @note Learners can contain task dependent expressions, see \code{\link{evaluateLearner}} for more information. +#' @note Learners can contain task dependent expressions, see \code{\link{evaluateParamExpressions}} for more information. #' @family learner #' @importFrom utils adist #' @examples diff --git a/R/train.R b/R/train.R index 125300e6bd..e8a9894a91 100644 --- a/R/train.R +++ b/R/train.R @@ -30,9 +30,11 @@ #' print(mod) train = function(learner, task, subset, weights = NULL) { learner = checkLearner(learner) - if (hasExpression(learner)) - learner = evaluateLearner(learner = learner, task = task) assertClass(task, classes = "Task") + if (hasExpression(learner)) { + dict = getTaskDictionary(task = task) + learner = evaluateParamExpressions(obj = learner, dict = dict) + } if (missing(subset)) { subset = seq_len(getTaskSize(task)) } else { diff --git a/R/tuneParams.R b/R/tuneParams.R index 54821665ef..426d035c5e 100644 --- a/R/tuneParams.R +++ b/R/tuneParams.R @@ -22,7 +22,8 @@ #' @param par.set [\code{\link[ParamHelpers]{ParamSet}}]\cr #' Collection of parameters and their constraints for optimization. #' Dependent parameters with a \code{requires} field must use \code{quote} and not -#' \code{expression} to define it. +#' \code{expression} to define it. On the other hand, task dependent parameters +#' need to be defined with expressions. #' @param control [\code{\link{TuneControl}}]\cr #' Control object for search method. Also selects the optimization algorithm for tuning. #' @template arg_showinfo @@ -30,7 +31,9 @@ #' @family tune #' @note If you would like to include results from the training data set, make #' sure to appropriately adjust the resampling strategy and the aggregation for -#' the measure. See example code below. +#' the measure. See example code below.\cr +#' Note that learners and parameter sets can contain task dependent +#' expressions, see \code{\link{evaluateParamExpressions}} for more information. #' @export #' @examples #' # a grid search for an SVM (with a tiny number of points...) @@ -50,6 +53,16 @@ #' print(head(generateHyperParsEffectData(res))) #' print(head(generateHyperParsEffectData(res, trafo = TRUE))) #' +#' # tuning the parameters 'C' and 'sigma' of an SVM, where the boundaries +#' # of 'sigma' depend on the number of features +#' ps = makeParamSet( +#' makeNumericLearnerParam("sigma", lower = expression(0.2 * p), upper = expression(2.5 * p)), +#' makeDiscreteLearnerParam("C", values = 2^c(-1, 1)) +#' ) +#' rdesc = makeResampleDesc("Subsample") +#' ctrl = makeTuneControlRandom(maxit = 2L) +#' res = tuneParams(task = iris.task, learner = "classif.ksvm", par.set = ps, control = ctrl, resampling = rdesc) +#' #' \dontrun{ #' # we optimize the SVM over 3 kernels simultanously #' # note how we use dependent params (requires = ...) and iterated F-racing here @@ -79,12 +92,14 @@ tuneParams = function(learner, task, resampling, measures, par.set, control, show.info = getMlrOption("show.info")) { learner = checkLearner(learner) assertClass(task, classes = "Task") - if (hasExpression(learner)) - learner = evaluateLearner(learner = learner, task = task) + dict = getTaskDictionary(task = task) + if (hasExpression(learner)) { + learner = evaluateParamExpressions(obj = learner, dict = dict) + } measures = checkMeasures(measures, learner) assertClass(par.set, classes = "ParamSet") if (hasExpression(par.set)) - par.set = evaluateParset(par.set = par.set, task = task) + par.set = evaluateParamExpressions(obj = par.set, dict = dict) assertClass(control, classes = "TuneControl") if (!inherits(resampling, "ResampleDesc") && !inherits(resampling, "ResampleInstance")) stop("Argument resampling must be of class ResampleDesc or ResampleInstance!") diff --git a/man/LearnerProperties.Rd b/man/LearnerProperties.Rd index 45206ab07b..573d2a634a 100644 --- a/man/LearnerProperties.Rd +++ b/man/LearnerProperties.Rd @@ -3,12 +3,15 @@ \name{LearnerProperties} \alias{LearnerProperties} \alias{getLearnerProperties} +\alias{hasExpression.Learner} \alias{hasLearnerProperties} \title{Query properties of learners.} \usage{ getLearnerProperties(learner) hasLearnerProperties(learner, props) + +\method{hasExpression}{Learner}(obj) } \arguments{ \item{learner}{[\code{\link{Learner}} | \code{character(1)}]\cr @@ -17,6 +20,9 @@ If you pass a string the learner will be created via \code{\link{makeLearner}}.} \item{props}{[\code{character}]\cr Vector of properties to query.} + +\item{obj}{[\code{\link{Learner}} | \code{character(1)}]\cr +Same as \code{learner} above.} } \value{ \code{getLearnerProperties} returns a character vector with learner properties. diff --git a/man/evaluateLearner.Rd b/man/evaluateLearner.Rd deleted file mode 100644 index a6593f7c7c..0000000000 --- a/man/evaluateLearner.Rd +++ /dev/null @@ -1,72 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/evaluateLearner.R -\name{evaluateLearner} -\alias{evaluateLearner} -\alias{evaluateParset} -\title{Evaluates expressions within a learner or parameter set according to the task.} -\usage{ -evaluateLearner(learner, task) - -evaluateParset(par.set, task) -} -\arguments{ -\item{learner}{[\code{\link{Learner}} | \code{character(1)}]\cr -The learner. -If you pass a string the learner will be created via \code{\link{makeLearner}}.} - -\item{task}{[\code{\link{Task}}]\cr -The task.} - -\item{par.set}{[\code{\link[ParamHelpers]{ParamSet}}]\cr -Parameter set of (hyper)parameters and their constraints. -Dependent parameters with a \code{requires} field must use \code{quote} and not -\code{expression} to define it.} -} -\value{ -[\code{\link{Learner}} | \code{\link[ParamHelpers]{ParamSet}}]. -} -\description{ -A \code{\link{Learner}} or \code{\link[ParamHelpers]{ParamSet}} can contain an unevaluated \code{\link[base]{expression}} -as value for a hyperparameter. -E.g., these expressions are used if the default value dependents on the task size or an upper limit for a parameter -is given by the number of features in a task. -The provided functions evaluate such expressions in an environment (dictionary) which holds the following information: -\itemize{ - \item{\code{task}:} the task itself, allowing to access any of its elements. - \item{\code{p}:} the number of features in the task - \item{\code{n}:} the number of observations in the task - \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" - \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) -} -Usually the evaluation of the expression is performed automatically, e.g. in \code{\link{train}} or -\code{\link{tuneParams}}. -Therefore calling \code{evaluateParamSet} or \code{evaluateLearner} manually should not be necessary. -} -\examples{ -## (1) evaluation of a learner's hyperparameters -task = makeClassifTask(data = iris, target = "Species") -lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p), - minbucket = expression(3L + 4L * task$task.desc$has.blocking)) -lrn2 = evaluateLearner(learner = lrn1, task = task) - -getHyperPars(lrn1) -getHyperPars(lrn2) - -## (2) evaluation of a learner's entire parameter set -task = makeClassifTask(data = iris, target = "Species") -lrn1 = makeLearner("classif.randomForest") -lrn2 = evaluateLearner(learner = lrn1, task = task) - -## Note the values for parameters 'mtry', 'classwt' and 'cutoff' -getParamSet(lrn1) -getParamSet(lrn2) - -## (3) evaluation of a parameter set -task = makeClassifTask(data = iris, target = "Species") -ps1 = makeParamSet( - makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x), - makeDiscreteParam("sigma", values = expression(list(k, p))) -) -evaluateParset(par.set = ps1, task = task) -} - diff --git a/man/evaluateParamExpressions.Learner.Rd b/man/evaluateParamExpressions.Learner.Rd new file mode 100644 index 0000000000..27810a0153 --- /dev/null +++ b/man/evaluateParamExpressions.Learner.Rd @@ -0,0 +1,80 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/evaluateParamExpressions.R +\name{evaluateParamExpressions.Learner} +\alias{evaluateParamExpressions.Learner} +\title{Evaluates expressions within a learner or parameter set.} +\usage{ +\method{evaluateParamExpressions}{Learner}(obj, dict = NULL) +} +\arguments{ +\item{obj}{[\code{\link{Learner}}]\cr +The learner. If you pass a string the learner will be created via +\code{\link{makeLearner}}. Expressions within \code{length}, \code{lower} +or \code{upper} boundaries, \code{default} or \code{value} will be +evaluated using the provided dictionary (\code{dict}).} + +\item{dict}{[\code{environment} | \code{list} | \code{NULL}]\cr +Environment or list which will be used for evaluating the variables +of expressions within a parameter, parameter set or list of parameters. +The default is \code{NULL}.} +} +\value{ +[\code{\link{Learner}}]. +} +\description{ +A \code{\link{Learner}} can contain unevaluated \code{\link[base]{expression}s} +as value for a hyperparameter. E.g., these expressions are used if the default +value depends on the task size or an upper limit for a parameter is given by +the number of features in a task. \code{evaluateParamExpressions} allows to +evaluate these expressions using a given dictionary, which holds the following +information: +\itemize{ + \item{\code{task}:} the task itself, allowing to access any of its elements. + \item{\code{p}:} the number of features in the task + \item{\code{n}:} the number of observations in the task + \item{\code{type}:} the task type, i.e. "classif", "regr", "surv", "cluster", "costcens" or "multilabel" + \item{\code{k}:} the number of classes of the target variable (only available for classification tasks) +} +Usually the evaluation of the expression is performed automatically, e.g. in +\code{\link{train}} or \code{\link{tuneParams}}. Therefore calling +\code{evaluateParamExpressions} manually should not be necessary. +It is also possible to directly evaluate the expressions of a +\code{\link[ParamHelpers]{ParamSet}}, \code{\link[base]{list}} of +\code{\link[ParamHelpers]{Param}s} or single \code{\link[ParamHelpers]{Param}s}. +For further information on these, please refer to the documentation of the +\code{ParamHelpers} package. +} +\examples{ +## (1) evaluation of a learner's hyperparameters +task = makeClassifTask(data = iris, target = "Species") +dict = getTaskDictionary(task = task) +lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p), + minbucket = expression(3L + 4L * task$task.desc$has.blocking)) +lrn2 = evaluateParamExpressions(obj = lrn1, dict = dict) + +getHyperPars(lrn1) +getHyperPars(lrn2) + +## (2) evaluation of a learner's entire parameter set +task = makeClassifTask(data = iris, target = "Species") +dict = getTaskDictionary(task = task) +lrn1 = makeLearner("classif.randomForest") +lrn2 = evaluateParamExpressions(obj = lrn1, dict = dict) + +## Note the values for parameters 'mtry', 'classwt' and 'cutoff' +lrn1$par.set +lrn2$par.set + +## (3) evaluation of a parameter set +task = makeClassifTask(data = iris, target = "Species") +dict = getTaskDictionary(task = task) +ps1 = makeParamSet( + makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x), + makeDiscreteParam("sigma", values = expression(list(k, p))) +) +ps2 = evaluateParamExpressions(obj = ps1, dict = dict) + +ps1 +ps2 +} + diff --git a/man/getTaskClassLevels.Rd b/man/getTaskClassLevels.Rd index 634f4242c3..4bbda1c498 100644 --- a/man/getTaskClassLevels.Rd +++ b/man/getTaskClassLevels.Rd @@ -21,6 +21,7 @@ actually return the same thing. Other task: \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskCosts.Rd b/man/getTaskCosts.Rd index 75c171abd6..0f9f2c8af9 100644 --- a/man/getTaskCosts.Rd +++ b/man/getTaskCosts.Rd @@ -24,6 +24,7 @@ Retuns \dQuote{NULL} if the task is not of type \dQuote{costsens}. Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskData.Rd b/man/getTaskData.Rd index 522b55f7e9..990f537aec 100644 --- a/man/getTaskData.Rd +++ b/man/getTaskData.Rd @@ -63,6 +63,7 @@ head(getTaskData(task, subset = 1:100, recode.target = "01")) Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskDescription.Rd b/man/getTaskDescription.Rd index e00acc56bc..4b09b07c75 100644 --- a/man/getTaskDescription.Rd +++ b/man/getTaskDescription.Rd @@ -19,6 +19,7 @@ Get a summarizing task description. \seealso{ Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskDictionary.Rd b/man/getTaskDictionary.Rd new file mode 100644 index 0000000000..92f20e5458 --- /dev/null +++ b/man/getTaskDictionary.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Task_operators.R +\name{getTaskDictionary} +\alias{getTaskDictionary} +\title{Create a dictionary based on the task.} +\usage{ +getTaskDictionary(task) +} +\arguments{ +\item{task}{[\code{\link{Task}}]\cr +The task.} +} +\value{ +[\code{\link[base]{list}}]. Used for evaluating the expressions +within a parameter, parameter set or list of parameters. +} +\description{ +Returns a dictionary, which contains the \link{Task} itself +(\code{task}), the number of features (\code{p}), the number of +observations (\code{n}), the task type (\code{type}) and in case of +classification tasks, the number of class levels (\code{k}). +} +\examples{ +task = makeClassifTask(data = iris, target = "Species") +getTaskDictionary(task) +} +\seealso{ +Other task: \code{\link{getTaskClassLevels}}, + \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, + \code{\link{getTaskDescription}}, + \code{\link{getTaskFeatureNames}}, + \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, + \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, + \code{\link{getTaskTargetNames}}, + \code{\link{getTaskTargets}}, \code{\link{getTaskType}}, + \code{\link{subsetTask}} +} + diff --git a/man/getTaskFeatureNames.Rd b/man/getTaskFeatureNames.Rd index 23b80a30a3..ce64e8a73e 100644 --- a/man/getTaskFeatureNames.Rd +++ b/man/getTaskFeatureNames.Rd @@ -20,6 +20,7 @@ Target column name is not included. Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, \code{\link{getTaskTargetNames}}, diff --git a/man/getTaskFormula.Rd b/man/getTaskFormula.Rd index 189c3ac2ab..b352ab5c58 100644 --- a/man/getTaskFormula.Rd +++ b/man/getTaskFormula.Rd @@ -34,6 +34,7 @@ For multilabel it is \dQuote{ + ... + ~ .}. Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskId.Rd b/man/getTaskId.Rd index e03153675f..2e7aa1e38e 100644 --- a/man/getTaskId.Rd +++ b/man/getTaskId.Rd @@ -20,6 +20,7 @@ Get the id of the task. Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskNFeats.Rd b/man/getTaskNFeats.Rd index c7586a5231..0f3b6ee3db 100644 --- a/man/getTaskNFeats.Rd +++ b/man/getTaskNFeats.Rd @@ -20,6 +20,7 @@ Get number of features in task. Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskSize.Rd b/man/getTaskSize.Rd index bc92e20bd7..2b9bed0e0c 100644 --- a/man/getTaskSize.Rd +++ b/man/getTaskSize.Rd @@ -20,6 +20,7 @@ Get number of observations in task. Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, diff --git a/man/getTaskTargetNames.Rd b/man/getTaskTargetNames.Rd index 6977e6c4de..2f1114be07 100644 --- a/man/getTaskTargetNames.Rd +++ b/man/getTaskTargetNames.Rd @@ -21,6 +21,7 @@ actually return the same thing. Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskTargets.Rd b/man/getTaskTargets.Rd index e59929b092..c2cdff5852 100644 --- a/man/getTaskTargets.Rd +++ b/man/getTaskTargets.Rd @@ -32,6 +32,7 @@ getTaskTargets(task) Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/getTaskType.Rd b/man/getTaskType.Rd index 3ce190afcd..e6e5193656 100644 --- a/man/getTaskType.Rd +++ b/man/getTaskType.Rd @@ -20,6 +20,7 @@ Get the type of the task. Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/makeLearner.Rd b/man/makeLearner.Rd index 3b8af3aa51..8bd0c8874a 100644 --- a/man/makeLearner.Rd +++ b/man/makeLearner.Rd @@ -73,7 +73,7 @@ value selects the label. The threshold used to assign the label can later be cha To see all possible properties of a learner, go to: \code{\link{LearnerProperties}}. } \note{ -Learners can contain task dependent expressions, see \code{\link{evaluateLearner}} for more information. +Learners can contain task dependent expressions, see \code{\link{evaluateParamExpressions}} for more information. } \examples{ makeLearner("classif.rpart") diff --git a/man/makeTuneWrapper.Rd b/man/makeTuneWrapper.Rd index 11ef6aa9fa..9768634133 100644 --- a/man/makeTuneWrapper.Rd +++ b/man/makeTuneWrapper.Rd @@ -26,7 +26,8 @@ Default is the default measure for the task, see here \code{\link{getDefaultMeas \item{par.set}{[\code{\link[ParamHelpers]{ParamSet}}]\cr Collection of parameters and their constraints for optimization. Dependent parameters with a \code{requires} field must use \code{quote} and not -\code{expression} to define it.} +\code{expression} to define it. On the other hand, task dependent parameters +need to be defined with expressions.} \item{control}{[\code{\link{TuneControl}}]\cr Control object for search method. Also selects the optimization algorithm for tuning.} diff --git a/man/setHyperPars.Rd b/man/setHyperPars.Rd index ea709c96eb..f181926a97 100644 --- a/man/setHyperPars.Rd +++ b/man/setHyperPars.Rd @@ -29,14 +29,13 @@ Set the hyperparameters of a learner object. If a named (hyper)parameter can't be found for the given learner, the 3 closest (hyper)parameter names will be output in case the user mistyped. -Learners can contain task dependent expressions, see \code{\link{evaluateLearner}} for more information. +Learners can contain task dependent expressions, see \code{\link{evaluateParamExpressions}} for more information. } \examples{ cl1 = makeLearner("classif.ksvm", sigma = 1) cl2 = setHyperPars(cl1, sigma = 10, par.vals = list(C = 2)) cl3 = setHyperPars(cl2, C = expression(round(n / p))) print(cl1) -# note the now set and altered hyperparameters: print(cl2) print(cl3) } diff --git a/man/subsetTask.Rd b/man/subsetTask.Rd index a90689f72e..9ab8c7680f 100644 --- a/man/subsetTask.Rd +++ b/man/subsetTask.Rd @@ -37,6 +37,7 @@ subsetTask(task, subset = 1:100) Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskCosts}}, \code{\link{getTaskData}}, \code{\link{getTaskDescription}}, + \code{\link{getTaskDictionary}}, \code{\link{getTaskFeatureNames}}, \code{\link{getTaskFormula}}, \code{\link{getTaskId}}, \code{\link{getTaskNFeats}}, \code{\link{getTaskSize}}, diff --git a/man/tuneParams.Rd b/man/tuneParams.Rd index 009644958d..769e1942f0 100644 --- a/man/tuneParams.Rd +++ b/man/tuneParams.Rd @@ -29,7 +29,8 @@ Default is the default measure for the task, see here \code{\link{getDefaultMeas \item{par.set}{[\code{\link[ParamHelpers]{ParamSet}}]\cr Collection of parameters and their constraints for optimization. Dependent parameters with a \code{requires} field must use \code{quote} and not -\code{expression} to define it.} +\code{expression} to define it. On the other hand, task dependent parameters +need to be defined with expressions.} \item{control}{[\code{\link{TuneControl}}]\cr Control object for search method. Also selects the optimization algorithm for tuning.} @@ -53,7 +54,9 @@ Multi-criteria tuning can be done with \code{\link{tuneParamsMultiCrit}}. \note{ If you would like to include results from the training data set, make sure to appropriately adjust the resampling strategy and the aggregation for -the measure. See example code below. +the measure. See example code below.\cr +Note that learners and parameter sets can contain task dependent +expressions, see \code{\link{evaluateParamExpressions}} for more information. } \examples{ # a grid search for an SVM (with a tiny number of points...) @@ -73,6 +76,16 @@ print(head(as.data.frame(res$opt.path, trafo = TRUE))) print(head(generateHyperParsEffectData(res))) print(head(generateHyperParsEffectData(res, trafo = TRUE))) +# tuning the parameters 'C' and 'sigma' of an SVM, where the boundaries +# of 'sigma' depend on the number of features +ps = makeParamSet( + makeNumericLearnerParam("sigma", lower = expression(0.2 * p), upper = expression(2.5 * p)), + makeDiscreteLearnerParam("C", values = 2^c(-1, 1)) +) +rdesc = makeResampleDesc("Subsample") +ctrl = makeTuneControlRandom(maxit = 2L) +res = tuneParams(task = iris.task, learner = "classif.ksvm", par.set = ps, control = ctrl, resampling = rdesc) + \dontrun{ # we optimize the SVM over 3 kernels simultanously # note how we use dependent params (requires = ...) and iterated F-racing here diff --git a/tests/testthat/test_base_evaluateLearner.R b/tests/testthat/test_base_evaluateParamExpressions.R similarity index 84% rename from tests/testthat/test_base_evaluateLearner.R rename to tests/testthat/test_base_evaluateParamExpressions.R index 7058ca660d..665306e2c3 100644 --- a/tests/testthat/test_base_evaluateLearner.R +++ b/tests/testthat/test_base_evaluateParamExpressions.R @@ -1,10 +1,11 @@ -context("evaluate expressions") +context("evaluate param expressions") test_that("expressions in learners", { ## expressions within 'pre-defined' learners ## (1) expressions within default of parameter sets lrn1 = makeLearner("classif.__mlrmocklearners__7") - lrn2 = evaluateLearner(learner = lrn1, task = binaryclass.task) + dict = getTaskDictionary(task = binaryclass.task) + lrn2 = evaluateParamExpressions(obj = lrn1, dict = dict) x1 = lrn1$par.set$pars$mtry$default x2 = lrn2$par.set$pars$mtry$default expect_true(is.expression(x1)) @@ -34,7 +35,8 @@ test_that("expressions in learners", { ## manually constructed expressions within hyperparams lrn1 = makeLearner("classif.rpart", minsplit = expression(k * p)) - lrn2 = evaluateLearner(learner = lrn1, task = binaryclass.task) + dict = getTaskDictionary(task = binaryclass.task) + lrn2 = evaluateParamExpressions(obj = lrn1, dict = dict) x1 = lrn1$par.vals$minsplit x2 = lrn2$par.vals$minsplit expect_true(is.expression(x1)) @@ -47,12 +49,13 @@ test_that("expressions in parameter sets", { makeNumericParam("C", lower = expression(k), upper = expression(n), trafo = function(x) 2^x), makeDiscreteParam("sigma", values = expression(list(p, k))) ) - ps2 = evaluateParset(par.set = ps1, task = binaryclass.task) + dict = getTaskDictionary(task = binaryclass.task) + ps2 = evaluateParamExpressions(obj = ps1, dict = dict) ## expressions within parameter sets expect_equal(ps2$pars$C$lower, 2L) expect_equal(ps2$pars$C$upper, 208L) - expect_equal(ps2$pars$sigma$values, list("60" = 60, "2" = 2)) + expect_equal(ps2$pars$sigma$values, list(60, 2)) }) test_that("tuning works with expressions", { From 3cfe85dccb55427500b01dee64f7e6aaf9750b6e Mon Sep 17 00:00:00 2001 From: Michel Lang Date: Thu, 25 Aug 2016 22:30:23 +0200 Subject: [PATCH 14/17] use setLearnerId in tests --- tests/testthat/test_base_getParamSet.R | 2 +- tests/testthat/test_base_makeLearners.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test_base_getParamSet.R b/tests/testthat/test_base_getParamSet.R index 6d10abdcd9..7f3bd58809 100644 --- a/tests/testthat/test_base_getParamSet.R +++ b/tests/testthat/test_base_getParamSet.R @@ -9,7 +9,7 @@ test_that("getParamSet", { ps = getParamSet(lrn) expect_true(all(c("method", "fw.method") %in% names(ps$pars))) - lrn = makeModelMultiplexer(list(setId(lrn, "x"))) + lrn = makeModelMultiplexer(list(setLearnerId(lrn, "x"))) ps = getParamSet(lrn) expect_true(all(c("x.method", "x.fw.method", "selected.learner") %in% names(ps$pars))) diff --git a/tests/testthat/test_base_makeLearners.R b/tests/testthat/test_base_makeLearners.R index e59dc37083..b941ac60ed 100644 --- a/tests/testthat/test_base_makeLearners.R +++ b/tests/testthat/test_base_makeLearners.R @@ -6,7 +6,7 @@ test_that("makeLearners", { lrns1 = setNames(lapply(cls1, makeLearner), cls1) ids = c("a", "b") lrns1 = setNames(lapply(cls1, makeLearner), cls1) - lrns2 = setNames(mapply(setId, lrns1, ids, SIMPLIFY = FALSE), ids) + lrns2 = setNames(mapply(setLearnerId, lrns1, ids, SIMPLIFY = FALSE), ids) lrns3 = lapply(lrns1, setPredictType, predict.type = "prob") res = makeLearners(cls1) From bce6633fa3bd5df154d356d5a31413065dbc976e Mon Sep 17 00:00:00 2001 From: Michel Lang Date: Thu, 25 Aug 2016 23:01:37 +0200 Subject: [PATCH 15/17] fix warnings in tests and r cmd check --- R/tuneParams.R | 2 +- tests/testthat/test_base_measures.R | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/R/tuneParams.R b/R/tuneParams.R index 426d035c5e..49e17f3fa6 100644 --- a/R/tuneParams.R +++ b/R/tuneParams.R @@ -61,7 +61,7 @@ #' ) #' rdesc = makeResampleDesc("Subsample") #' ctrl = makeTuneControlRandom(maxit = 2L) -#' res = tuneParams(task = iris.task, learner = "classif.ksvm", par.set = ps, control = ctrl, resampling = rdesc) +#' res = tuneParams("classif.ksvm", iris.task, par.set = ps, control = ctrl, resampling = rdesc) #' #' \dontrun{ #' # we optimize the SVM over 3 kernels simultanously diff --git a/tests/testthat/test_base_measures.R b/tests/testthat/test_base_measures.R index 96c836e384..ce3faadc05 100644 --- a/tests/testthat/test_base_measures.R +++ b/tests/testthat/test_base_measures.R @@ -255,8 +255,8 @@ test_that("check measure calculations", { expect_warning(measureRAE(c(1,1,1,1),c(1,2,3,4))) expect_silent(measureRAE(c(1,1,1,0),c(2,2,2,2))) # mape - expect_equal(NA, mape$fun(pred = pred.regr)) - expect_equal(NA, measureMAPE(c(5, 10, 0, 5),c(4, 11, 0, 4))) + expect_equal(NA, suppressWarnings(mape$fun(pred = pred.regr))) + expect_equal(NA, suppressWarnings(measureMAPE(c(5, 10, 0, 5),c(4, 11, 0, 4)))) pred.regr.mape = pred.regr pred.regr.mape$data$truth = c(5, 10, 1, 5) #we change the 0 target because mape is undefined mape.perf = performance(pred.regr.mape, measures = mape, model = mod.regr) @@ -267,7 +267,7 @@ test_that("check measure calculations", { expect_warning(measureMAPE(0,0)) expect_warning(measureMAPE(c(1,1,1,0),c(2,2,2,2))) expect_silent(measureMAPE(c(1,1,1,1),c(2,2,2,2))) - + #test multiclass measures #mmce @@ -385,7 +385,7 @@ test_that("check measure calculations", { expect_equal(measureLSR(p2[1,,drop=FALSE], y2[1]), log(0.1)) expect_equal(measureLSR(p2[1,,drop=FALSE], y1[1]), log(0.9)) - + #test binaryclass measures #brier @@ -629,18 +629,18 @@ test_that("check measure calculations", { expect_equal(object = silhouette.test, as.numeric(silhouette.perf)) #test that some measures are only transformations of each other - + #qsr is identical to the 1 - multiclass brier expect_equal(1 - measureMulticlassBrier(p1, y1), measureQSR(p1, y1), check.names = FALSE) qsr.bin.perf = performance(pred.bin, measures = qsr, model = mod.bin) expect_equal(1 - 2 * brier.perf, qsr.bin.perf, check.names = FALSE) - + expect_equal(lsr.perf, -1 * logloss.perf, check.names = FALSE) - + #multiclass brier for a two class problem should be two times the binary brier score. multiclass.brier.twoclass.perf = performance(pred.bin, measures = multiclass.brier, model = mod.bin) expect_equal(2 * brier.perf, multiclass.brier.twoclass.perf, check.names = FALSE) - + }) test_that("getDefaultMeasure", { @@ -655,21 +655,21 @@ test_that("measures quickcheck", { options(warn = 2) ms = list(mmce, acc, bac, tp, fp, tn, fn, tpr, fpr, tnr, fnr, ppv, npv, mcc, f1) lrn = makeLearner("classif.rpart") - + quickcheckTest( quickcheck::forall(data = as.data.frame(quickcheck::rmatrix(elements = quickcheck::rinteger, nrow = c(min = 2, max = 10000), ncol = c(min = 1, max = 100))), { classes = factor(c("foo", "bar")) data$target = rep_len(classes, length.out = nrow(data)) - + trainIds = 1:(2*nrow(data)/3) testIds = setdiff(1:nrow(data), trainIds) task = makeClassifTask(data = data, target = "target") - + mod = train(lrn, task = task, subset = trainIds) pred = predict(mod, task = task, subset = testIds) perf = performance(pred, measures = ms) - + is.numeric(unlist(perf)) && all(perf >= 0 && perf <= 1) } ), From 10d81d567552940c140a9f8541003c9b5a00a5c9 Mon Sep 17 00:00:00 2001 From: Jakob Richter Date: Thu, 2 Feb 2017 16:41:25 +0100 Subject: [PATCH 16/17] added assertion for Task --- R/Task_operators.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/Task_operators.R b/R/Task_operators.R index 4202d71a3c..5f72a6d7fe 100644 --- a/R/Task_operators.R +++ b/R/Task_operators.R @@ -472,6 +472,7 @@ getTaskWeights = function(task) { #' task = makeClassifTask(data = iris, target = "Species") #' getTaskDictionary(task) getTaskDictionary = function(task) { + assertClass(task, classes = "Task") dict = list( task = task, p = getTaskNFeats(task), From cb5fa80a1a13b517eb404029f60d8f325db03e7c Mon Sep 17 00:00:00 2001 From: Michel Lang Date: Tue, 14 Feb 2017 16:12:50 +0100 Subject: [PATCH 17/17] docs, mini cleanup --- R/tuneParams.R | 15 ++++++--------- man/LearnerProperties.Rd | 2 +- man/evaluateParamExpressions.Learner.Rd | 1 - man/getTaskDictionary.Rd | 1 - man/tuneParams.Rd | 2 +- 5 files changed, 8 insertions(+), 13 deletions(-) diff --git a/R/tuneParams.R b/R/tuneParams.R index 49e17f3fa6..304993db54 100644 --- a/R/tuneParams.R +++ b/R/tuneParams.R @@ -31,8 +31,8 @@ #' @family tune #' @note If you would like to include results from the training data set, make #' sure to appropriately adjust the resampling strategy and the aggregation for -#' the measure. See example code below.\cr -#' Note that learners and parameter sets can contain task dependent +#' the measure. See example code below. +#' Also note that learners and parameter sets can contain task dependent #' expressions, see \code{\link{evaluateParamExpressions}} for more information. #' @export #' @examples @@ -92,14 +92,13 @@ tuneParams = function(learner, task, resampling, measures, par.set, control, show.info = getMlrOption("show.info")) { learner = checkLearner(learner) assertClass(task, classes = "Task") - dict = getTaskDictionary(task = task) - if (hasExpression(learner)) { - learner = evaluateParamExpressions(obj = learner, dict = dict) - } measures = checkMeasures(measures, learner) assertClass(par.set, classes = "ParamSet") - if (hasExpression(par.set)) + if (hasExpression(learner) || hasExpression(par.set)) { + dict = getTaskDictionary(task = task) + learner = evaluateParamExpressions(obj = learner, dict = dict) par.set = evaluateParamExpressions(obj = par.set, dict = dict) + } assertClass(control, classes = "TuneControl") if (!inherits(resampling, "ResampleDesc") && !inherits(resampling, "ResampleInstance")) stop("Argument resampling must be of class ResampleDesc or ResampleInstance!") @@ -132,5 +131,3 @@ tuneParams = function(learner, task, resampling, measures, par.set, control, sho messagef("[Tune] Result: %s : %s", paramValueToString(par.set, or$x), perfsToString(or$y)) return(or) } - - diff --git a/man/LearnerProperties.Rd b/man/LearnerProperties.Rd index f08525f200..08b3a54f5f 100644 --- a/man/LearnerProperties.Rd +++ b/man/LearnerProperties.Rd @@ -3,10 +3,10 @@ \name{LearnerProperties} \alias{LearnerProperties} \alias{getLearnerProperties} -\alias{hasExpression.Learner} \alias{hasLearnerProperties} \alias{getLearnerProperties} \alias{hasLearnerProperties} +\alias{hasExpression.Learner} \title{Query properties of learners.} \usage{ getLearnerProperties(learner) diff --git a/man/evaluateParamExpressions.Learner.Rd b/man/evaluateParamExpressions.Learner.Rd index 27810a0153..887f17c351 100644 --- a/man/evaluateParamExpressions.Learner.Rd +++ b/man/evaluateParamExpressions.Learner.Rd @@ -77,4 +77,3 @@ ps2 = evaluateParamExpressions(obj = ps1, dict = dict) ps1 ps2 } - diff --git a/man/getTaskDictionary.Rd b/man/getTaskDictionary.Rd index 92f20e5458..e19145405b 100644 --- a/man/getTaskDictionary.Rd +++ b/man/getTaskDictionary.Rd @@ -35,4 +35,3 @@ Other task: \code{\link{getTaskClassLevels}}, \code{\link{getTaskTargets}}, \code{\link{getTaskType}}, \code{\link{subsetTask}} } - diff --git a/man/tuneParams.Rd b/man/tuneParams.Rd index 0d07731dad..74ce8f1c7b 100644 --- a/man/tuneParams.Rd +++ b/man/tuneParams.Rd @@ -84,7 +84,7 @@ ps = makeParamSet( ) rdesc = makeResampleDesc("Subsample") ctrl = makeTuneControlRandom(maxit = 2L) -res = tuneParams(task = iris.task, learner = "classif.ksvm", par.set = ps, control = ctrl, resampling = rdesc) +res = tuneParams("classif.ksvm", iris.task, par.set = ps, control = ctrl, resampling = rdesc) \dontrun{ # we optimize the SVM over 3 kernels simultanously