getml.hyperopt
Automatically find the best parameters for:

- Multirel
- Relboost
- RelMT
- FastProp
- FastBoost
- LinearRegression
- LogisticRegression
- XGBoostClassifier
- XGBoostRegressor
Enterprise edition
This feature is exclusive to the Enterprise edition and is not available in the Community edition. Discover the benefits of the Enterprise edition and compare its features with those of the Community edition.
For licensing information and technical support, please contact us.
Example
The easiest way to conduct a hyperparameter optimization is to use the built-in tuning routines. Note that these tuning routines usually take a day to complete unless we use very small data sets, as we do in this example.
from getml import data
from getml import datasets
from getml import engine
from getml import feature_learning
from getml.feature_learning import aggregations
from getml.feature_learning import loss_functions
from getml import hyperopt
from getml import pipeline
from getml import predictors
# ----------------
engine.set_project("examples")
# ----------------
population_table, peripheral_table = datasets.make_numerical()
# ----------------
# Construct placeholders
population_placeholder = data.Placeholder("POPULATION")
peripheral_placeholder = data.Placeholder("PERIPHERAL")
population_placeholder.join(peripheral_placeholder, "join_key", "time_stamp")
# ----------------
feature_learner1 = feature_learning.Multirel(
aggregation=[
aggregations.COUNT,
aggregations.SUM
],
loss_function=loss_functions.SquareLoss,
num_features=10,
share_aggregations=1.0,
max_length=1,
num_threads=0
)
# ----------------
feature_learner2 = feature_learning.Relboost(
loss_function=loss_functions.SquareLoss,
num_features=10
)
# ----------------
predictor = predictors.LinearRegression()
# ----------------
pipe = pipeline.Pipeline(
population=population_placeholder,
peripheral=[peripheral_placeholder],
feature_learners=[feature_learner1, feature_learner2],
predictors=[predictor]
)
# ----------------
tuned_pipeline = hyperopt.tune_feature_learners(
    pipeline=pipe,
    population_table_training=population_table,
    population_table_validation=population_table,
    peripheral_tables=[peripheral_table]
)
# ----------------
tuned_pipeline = hyperopt.tune_predictors(
    pipeline=tuned_pipeline,
    population_table_training=population_table,
    population_table_validation=population_table,
    peripheral_tables=[peripheral_table]
)

If you want more control over the search, you can construct the hyperparameter space yourself and use a GaussianHyperparameterSearch:
from getml import data
from getml import datasets
from getml import engine
from getml import feature_learning
from getml.feature_learning import aggregations
from getml.feature_learning import loss_functions
from getml import hyperopt
from getml import pipeline
from getml import predictors
# ----------------
engine.set_project("examples")
# ----------------
population_table, peripheral_table = datasets.make_numerical()
# ----------------
# Construct placeholders
population_placeholder = data.Placeholder("POPULATION")
peripheral_placeholder = data.Placeholder("PERIPHERAL")
population_placeholder.join(peripheral_placeholder, "join_key", "time_stamp")
# ----------------
# Base model - any parameters not included
# in param_space will be taken from this.
feature_learner1 = feature_learning.Multirel(
aggregation=[
aggregations.COUNT,
aggregations.SUM
],
loss_function=loss_functions.SquareLoss,
num_features=10,
share_aggregations=1.0,
max_length=1,
num_threads=0
)
# ----------------
# Base model - any parameters not included
# in param_space will be taken from this.
feature_learner2 = feature_learning.Relboost(
loss_function=loss_functions.SquareLoss,
num_features=10
)
# ----------------
# Base model - any parameters not included
# in param_space will be taken from this.
predictor = predictors.LinearRegression()
# ----------------
pipe = pipeline.Pipeline(
population=population_placeholder,
peripheral=[peripheral_placeholder],
feature_learners=[feature_learner1, feature_learner2],
predictors=[predictor]
)
# ----------------
# Build a hyperparameter space.
# We have two feature learners and one
# predictor, so this is how we must
# construct our hyperparameter space.
# If we only wanted to optimize the predictor,
# we could just leave out the feature_learners.
param_space = {
"feature_learners": [
{
"num_features": [10, 50],
},
{
"max_depth": [1, 10],
"min_num_samples": [100, 500],
"num_features": [10, 50],
"reg_lambda": [0.0, 0.1],
"shrinkage": [0.01, 0.4]
}],
"predictors": [
{
"reg_lambda": [0.0, 10.0]
}
]
}
# ----------------
# Wrap a GaussianHyperparameterSearch around the reference model
gaussian_search = hyperopt.GaussianHyperparameterSearch(
pipeline=pipe,
param_space=param_space,
n_iter=30,
score=pipeline.scores.rsquared
)
gaussian_search.fit(
population_table_training=population_table,
population_table_validation=population_table,
peripheral_tables=[peripheral_table]
)
# ----------------
# We want 5 additional iterations.
gaussian_search.n_iter = 5
# We do not want another burn-in-phase,
# so we set ratio_iter to 0.
gaussian_search.ratio_iter = 0.0
# This widens the hyperparameter space.
gaussian_search.param_space["feature_learners"][1]["num_features"] = [10, 100]
# This narrows the hyperparameter space.
gaussian_search.param_space["predictors"][0]["reg_lambda"] = [0.0, 0.0]
# This continues the hyperparameter search using the previous iterations as
# prior knowledge.
gaussian_search.fit(
population_table_training=population_table,
population_table_validation=population_table,
peripheral_tables=[peripheral_table]
)
# ----------------
all_hyp = hyperopt.list_hyperopts()
best_pipeline = gaussian_search.best_pipeline
list_hyperopts
Lists all hyperparameter optimization objects present in the Engine.
Note that this function only lists hyperopts which are part of the current project. See set_project for changing projects. To subsequently load one of them, use load_hyperopt.
RETURNS | DESCRIPTION
---|---
List[str] | A list containing the names of all hyperopts.
Source code in getml/hyperopt/helpers.py
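A minimal usage sketch (the printed names depend on the hyperopts already stored in the current project):

from getml import hyperopt
# Print the name of every hyperopt stored in the current project.
for name in hyperopt.list_hyperopts():
    print(name)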
tune_feature_learners
tune_feature_learners(
pipeline: Pipeline,
container: Container,
train: str = "train",
validation: str = "validation",
n_iter: int = 0,
score: Optional[str] = None,
num_threads: int = 0,
) -> Pipeline
A high-level interface for optimizing the feature learners of a Pipeline.
Efficiently optimizes the hyperparameters of a pipeline's feature learners (from feature_learning) by breaking each feature learner's hyperparameter space down into carefully curated subspaces and optimizing the hyperparameters for each subspace in a sequential multi-step process. For further details about the recipes behind the tuning routines, refer to the tuning routines section of the documentation.
PARAMETER | DESCRIPTION
---|---
pipeline | Base pipeline used to derive all models fitted and scored during the hyperparameter optimization. It defines the data schema and any hyperparameters that are not optimized. TYPE: Pipeline
container | The data container used for the hyperparameter tuning. TYPE: Container
train | The name of the subset in 'container' used for training. TYPE: str DEFAULT: 'train'
validation | The name of the subset in 'container' used for validation. TYPE: str DEFAULT: 'validation'
n_iter | The number of iterations. TYPE: int DEFAULT: 0
score | The score to optimize. Must be one of the constants in pipeline.scores. TYPE: Optional[str] DEFAULT: None
num_threads | The number of parallel threads to use. If set to 0, the number of threads will be inferred. TYPE: int DEFAULT: 0
RETURNS | DESCRIPTION
---|---
Pipeline | A pipeline containing tuned versions of the feature learners.
Example
We assume that you have already set up your Pipeline and Container.
tuned_pipeline = getml.hyperopt.tune_feature_learners(
    pipeline=base_pipeline,
    container=container)
Source code in getml/hyperopt/tuning.py
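The example above assumes an existing Container with a 'train' and a 'validation' subset. Below is a minimal sketch of how such a container might be built; the split fractions and the peripheral name PERIPHERAL are illustrative assumptions, not part of this reference:

from getml import data
# Randomly assign rows of the population table to the subsets expected by the tuning routines.
split = data.split.random(train=0.8, validation=0.2)
container = data.Container(population=population_table, split=split)
# Register the peripheral table under the name used in the data model.
container.add(PERIPHERAL=peripheral_table)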
tune_predictors
tune_predictors(
pipeline: Pipeline,
container: Container,
train: str = "train",
validation: str = "validation",
n_iter: int = 0,
score: Optional[str] = None,
num_threads: int = 0,
) -> Pipeline
A high-level interface for optimizing the predictors of a Pipeline.
Efficiently optimizes the hyperparameters of a pipeline's predictors (from getml.predictors) by breaking each predictor's hyperparameter space down into carefully curated subspaces and optimizing the hyperparameters for each subspace in a sequential multi-step process. For further details about the recipes behind the tuning routines, refer to the tuning routines section of the documentation.
PARAMETER | DESCRIPTION
---|---
pipeline | Base pipeline used to derive all models fitted and scored during the hyperparameter optimization. It defines the data schema and any hyperparameters that are not optimized. TYPE: Pipeline
container | The data container used for the hyperparameter tuning. TYPE: Container
train | The name of the subset in 'container' used for training. TYPE: str DEFAULT: 'train'
validation | The name of the subset in 'container' used for validation. TYPE: str DEFAULT: 'validation'
n_iter | The number of iterations. TYPE: int DEFAULT: 0
score | The score to optimize. Must be one of the constants in pipeline.scores. TYPE: Optional[str] DEFAULT: None
num_threads | The number of parallel threads to use. If set to 0, the number of threads will be inferred. TYPE: int DEFAULT: 0
Example
We assume that you have already set up your Pipeline and Container.
tuned_pipeline = getml.hyperopt.tune_predictors(
    pipeline=base_pipeline,
    container=container)
RETURNS | DESCRIPTION
---|---
Pipeline | A pipeline containing tuned predictors.
Source code in getml/hyperopt/tuning.py
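Putting both routines together, a typical workflow first tunes the feature learners and then the predictors. The sketch below only uses the signatures documented above; base_pipeline and container are assumed to exist, and the choice of score is merely an illustration:

from getml import hyperopt, pipeline
# Tune the feature learners first, then tune the predictors on top of the result.
tuned = hyperopt.tune_feature_learners(
    pipeline=base_pipeline,
    container=container,
    score=pipeline.scores.rsquared
)
tuned = hyperopt.tune_predictors(
    pipeline=tuned,
    container=container,
    score=pipeline.scores.rsquared
)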
exists
Determines whether a hyperopt exists.
PARAMETER | DESCRIPTION
---|---
name | The name of the hyperopt. TYPE: str
RETURNS | DESCRIPTION
---|---
bool | A boolean indicating whether a hyperopt named 'name' exists.
Source code in getml/hyperopt/helpers.py
delete
delete(name: str) -> None
If a hyperopt named 'name' exists, it is deleted.
PARAMETER | DESCRIPTION
---|---
name | The name of the hyperopt. TYPE: str
Source code in getml/hyperopt/helpers.py
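A small sketch combining exists and delete; the hyperopt name 'my_search' is just a placeholder:

from getml import hyperopt
# Remove a stale hyperopt from the current project, if it is present.
if hyperopt.exists("my_search"):
    hyperopt.delete("my_search")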
load_hyperopt
load_hyperopt(
name: str,
) -> Union[
GaussianHyperparameterSearch,
LatinHypercubeSearch,
RandomSearch,
]
Loads a hyperparameter optimization object from the getML Engine into Python.
PARAMETER | DESCRIPTION
---|---
name | The name of the hyperopt to be loaded. TYPE: str
RETURNS | DESCRIPTION
---|---
Union[GaussianHyperparameterSearch, LatinHypercubeSearch, RandomSearch] | The hyperopt object.
Source code in getml/hyperopt/load_hyperopt.py
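For example, to reload a previously fitted search in a later session (assuming a hyperopt named 'my_search' exists in the current project):

from getml import hyperopt
# Load the hyperparameter optimization object from the Engine into Python.
search = hyperopt.load_hyperopt("my_search")
print(search)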
kernels
Collection of kernel functions to be used by the hyperparameter optimizations.
exp module-attribute
exp = 'exp'
An exponential kernel yielding non-differentiable sample paths.
gauss module-attribute
gauss = 'gauss'
A Gaussian kernel yielding analytic (infinitely differentiable) sample paths.
matern32 module-attribute
matern32 = 'matern32'
A Matérn 3/2 kernel yielding once-differentiable sample paths.
matern52 module-attribute
matern52 = 'matern52'
A Matérn 5/2 kernel yielding twice-differentiable sample paths.
optimization
Collection of optimization algorithms to be used by the hyperparameter optimizations.
bfgs module-attribute
bfgs = 'bfgs'
Broyden-Fletcher-Goldfarb-Shanno optimization algorithm.
The BFGS algorithm is a quasi-Newton method that requires the function to be differentiable.
nelder_mead module-attribute
nelder_mead = 'nelderMead'
Nelder-Mead optimization algorithm.
Nelder-Mead is a direct search method that does not require functions to be differentiable.
burn_in
Collection of burn-in algorithms to be used by the hyperparameter optimizations.
latin_hypercube module-attribute
latin_hypercube = 'latinHypercube'
Samples from the hyperparameter space almost randomly, but ensures that the different draws are sufficiently different from each other.
random module-attribute
random = 'random'
Samples from the hyperparameter space at random.
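All of the constants in kernels, optimization, and burn_in are plain strings that are handed to the hyperparameter optimizers; see the GaussianHyperparameterSearch reference for the exact parameter names. A quick sketch that only prints the underlying values:

from getml import hyperopt
# The module attributes are simple string constants.
print(hyperopt.kernels.matern52)          # 'matern52'
print(hyperopt.optimization.nelder_mead)  # 'nelderMead'
print(hyperopt.burn_in.latin_hypercube)   # 'latinHypercube'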