Skip to content

getml.hyperopt

Automatically find the best hyperparameters for the feature learners and predictors of a pipeline.

Enterprise edition

This feature is exclusive to the Enterprise edition and is not available in the Community edition. Discover the benefits of the Enterprise edition and compare the features of both editions.

For licensing information and technical support, please contact us.

Example

The easiest way to conduct a hyperparameter optimization is to use the built-in tuning routines. Note that these tuning routines usually take a day to complete unless we use very small data sets as we do in this example.

from getml import data
from getml import datasets
from getml import engine
from getml import feature_learning
from getml.feature_learning import aggregations
from getml.feature_learning import loss_functions
from getml import hyperopt
from getml import pipeline
from getml import predictors

# ----------------
# Select the project all objects below belong to.

engine.set_project("examples")

# ----------------
# Generate a small artificial data set so the tuning finishes quickly.

population_table, peripheral_table = datasets.make_numerical()

# ----------------
# Construct placeholders

population_placeholder = data.Placeholder("POPULATION")
peripheral_placeholder = data.Placeholder("PERIPHERAL")
population_placeholder.join(peripheral_placeholder, "join_key", "time_stamp")

# ----------------
# First feature learner of the base pipeline.

feature_learner1 = feature_learning.Multirel(
    aggregation=[
        aggregations.COUNT,
        aggregations.SUM
    ],
    loss_function=loss_functions.SquareLoss,
    num_features=10,
    share_aggregations=1.0,
    max_length=1,
    num_threads=0
)

# ----------------
# Second feature learner of the base pipeline.

feature_learner2 = feature_learning.Relboost(
    loss_function=loss_functions.SquareLoss,
    num_features=10
)

# ----------------
# Predictor of the base pipeline.

predictor = predictors.LinearRegression()

# ----------------
# Base pipeline handed to the tuning routines.

pipe = pipeline.Pipeline(
    population=population_placeholder,
    peripheral=[peripheral_placeholder],
    feature_learners=[feature_learner1, feature_learner2],
    predictors=[predictor]
)

# ----------------
# Tune the feature learners first. Only the `hyperopt` submodule is imported
# above (there is no `import getml`), and the base pipeline is bound to `pipe`.
# NOTE(review): the signature documented for tune_feature_learners takes
# `container`, `train` and `validation` rather than the table keyword
# arguments used here - confirm against the installed getml version.

tuned_pipeline = hyperopt.tune_feature_learners(
    pipeline=pipe,
    population_table_training=population_table,
    population_table_validation=population_table,
    peripheral_tables=[peripheral_table]
)

# ----------------
# Then tune the predictors, starting from the pipeline with tuned
# feature learners.
# NOTE(review): same keyword-argument caveat as above.

tuned_pipeline = hyperopt.tune_predictors(
    pipeline=tuned_pipeline,
    population_table_training=population_table,
    population_table_validation=population_table,
    peripheral_tables=[peripheral_table]
)
If you want to define the hyperparameter space and the tuning routine yourself, this is how you can do that:

# Example: define the hyperparameter space by hand and drive a
# GaussianHyperparameterSearch directly instead of using the built-in
# tuning routines.

from getml import data
from getml import datasets
from getml import engine
from getml import feature_learning
from getml.feature_learning import aggregations
from getml.feature_learning import loss_functions
from getml import hyperopt
from getml import pipeline
from getml import predictors

# ----------------
# Select the project all objects below belong to.

engine.set_project("examples")

# ----------------
# Generate a small artificial data set.

population_table, peripheral_table = datasets.make_numerical()

# ----------------
# Construct placeholders

population_placeholder = data.Placeholder("POPULATION")
peripheral_placeholder = data.Placeholder("PERIPHERAL")
population_placeholder.join(peripheral_placeholder, "join_key", "time_stamp")

# ----------------
# Base model - any parameters not included
# in param_space will be taken from this.

feature_learner1 = feature_learning.Multirel(
    aggregation=[
        aggregations.COUNT,
        aggregations.SUM
    ],
    loss_function=loss_functions.SquareLoss,
    num_features=10,
    share_aggregations=1.0,
    max_length=1,
    num_threads=0
)

# ----------------
# Base model - any parameters not included
# in param_space will be taken from this.

feature_learner2 = feature_learning.Relboost(
    loss_function=loss_functions.SquareLoss,
    num_features=10
)

# ----------------
# Base model - any parameters not included
# in param_space will be taken from this.

predictor = predictors.LinearRegression()

# ----------------
# Reference pipeline combining both feature learners and the predictor.

pipe = pipeline.Pipeline(
    population=population_placeholder,
    peripheral=[peripheral_placeholder],
    feature_learners=[feature_learner1, feature_learner2],
    predictors=[predictor]
)

# ----------------
# Build a hyperparameter space.
# We have two feature learners and one
# predictor, so this is how we must
# construct our hyperparameter space.
# If we only wanted to optimize the predictor,
# we could just leave out the feature_learners.
#
# The entries are positional: the first "feature_learners" dict belongs to
# feature_learner1 (Multirel), the second to feature_learner2 (Relboost),
# and the single "predictors" dict to the LinearRegression predictor.
# Presumably each two-element list is a [lower, upper] bound for that
# hyperparameter - confirm against the hyperopt class documentation.

param_space = {
    "feature_learners": [
        {
            "num_features": [10, 50],
        },
        {
            "max_depth": [1, 10],
            "min_num_samples": [100, 500],
            "num_features": [10, 50],
            "reg_lambda": [0.0, 0.1],
            "shrinkage": [0.01, 0.4]
        }],
    "predictors": [
        {
            "reg_lambda": [0.0, 10.0]
        }
    ]
}

# ----------------
# Wrap a GaussianHyperparameterSearch around the reference model

gaussian_search = hyperopt.GaussianHyperparameterSearch(
    pipeline=pipe,
    param_space=param_space,
    n_iter=30,
    score=pipeline.scores.rsquared
)

# Run the search. The same population table is used for training and
# validation, which keeps the example short.
gaussian_search.fit(
    population_table_training=population_table,
    population_table_validation=population_table,
    peripheral_tables=[peripheral_table]
)

# ----------------
# Continue the existing search with adjusted settings.

# We want 5 additional iterations.
gaussian_search.n_iter = 5

# We do not want another burn-in-phase,
# so we set ratio_iter to 0.
gaussian_search.ratio_iter = 0.0

# This widens the hyperparameter space.
gaussian_search.param_space["feature_learners"][1]["num_features"] = [10, 100]

# This narrows the hyperparameter space.
gaussian_search.param_space["predictors"][0]["reg_lambda"] = [0.0, 0.0]

# This continues the hyperparameter search using the previous iterations as
# prior knowledge.
gaussian_search.fit(
    population_table_training=population_table,
    population_table_validation=population_table,
    peripheral_tables=[peripheral_table]
)

# ----------------
# Names of all hyperopts stored in the current project.

all_hyp = hyperopt.list_hyperopts()

# Retrieve the best pipeline from the search object.
best_pipeline = gaussian_search.best_pipeline

list_hyperopts

list_hyperopts() -> List[str]

Lists all hyperparameter optimization objects present in the Engine.

Note that this function only lists hyperopts which are part of the current project. See set_project for changing projects.

To subsequently load one of them, use load_hyperopt.

RETURNS DESCRIPTION
List[str]

list containing the names of all hyperopts.

Source code in getml/hyperopt/helpers.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def list_hyperopts() -> List[str]:
    """Return the names of all hyperparameter optimizations in the Engine.

    Only hyperopts belonging to the current project are listed. Use
    [`set_project`][getml.engine.set_project] to switch to a different
    project.

    Any of the returned names can be passed to
    [`load_hyperopt`][getml.hyperopt.load_hyperopt.load_hyperopt] to load
    the corresponding object.

    Returns:
        list containing the names of all hyperopts.

    """

    request: Dict[str, Any] = {"type_": "list_hyperopts", "name_": ""}

    with comm.send_and_get_socket(request) as sock:
        # The Engine first replies with a status string; anything other
        # than "Success!" is handed to the engine exception handler.
        status = comm.recv_string(sock)
        if status != "Success!":
            comm.handle_engine_exception(status)
        # The second message is a JSON document with a "names" entry.
        payload = comm.recv_string(sock)

    return json.loads(payload)["names"]

tune_feature_learners

tune_feature_learners(
    pipeline: Pipeline,
    container: Container,
    train: str = "train",
    validation: str = "validation",
    n_iter: int = 0,
    score: Optional[str] = None,
    num_threads: int = 0,
) -> Pipeline

A high-level interface for optimizing the feature learners of a Pipeline.

Efficiently optimizes the hyperparameters for the set of feature learners (from feature_learning) of a given pipeline by breaking each feature learner's hyperparameter space down into carefully curated subspaces (see hyperopt_tuning_subspaces) and optimizing the hyperparameters for each subspace in a sequential multi-step process. For further details about the actual recipes behind the tuning routines, refer to the tuning routines documentation (hyperopt_tuning).

PARAMETER DESCRIPTION
pipeline

Base pipeline used to derive all models fitted and scored during the hyperparameter optimization. It defines the data schema and any hyperparameters that are not optimized.

TYPE: Pipeline

container

The data container used for the hyperparameter tuning.

TYPE: Container

train

The name of the subset in 'container' used for training.

TYPE: str DEFAULT: 'train'

validation

The name of the subset in 'container' used for validation.

TYPE: str DEFAULT: 'validation'

n_iter

The number of iterations.

TYPE: int DEFAULT: 0

score

The score to optimize. Must be from metrics.

TYPE: Optional[str] DEFAULT: None

num_threads

The number of parallel threads to use. If set to 0, the number of threads will be inferred.

TYPE: int DEFAULT: 0

RETURNS DESCRIPTION
Pipeline

Pipeline containing tuned versions of the feature learners.

Example

We assume that you have already set up your Pipeline and Container.

tuned_pipeline = getml.hyperopt.tune_feature_learners(
    pipeline=base_pipeline,
    container=container)
Source code in getml/hyperopt/tuning.py
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
def tune_feature_learners(
    pipeline: Pipeline,
    container: Container,
    train: str = "train",
    validation: str = "validation",
    n_iter: int = 0,
    score: Optional[str] = None,
    num_threads: int = 0,
) -> Pipeline:
    """
    A high-level interface for optimizing the feature learners of a
    [`Pipeline`][getml.pipeline.Pipeline].

    Efficiently optimizes the hyperparameters for the set of feature learners
    (from [`feature_learning`][getml.feature_learning]) of a given pipeline by breaking each
    feature learner's hyperparameter space down into carefully curated
    subspaces (see `hyperopt_tuning_subspaces`) and optimizing the
    hyperparameters for each subspace in a sequential multi-step process.
    For further details about the actual recipes behind the tuning routines
    refer to the tuning routines documentation (`hyperopt_tuning`).

    Args:
        pipeline:
            Base pipeline used to derive all models fitted and scored
            during the hyperparameter optimization. It defines the data
            schema and any hyperparameters that are not optimized.

        container:
            The data container used for the hyperparameter tuning.

        train:
            The name of the subset in 'container' used for training.

        validation:
            The name of the subset in 'container' used for validation.

        n_iter:
            The number of iterations.

        score:
            The score to optimize. Must be from
            [`metrics`][getml.pipeline.metrics]. If not set, the score is
            inferred from the pipeline.

        num_threads:
            The number of parallel threads to use. If set to 0,
            the number of threads will be inferred.

    Returns:
        Pipeline containing tuned versions of the feature learners.

    Raises:
        TypeError: If `pipeline` is not a
            [`Pipeline`][getml.pipeline.Pipeline].

    ??? example
        We assume that you have already set up your
        [`Pipeline`][getml.Pipeline] and
        [`Container`][getml.data.Container].

        ```python
        tuned_pipeline = getml.hyperopt.tune_feature_learners(
            pipeline=base_pipeline,
            container=container)
        ```
    """

    if not isinstance(pipeline, getml.pipeline.Pipeline):
        raise TypeError("'pipeline' must be a pipeline!")

    pipeline._validate()

    if not score:
        score = _infer_score(pipeline)

    tuned_feature_learners = []

    # Each feature learner is tuned on its own, always starting from the
    # unmodified base pipeline.
    for feature_learner in pipeline.feature_learners:
        tuned_pipeline = _tune_feature_learner(
            feature_learner=feature_learner,
            pipeline=pipeline,
            container=container,
            train=train,
            validation=validation,
            n_iter=n_iter,
            score=score,
            num_threads=num_threads,
        )

        # Internal invariant: the per-learner tuning step hands back a
        # pipeline holding only the feature learner that was tuned.
        assert len(tuned_pipeline.feature_learners) == 1, (
            f"Expected exactly one feature learner, got "
            f"{len(tuned_pipeline.feature_learners)}"
        )

        tuned_feature_learners.append(tuned_pipeline.feature_learners[0])

    # The predictors are carried over untouched (as deep copies) so the
    # final pipeline does not share them with the base pipeline.
    return _make_final_pipeline(
        pipeline,
        tuned_feature_learners,
        copy.deepcopy(pipeline.predictors),
        container,
        train,
        validation,
    )

tune_predictors

tune_predictors(
    pipeline: Pipeline,
    container: Container,
    train: str = "train",
    validation: str = "validation",
    n_iter: int = 0,
    score: Optional[str] = None,
    num_threads: int = 0,
) -> Pipeline

A high-level interface for optimizing the predictors of a Pipeline.

Efficiently optimizes the hyperparameters for the set of predictors (from getml.predictors) of a given pipeline by breaking each predictor's hyperparameter space down into carefully curated subspaces (see hyperopt_tuning_subspaces) and optimizing the hyperparameters for each subspace in a sequential multi-step process. For further details about the actual recipes behind the tuning routines, refer to the tuning routines documentation (hyperopt_tuning).

PARAMETER DESCRIPTION
pipeline

Base pipeline used to derive all models fitted and scored during the hyperparameter optimization. It defines the data schema and any hyperparameters that are not optimized.

TYPE: Pipeline

container

The data container used for the hyperparameter tuning.

TYPE: Container

train

The name of the subset in 'container' used for training.

TYPE: str DEFAULT: 'train'

validation

The name of the subset in 'container' used for validation.

TYPE: str DEFAULT: 'validation'

n_iter

The number of iterations.

TYPE: int DEFAULT: 0

score

The score to optimize. Must be from metrics.

TYPE: Optional[str] DEFAULT: None

num_threads

The number of parallel threads to use. If set to 0, the number of threads will be inferred.

TYPE: int DEFAULT: 0

Example

We assume that you have already set up your Pipeline and Container.

# `base_pipeline` and `container` are assumed to have been set up already.
tuned_pipeline = getml.hyperopt.tune_predictors(
    pipeline=base_pipeline,
    container=container,
)
RETURNS DESCRIPTION
Pipeline

Pipeline containing tuned predictors.

Source code in getml/hyperopt/tuning.py
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
def tune_predictors(
    pipeline: Pipeline,
    container: Container,
    train: str = "train",
    validation: str = "validation",
    n_iter: int = 0,
    score: Optional[str] = None,
    num_threads: int = 0,
) -> Pipeline:
    """
    A high-level interface for optimizing the predictors of a
    [`Pipeline`][getml.Pipeline].

    The hyperparameter space of every predictor (from
    [`getml.predictors`][getml.predictors]) in the given pipeline is broken
    down into carefully curated subspaces (see `hyperopt_tuning_subspaces`),
    and the hyperparameters of each subspace are optimized in a sequential
    multi-step process. For further details about the actual recipes behind
    the tuning routines refer to the tuning routines documentation
    (`hyperopt_tuning`).

    Args:
        pipeline:
            Base pipeline used to derive all models fitted and scored
            during the hyperparameter optimization. It defines the data
            schema and any hyperparameters that are not optimized.

        container:
            The data container used for the hyperparameter tuning.

        train:
            The name of the subset in 'container' used for training.

        validation:
            The name of the subset in 'container' used for validation.

        n_iter:
            The number of iterations.

        score:
            The score to optimize. Must be from
            [`metrics`][getml.pipeline.metrics]. If not set, the score is
            inferred from the pipeline.

        num_threads:
            The number of parallel threads to use. If set to 0,
            the number of threads will be inferred.

    Returns:
        Pipeline containing tuned predictors.

    ??? example
        We assume that you have already set up your
        [`Pipeline`][getml.Pipeline] and
        [`Container`][getml.data.Container].

        ```python
        tuned_pipeline = getml.hyperopt.tune_predictors(
            pipeline=base_pipeline,
            container=container)
        ```

    """

    if not isinstance(pipeline, getml.pipeline.Pipeline):
        raise TypeError("'pipeline' must be a pipeline!")

    pipeline._validate()

    score = score or _infer_score(pipeline)

    # Settings shared by every per-predictor tuning run.
    shared_settings = dict(
        pipeline=pipeline,
        container=container,
        train=train,
        validation=validation,
        n_iter=n_iter,
        score=score,
        num_threads=num_threads,
    )

    best_predictors = []

    # Every predictor is tuned separately against the same base pipeline.
    for predictor in pipeline.predictors:
        candidate = _tune_predictor(predictor=predictor, **shared_settings)

        # Internal invariant: the tuning step returns a pipeline holding
        # only the predictor that was just tuned.
        assert len(candidate.predictors) == 1, (
            "Expected exactly one predictor, got " + str(len(candidate.predictors))
        )

        best_predictors.append(candidate.predictors[0])

    # Feature learners are passed on as unchanged deep copies.
    untouched_feature_learners = copy.deepcopy(pipeline.feature_learners)

    return _make_final_pipeline(
        pipeline,
        untouched_feature_learners,
        best_predictors,
        container,
        train,
        validation,
    )

exists

exists(name: str) -> bool

Determines whether a hyperopt exists.

PARAMETER DESCRIPTION
name

The name of the hyperopt.

TYPE: str

RETURNS DESCRIPTION
bool

A boolean indicating whether a hyperopt named 'name' exists.

Source code in getml/hyperopt/helpers.py
50
51
52
53
54
55
56
57
58
59
60
61
62
def exists(name: str) -> bool:
    """Check whether a hyperopt with the given name is known to the Engine.

    Args:
        name: The name of the hyperopt.

    Returns:
        A boolean indicating whether a hyperopt named 'name' exists.

    Raises:
        TypeError: If 'name' is not a string.
    """
    if isinstance(name, str):
        # Membership is decided against the hyperopts listed by the Engine.
        return name in list_hyperopts()

    raise TypeError("'name' must be of type str")

delete

delete(name: str) -> None

If a hyperopt named 'name' exists, it is deleted.

PARAMETER DESCRIPTION
name

The name of the hyperopt.

TYPE: str

Source code in getml/hyperopt/helpers.py
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def delete(name: str) -> None:
    """
    Delete the hyperopt named 'name'; do nothing if no such hyperopt exists.

    Args:
        name: The name of the hyperopt.
    """

    if exists(name):
        request: Dict[str, Any] = {"type_": "Hyperopt.delete", "name_": name}

        with comm.send_and_get_socket(request) as sock:
            # Any reply other than "Success!" is passed to the engine
            # exception handler.
            reply = comm.recv_string(sock)
            if reply != "Success!":
                comm.handle_engine_exception(reply)

load_hyperopt

Loads a hyperparameter optimization object from the getML Engine into Python.

PARAMETER DESCRIPTION
name

The name of the hyperopt to be loaded.

TYPE: str

RETURNS DESCRIPTION
Union[GaussianHyperparameterSearch, LatinHypercubeSearch, RandomSearch]

The hyperopt object.

Source code in getml/hyperopt/load_hyperopt.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
def load_hyperopt(
    name: str,
) -> Union[GaussianHyperparameterSearch, LatinHypercubeSearch, RandomSearch]:
    """Loads a hyperparameter optimization object from the getML Engine into Python.

    Args:
        name:
            The name of the hyperopt to be loaded.

    Returns:
        The hyperopt object.

    Raises:
        ValueError: If the type reported for the hyperopt is not one of the
            supported search classes.

    """
    # Placeholder arguments that are only needed to construct the search
    # object; presumably they are replaced once the JSON object is parsed.
    placeholder_pipeline = _make_dummy("123456")
    placeholder_space = {"predictors": [{"reg_lambda": [0.0, 1.0]}]}

    json_obj = _get_json_obj(name)
    reported_type = json_obj["type_"]

    # Supported search classes, checked in this order against "type_".
    known_searches = (
        ("GaussianHyperparameterSearch", GaussianHyperparameterSearch),
        ("LatinHypercubeSearch", LatinHypercubeSearch),
        ("RandomSearch", RandomSearch),
    )

    for type_name, search_class in known_searches:
        if reported_type == type_name:
            search = search_class(
                param_space=placeholder_space, pipeline=placeholder_pipeline
            )
            return search._parse_json_obj(json_obj)

    raise ValueError("Unknown type: '" + reported_type + "'!")

kernels

Collection of kernel functions to be used by the hyperparameter optimizations.

exp module-attribute

exp = 'exp'

An exponential kernel yielding non-differentiable sample paths.

gauss module-attribute

gauss = 'gauss'

A Gaussian kernel yielding analytic (infinitely differentiable) sample paths.

matern32 module-attribute

matern32 = 'matern32'

A Matérn 3/2 kernel yielding once-differentiable sample paths.

matern52 module-attribute

matern52 = 'matern52'

A Matérn 5/2 kernel yielding twice-differentiable sample paths.

optimization

Collection of optimization algorithms to be used by the hyperparameter optimizations.

bfgs module-attribute

bfgs = 'bfgs'

Broyden-Fletcher-Goldfarb-Shanno optimization algorithm.

The BFGS algorithm is a quasi-Newton method that requires the function to be differentiable.

nelder_mead module-attribute

nelder_mead = 'nelderMead'

Nelder-Mead optimization algorithm.

Nelder-Mead is a direct search method that does not require functions to be differentiable.

burn_in

Collection of burn-in algorithms to be used by the hyperparameter optimizations.

latin_hypercube module-attribute

latin_hypercube = 'latinHypercube'

Samples from the hyperparameter space almost randomly, but ensures that the different draws are sufficiently different from each other.

random module-attribute

random = 'random'

Samples from the hyperparameter space at random.