Source code for hsr4hci.base_models
"""
Methods for creating HSR base models.
"""
# -----------------------------------------------------------------------------
# IMPORTS
# -----------------------------------------------------------------------------
from copy import deepcopy
from functools import lru_cache
from typing import Any
import numpy as np
from hsr4hci.importing import get_member_by_name
from hsr4hci.typehinting import RegressorModel
# -----------------------------------------------------------------------------
# CLASS DEFINITIONS
# -----------------------------------------------------------------------------
[docs]class BaseModelCreator:
"""
Wrapper class for creating new base model instances.
Example:
>>> base_model_config = {
>>> 'module': 'sklearn.linear_model',
>>> 'class': 'LinearRegression',
>>> 'parameters': {'fit_intercept': False},
>>> }
>>> bmc = BaseModelCreator(**base_model_config)
>>> model = bmc.get_model_instance()
>>> model
LinearRegression(fit_intercept=False)
.. note::
Ideally, this function should simply take three arguments
instead of a dictionary. The reason behind the current version
is a poor early design choice for the experiment configuration
files: The "class" parameter should have been called "name"
instead, because ``class`` is a protected key word in Python
that cannot be used as the name of an input parameter.
However, changing this now would require updating all experiment
configuration files and all training scripts...
"""
[docs] def __init__(self, **base_model_config: Any) -> None:
"""
Args:
**base_model_config: A ``dict`` containing the configuration
of the base model. It needs to have exactly three keys
(see example above):
- ``module``: A string with the module from which the
base model should be imported.
- ``class``: A string with the class (= name) of the
base model.
- ``parameters``: A dictionary with additional keyword
arguments that will be passed to the constructor of
``module.class``. Can be empty: ``{}``.
"""
# Unpack base model configuration
self.module_name = base_model_config['module']
self.class_name = base_model_config['class']
self.parameters = base_model_config['parameters']
[docs] @lru_cache(maxsize=1)
def get_model_instance(self) -> RegressorModel:
"""
Get a new instance of the base model defined in the config.
Returns:
An instance of a regression method (e.g., from ``sklearn``)
that must provide the ``.fit()`` and ``.predict()`` methods.
"""
# Get the model class and the model parameters
model_class = get_member_by_name(
module_name=self.module_name, member_name=self.class_name
)
model_parameters = deepcopy(self.parameters)
# Augment the model parameters:
# For RidgeCV models, we have to parse the ``alphas`` parameter (i.e.,
# the regularization strengths) into a geometrically spaced array
if (
self.class_name in ('RidgeCV', 'LassoCV')
and 'alphas' in model_parameters.keys()
):
model_parameters['alphas'] = np.geomspace(
*model_parameters['alphas']
)
# Instantiate a new model of the given class with the desired params
model: RegressorModel = model_class(**model_parameters)
return model