# Source code for petab_select.model
"""The `Model` class."""
import warnings
from os.path import relpath
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import petab
import yaml
from more_itertools import one
from petab.C import ESTIMATE, NOMINAL_VALUE

from .constants import (
    CRITERIA,
    ESTIMATED_PARAMETERS,
    MODEL_HASH,
    MODEL_ID,
    MODEL_SUBSPACE_ID,
    MODEL_SUBSPACE_INDICES,
    PARAMETERS,
    PETAB_ESTIMATE_TRUE,
    PETAB_PROBLEM,
    PETAB_YAML,
    PREDECESSOR_MODEL_HASH,
    TYPE_CRITERION,
    TYPE_PARAMETER,
    TYPE_PATH,
    VIRTUAL_INITIAL_MODEL,
    Criterion,
)
from .criteria import CriterionComputer
from .misc import (
    hash_list,
    hash_parameter_dict,
    hash_str,
    parameter_string_to_value,
)
from .petab import PetabMixin

__all__ = [
    'Model',
    'default_compare',
    'models_from_yaml_list',
    'models_to_yaml_list',
]


class Model(PetabMixin):
    """A (possibly uncalibrated) model.

    NB: some of these attribute names correspond to constants defined in the
    `constants.py` file, to facilitate loading models from/saving models to
    disk (see the `saved_attributes` attribute).

    Attributes:
        converters_load:
            Functions to convert attributes from YAML to :class:`Model`.
        converters_save:
            Functions to convert attributes from :class:`Model` to YAML.
        criteria:
            The criteria values of the calibrated model (e.g. AIC).
        hash_attributes:
            This attribute is currently not used.
            Attributes that will be used to calculate the hash of the
            :class:`Model` instance. NB: this hash is used during pairwise
            comparison to determine whether any two :class:`Model` instances
            are unique. The model instances are compared by their parameter
            estimation problems, as opposed to parameter estimation results,
            which may differ due to e.g. floating-point arithmetic.
        model_id:
            The model ID.
        petab_yaml:
            The path to the PEtab problem YAML file.
        parameters:
            Parameter values that will overwrite the PEtab problem definition,
            or change parameters to be estimated.
        estimated_parameters:
            Parameter estimates from a model calibration tool, for parameters
            that are specified as estimated in the PEtab problem or PEtab
            Select model YAML. These are untransformed values (i.e., not on
            log scale).
        saved_attributes:
            Attributes that will be saved to disk by the :meth:`Model.to_yaml`
            method.
    """

    # Attributes (de)serialized by `to_dict`/`from_dict` (and hence the YAML
    # round trip). Each entry must have a converter in both dicts below.
    saved_attributes = (
        MODEL_ID,
        MODEL_SUBSPACE_ID,
        MODEL_SUBSPACE_INDICES,
        MODEL_HASH,
        PREDECESSOR_MODEL_HASH,
        PETAB_YAML,
        PARAMETERS,
        ESTIMATED_PARAMETERS,
        CRITERIA,
    )

    # Converters applied to raw YAML values when loading a model.
    converters_load = {
        MODEL_ID: lambda x: x,
        MODEL_SUBSPACE_ID: lambda x: x,
        MODEL_SUBSPACE_INDICES: lambda x: [] if not x else x,
        MODEL_HASH: lambda x: x,
        PREDECESSOR_MODEL_HASH: lambda x: x,
        PETAB_YAML: lambda x: x,
        PARAMETERS: lambda x: x,
        ESTIMATED_PARAMETERS: lambda x: x,
        CRITERIA: lambda x: {
            # `criterion_id_value` is the ID of the criterion in the enum `Criterion`.
            Criterion(criterion_id_value): float(criterion_value)
            for criterion_id_value, criterion_value in x.items()
        },
    }

    # Converters applied to attribute values when saving a model to YAML.
    converters_save = {
        MODEL_ID: lambda x: str(x),
        MODEL_SUBSPACE_ID: lambda x: str(x),
        MODEL_SUBSPACE_INDICES: lambda x: [int(xi) for xi in x],
        MODEL_HASH: lambda x: str(x),
        PREDECESSOR_MODEL_HASH: lambda x: str(x) if x is not None else x,
        PETAB_YAML: lambda x: str(x),
        PARAMETERS: lambda x: {str(k): v for k, v in x.items()},
        # FIXME handle with a `set_estimated_parameters` method instead?
        # to avoid `float` cast here. Reason for cast is because e.g. pyPESTO
        # can provide type `np.float64`, which causes issues when writing to
        # YAML.
        # ESTIMATED_PARAMETERS: lambda x: x,
        ESTIMATED_PARAMETERS: lambda x: {
            str(id): float(value) for id, value in x.items()
        },
        CRITERIA: lambda x: {
            criterion_id.value: float(criterion_value)
            for criterion_id, criterion_value in x.items()
        },
    }

    # Attributes hashed by `get_hash` to identify a model pre-calibration.
    hash_attributes = {
        # MODEL_ID: lambda x: hash(x),  # possible circular dependency on hash
        # MODEL_SUBSPACE_ID: lambda x: hash(x),
        # MODEL_SUBSPACE_INDICES: hash_list,
        # TODO replace `YAML` with `PETAB_PROBLEM_HASH`, as YAML could refer to
        # different problems if used on different filesystems or sometimes
        # absolute and other times relative. Better to check whether the
        # PEtab problem itself is unique.
        # TODO replace `PARAMETERS` with `PARAMETERS_ALL`, which should be al
        # parameters in the PEtab problem. This avoids treating the PEtab problem
        # differently to the model (in a subspace with the PEtab problem) that has
        # all nominal values defined in the subspace.
        # TODO add `estimated_parameters`? Needs to be clarified whether this hash
        # should be unique amongst only not-yet-calibrated models, or may also
        # return the same value between differently parameterized models that ended
        # up being calibrated to be the same... probably should be the former.
        # Currently, the hash is stored, hence will "persist" after calibration
        # if the same `Model` instance is used.
        # PETAB_YAML: lambda x: hash(x),
        PETAB_YAML: hash_str,
        PARAMETERS: hash_parameter_dict,
    }
[docs] def __init__( self, petab_yaml: TYPE_PATH, model_subspace_id: str = None, model_id: str = None, model_subspace_indices: List[int] = None, predecessor_model_hash: str = None, parameters: Dict[str, Union[int, float]] = None, estimated_parameters: Dict[str, Union[int, float]] = None, criteria: Dict[str, float] = None, # Optionally provided to reduce repeated parsing of `petab_yaml`. petab_problem: Optional[petab.Problem] = None, model_hash: Optional[Any] = None, ): self.model_id = model_id self.model_subspace_id = model_subspace_id self.model_subspace_indices = model_subspace_indices # TODO clean parameters, ensure single float or str (`ESTIMATE`) type self.parameters = parameters self.estimated_parameters = estimated_parameters self.criteria = criteria self.model_hash = model_hash self.predecessor_model_hash = predecessor_model_hash if self.parameters is None: self.parameters = {} if self.estimated_parameters is None: self.estimated_parameters = {} if self.criteria is None: self.criteria = {} super().__init__(petab_yaml=petab_yaml, petab_problem=petab_problem) if self.model_id is None: self.model_id = self.get_hash() self.criterion_computer = CriterionComputer(self)
[docs] def set_criterion(self, criterion: Criterion, value: float) -> None: """Set a criterion value for the model. Args: criterion: The criterion (e.g. ``petab_select.constants.Criterion.AIC``). value: The criterion value for the (presumably calibrated) model. """ if criterion in self.criteria: warnings.warn( 'Overwriting saved criterion value. ' f'Criterion: {criterion}. Value: {self.get_criterion(criterion)}.' ) # FIXME debug why value is overwritten during test case 0002. if False: print( 'Overwriting saved criterion value. ' f'Criterion: {criterion}. Value: {self.get_criterion(criterion)}.' ) breakpoint() self.criteria[criterion] = value
[docs] def has_criterion(self, criterion: Criterion) -> bool: """Check whether the model provides a value for a criterion. Args: criterion: The criterion (e.g. `petab_select.constants.Criterion.AIC`). """ # TODO also `and self.criteria[id] is not None`? return criterion in self.criteria
[docs] def get_criterion( self, criterion: Criterion, compute: bool = True, raise_on_failure: bool = True, ) -> Union[TYPE_CRITERION, None]: """Get a criterion value for the model. Args: criterion: The ID of the criterion (e.g. ``petab_select.constants.Criterion.AIC``). compute: Whether to try to compute the criterion value based on other model attributes. For example, if the ``'AIC'`` criterion is requested, this can be computed from a predetermined model likelihood and its number of estimated parameters. raise_on_failure: Whether to raise a `ValueError` if the criterion could not be computed. If `False`, `None` is returned. Returns: The criterion value, or `None` if it is not available. TODO check for previous use of this method before `.get` was used """ if criterion not in self.criteria and compute: self.compute_criterion( criterion=criterion, raise_on_failure=raise_on_failure, ) # value = self.criterion_computer(criterion=id) # self.set_criterion(id=id, value=value) return self.criteria.get(criterion, None)
[docs] def compute_criterion( self, criterion: Criterion, raise_on_failure: bool = True, ) -> TYPE_CRITERION: """Compute a criterion value for the model. The value will also be stored, which will overwrite any previously stored value for the criterion. Args: criterion: The ID of the criterion (e.g. :obj:`petab_select.constants.Criterion.AIC`). raise_on_failure: Whether to raise a `ValueError` if the criterion could not be computed. If `False`, `None` is returned. Returns: The criterion value. """ try: criterion_value = self.criterion_computer(criterion) self.set_criterion(criterion, criterion_value) result = criterion_value except ValueError: if raise_on_failure: raise result = None return result
[docs] def set_estimated_parameters( self, estimated_parameters: Dict[str, float], scaled: bool = False, ) -> None: """Set the estimated parameters. Args: estimated_parameters: The estimated parameters. scaled: Whether the ``estimated_parameters`` values are on the scale defined in the PEtab problem (``True``), or untransformed (``False``). """ if scaled: estimated_parameters = self.petab_problem.unscale_parameters( estimated_parameters ) self.estimated_parameters = estimated_parameters
[docs] @staticmethod def from_dict( model_dict: Dict[str, Any], base_path: TYPE_PATH = None, petab_problem: petab.Problem = None, ) -> 'Model': """Generate a model from a dictionary of attributes. Args: model_dict: A dictionary of attributes. The keys are attribute names, the values are the corresponding attribute values for the model. Required attributes are the required arguments of the :meth:`Model.__init__` method. base_path: The path that any relative paths in the model are relative to (e.g. the path to the PEtab problem YAML file :meth:`Model.petab_yaml` may be relative). petab_problem: Optionally provide the PEtab problem, to avoid loading it multiple times. NB: This may cause issues if multiple models write to the same PEtab problem in memory. Returns: A model instance, initialized with the provided attributes. """ unknown_attributes = set(model_dict).difference(Model.converters_load) if unknown_attributes: warnings.warn( 'Ignoring unknown attributes: ' + ', '.join(unknown_attributes) ) if base_path is not None: model_dict[PETAB_YAML] = base_path / model_dict[PETAB_YAML] model_dict = { attribute: Model.converters_load[attribute](value) for attribute, value in model_dict.items() if attribute in Model.converters_load } model_dict[PETAB_PROBLEM] = petab_problem return Model(**model_dict)
[docs] @staticmethod def from_yaml(model_yaml: TYPE_PATH) -> 'Model': """Generate a model from a PEtab Select model YAML file. Args: model_yaml: The path to the PEtab Select model YAML file. Returns: A model instance, initialized with the provided attributes. """ with open(str(model_yaml)) as f: model_dict = yaml.safe_load(f) # TODO check that the hash is reproducible if isinstance(model_dict, list): try: model_dict = one(model_dict) except ValueError: if len(model_dict) <= 1: raise raise ValueError( 'The provided YAML file contains a list with greater than ' 'one element. Use the `models_from_yaml_list` method or ' 'provide a PEtab Select model YAML file with only one ' 'model specified.' ) return Model.from_dict(model_dict, base_path=Path(model_yaml).parent)
[docs] def to_dict( self, resolve_paths: bool = True, paths_relative_to: Union[str, Path] = None, ) -> Dict[str, Any]: """Generate a dictionary from the attributes of a :class:`Model` instance. Args: resolve_paths: Whether to resolve relative paths into absolute paths. paths_relative_to: If not ``None``, paths will be converted to be relative to this path. Takes priority over ``resolve_paths``. Returns: A dictionary of attributes. The keys are attribute names, the values are the corresponding attribute values for the model. Required attributes are the required arguments of the :meth:`Model.__init__` method. """ model_dict = {} for attribute in self.saved_attributes: model_dict[attribute] = self.converters_save[attribute]( getattr(self, attribute) ) # TODO test if resolve_paths: if model_dict[PETAB_YAML]: model_dict[PETAB_YAML] = str( Path(model_dict[PETAB_YAML]).resolve() ) if paths_relative_to is not None: if model_dict[PETAB_YAML]: model_dict[PETAB_YAML] = relpath( Path(model_dict[PETAB_YAML]).resolve(), Path(paths_relative_to).resolve(), ) return model_dict
[docs] def to_yaml(self, petab_yaml: TYPE_PATH, *args, **kwargs) -> None: """Generate a PEtab Select model YAML file from a :class:`Model` instance. Parameters: petab_yaml: The location where the PEtab Select model YAML file will be saved. args, kwargs: Additional arguments are passed to ``self.to_dict``. """ # FIXME change `getattr(self, PETAB_YAML)` to be relative to # destination? # kind of fixed, as the path will be resolved in `to_dict`. with open(petab_yaml, 'w') as f: yaml.dump(self.to_dict(*args, **kwargs), f)
# yaml.dump(self.to_dict(), str(petab_yaml))
    def to_petab(
        self,
        output_path: TYPE_PATH = None,
        set_estimated_parameters: Optional[bool] = None,
    ) -> Dict[str, Union[petab.Problem, TYPE_PATH]]:
        """Generate a PEtab problem for this model's parameterization.

        Args:
            output_path:
                The directory where PEtab files will be written to disk. If
                not specified, the PEtab files will not be written to disk.
            set_estimated_parameters:
                Whether to set the nominal value of estimated parameters to
                their estimates. If parameter estimates are available, this
                will default to `True`.

        Returns:
            A dictionary with two entries: under ``PETAB_PROBLEM``, a PEtab
            problem that can be used with a PEtab-compatible tool for
            calibration of this model; under ``PETAB_YAML``, the path to a
            PEtab YAML file for that problem if ``output_path`` was provided,
            else ``None``.
        """
        # TODO could use `copy.deepcopy(self.petab_problem)` from PetabMixin?
        # A fresh problem is loaded so the shared one is not mutated.
        petab_problem = petab.Problem.from_yaml(str(self.petab_yaml))
        if set_estimated_parameters is None and self.estimated_parameters:
            set_estimated_parameters = True
        # Apply this model's parameterization on top of the PEtab problem.
        for parameter_id, parameter_value in self.parameters.items():
            # If the parameter is to be estimated.
            if parameter_value == ESTIMATE:
                petab_problem.parameter_df.loc[parameter_id, ESTIMATE] = 1
                if set_estimated_parameters:
                    if parameter_id not in self.estimated_parameters:
                        raise ValueError(
                            "Not all estimated parameters are available "
                            "in `model.estimated_parameters`. Hence, the "
                            "estimated parameter vector cannot be set as "
                            "the nominal value in the PEtab problem. "
                            "Try calling this method with "
                            "`set_estimated_parameters=False`."
                        )
                    # Use the estimate as the nominal value.
                    petab_problem.parameter_df.loc[
                        parameter_id, NOMINAL_VALUE
                    ] = self.estimated_parameters[parameter_id]
            # Else the parameter is to be fixed.
            else:
                petab_problem.parameter_df.loc[parameter_id, ESTIMATE] = 0
                # `parameter_value` may be a string (e.g. "1e-3"); convert it
                # to a numeric value for the parameter table.
                petab_problem.parameter_df.loc[
                    parameter_id, NOMINAL_VALUE
                ] = parameter_string_to_value(parameter_value)
                # parameter_value
        petab_yaml = None
        if output_path is not None:
            output_path = Path(output_path)
            output_path.mkdir(exist_ok=True, parents=True)
            petab_yaml = petab_problem.to_files_generic(
                prefix_path=output_path
            )
        return {
            PETAB_PROBLEM: petab_problem,
            PETAB_YAML: petab_yaml,
        }
[docs] def get_hash(self) -> int: """Get the model hash. Currently designed to only use pre-calibration information, such that if a model is calibrated twice and the two calibrated models differ in their parameter estimates, then they will still have the same hash. This is not implemented as ``__hash__`` because Python automatically truncates values in a system-dependent manner, which reduces interoperability ( https://docs.python.org/3/reference/datamodel.html#object.__hash__ ). Returns: The hash. """ if self.model_hash is None: self.model_hash = hash_list( [ method(getattr(self, attribute)) for attribute, method in Model.hash_attributes.items() ] ) return self.model_hash
def __hash__(self) -> None: """Use `Model.get_hash` instead.""" raise NotImplementedError('Use `Model.get_hash() instead.`') def __str__(self): """Get a print-ready string representation of the model. Returns: The print-ready string representation, in TSV format. """ parameter_ids = '\t'.join(self.parameters.keys()) parameter_values = '\t'.join(str(v) for v in self.parameters.values()) header = '\t'.join([MODEL_ID, PETAB_YAML, parameter_ids]) data = '\t'.join( [self.model_id, str(self.petab_yaml), parameter_values] ) # header = f'{MODEL_ID}\t{PETAB_YAML}\t{parameter_ids}' # data = f'{self.model_id}\t{self.petab_yaml}\t{parameter_values}' return f'{header}\n{data}'
    def get_mle(self) -> Dict[str, float]:
        """Get the maximum likelihood estimate of the model."""
        # NOTE(review): not implemented yet — this method currently returns
        # ``None`` (via `pass`), despite the `Dict[str, float]` annotation.
        # The string below is a dead draft implementation kept for reference.
        """ FIXME(dilpath)
        # Check if original PEtab problem or PEtab Select model has estimated
        # parameters. e.g. can use some of `self.to_petab` to get the parameter
        # df and see if any are estimated.
        if not self.has_estimated_parameters:
            warn('The MLE for this model contains no estimated parameters.')
        if not all([
            parameter_id in getattr(self, ESTIMATED_PARAMETERS)
            for parameter_id in self.get_estimated_parameter_ids_all()
        ]):
            warn('Not all estimated parameters have estimates stored.')
        petab_problem = petab.Problem.from_yaml(str(self.petab_yaml))
        return {
            parameter_id: (
                getattr(self, ESTIMATED_PARAMETERS).get(
                    # Return estimated parameter from `petab_select.Model`
                    # if possible.
                    parameter_id,
                    # Else return nominal value from PEtab parameter table.
                    petab_problem.parameter_df.loc[
                        parameter_id, NOMINAL_VALUE
                    ],
                )
            )
            for parameter_id in petab_problem.parameter_df.index
        }
        # TODO rewrite to construct return dict in a for loop, for more
        # informative error message as soon as a "should-be-estimated"
        # parameter has not estimate available in `self.estimated_parameters`.
        """
        # TODO
        pass
def get_estimated_parameter_ids_all(self) -> List[str]: estimated_parameter_ids = [] # Add all estimated parameters in the PEtab problem. petab_problem = petab.Problem.from_yaml(str(self.petab_yaml)) for parameter_id in petab_problem.parameter_df.index: if ( petab_problem.parameter_df.loc[parameter_id, ESTIMATE] == PETAB_ESTIMATE_TRUE ): estimated_parameter_ids.append(parameter_id) # Add additional estimated parameters, and collect fixed parameters, # in this model's parameterization. fixed_parameter_ids = [] for parameter_id, value in self.parameters.items(): if ( value == ESTIMATE and parameter_id not in estimated_parameter_ids ): estimated_parameter_ids.append(parameter_id) elif value != ESTIMATE: fixed_parameter_ids.append(parameter_id) # Remove fixed parameters. estimated_parameter_ids = [ parameter_id for parameter_id in estimated_parameter_ids if parameter_id not in fixed_parameter_ids ] return estimated_parameter_ids
[docs] def get_parameter_values( self, parameter_ids: Optional[List[str]] = None, ) -> List[TYPE_PARAMETER]: """Get parameter values. Includes ``ESTIMATE`` for parameters that should be estimated. The ordering is by ``parameter_ids`` if supplied, else ``self.petab_parameters``. Args: parameter_ids: The IDs of parameters that values will be returned for. Order is maintained. Returns: The values of parameters. """ if parameter_ids is None: parameter_ids = list(self.petab_parameters) return [ self.parameters.get( parameter_id, # Default to PEtab problem. self.petab_parameters[parameter_id], ) for parameter_id in parameter_ids ]
def default_compare(
    model0: Model,
    model1: Model,
    criterion: Criterion,
    criterion_threshold: float = 0,
) -> bool:
    """Compare two calibrated models by their criterion values.

    It is assumed that the model ``model0`` provides a value for the
    criterion ``criterion``, or is the ``VIRTUAL_INITIAL_MODEL``.

    Args:
        model0:
            The original model.
        model1:
            The new model.
        criterion:
            The criterion by which models will be compared.
        criterion_threshold:
            The value by which the new model must improve on the original
            model. Should be non-negative.

    Returns:
        ``True`` if ``model1`` has a better criterion value than ``model0``,
        else ``False``.
    """
    # Without a criterion value, the new model cannot be better.
    if not model1.has_criterion(criterion):
        warnings.warn(
            f'Model "{model1.model_id}" does not provide a value for the '
            f'criterion "{criterion}".'
        )
        return False

    # Anything improves on no model at all.
    if model0 == VIRTUAL_INITIAL_MODEL or model0 is None:
        return True

    if criterion_threshold < 0:
        warnings.warn(
            'The provided criterion threshold is negative. '
            'The absolute value will be used instead.'
        )
        criterion_threshold = abs(criterion_threshold)

    # Criteria where smaller values are better.
    if criterion in {
        Criterion.AIC,
        Criterion.AICC,
        Criterion.BIC,
        Criterion.NLLH,
    }:
        return (
            model1.get_criterion(criterion)
            < model0.get_criterion(criterion) - criterion_threshold
        )
    # Criteria where larger values are better.
    if criterion in {
        Criterion.LH,
        Criterion.LLH,
    }:
        return (
            model1.get_criterion(criterion)
            > model0.get_criterion(criterion) + criterion_threshold
        )
    raise NotImplementedError(f'Unknown criterion: {criterion}.')
def models_from_yaml_list(
    model_list_yaml: TYPE_PATH,
    petab_problem: petab.Problem = None,
    allow_single_model: bool = True,
) -> List[Model]:
    """Generate models from a PEtab Select list of model YAML file.

    Args:
        model_list_yaml:
            The path to the PEtab Select list of model YAML file.
        petab_problem:
            See :meth:`Model.from_dict`.
        allow_single_model:
            Given a YAML file that contains a single model directly (not in
            a 1-element list), if ``True`` then the single model will be
            read in, else a ``ValueError`` will be raised.

    Returns:
        A list of model instances, initialized with the provided attributes.
    """
    with open(str(model_list_yaml)) as f:
        loaded = yaml.safe_load(f)

    # An empty YAML file contains no models.
    if loaded is None:
        return []

    base_path = Path(model_list_yaml).parent
    if isinstance(loaded, list):
        return [
            Model.from_dict(
                model_dict,
                base_path=base_path,
                petab_problem=petab_problem,
            )
            for model_dict in loaded
        ]

    # The file contains a single model dictionary directly.
    if not allow_single_model:
        raise ValueError('The YAML file does not contain a list of models.')
    return [
        Model.from_dict(
            loaded,
            base_path=base_path,
            petab_problem=petab_problem,
        )
    ]
def models_to_yaml_list(
    models: List[Model],
    output_yaml: TYPE_PATH,
    relative_paths: bool = True,
) -> None:
    """Write a list of models to a PEtab Select list of model YAML file.

    Args:
        models:
            The models to save.
        output_yaml:
            The location where the YAML file will be written.
        relative_paths:
            Whether to rewrite the paths in each model (e.g. the path to the
            model's PEtab problem) to be relative to the output YAML file.
    """
    paths_relative_to = None
    if relative_paths:
        paths_relative_to = Path(output_yaml).parent
    model_dicts = [
        model.to_dict(paths_relative_to=paths_relative_to) for model in models
    ]
    # An empty list is written as YAML `null` rather than `[]`.
    model_dicts = model_dicts or None
    with open(output_yaml, 'w') as f:
        yaml.dump(model_dicts, f)