# Source code for petab_select.ui

import copy
from pathlib import Path
from typing import Dict, List, Optional, Union

import numpy as np
import petab

from .candidate_space import CandidateSpace, FamosCandidateSpace
from .constants import (
    INITIAL_MODEL_METHODS,
    TYPE_PATH,
    VIRTUAL_INITIAL_MODEL,
    Criterion,
    Method,
)
from .model import Model, default_compare
from .problem import Problem

# Public API of this module; also controls `from petab_select.ui import *`.
__all__ = [
    'candidates',
    'model_to_petab',
    'models_to_petab',
    'best',
    'write_summary_tsv',
]


def candidates(
    problem: Problem,
    candidate_space: Optional[CandidateSpace] = None,
    limit: Union[float, int] = np.inf,
    limit_sent: Union[float, int] = np.inf,
    calibrated_models: Optional[Dict[str, Model]] = None,
    newly_calibrated_models: Optional[Dict[str, Model]] = None,
    excluded_models: Optional[List[Model]] = None,
    excluded_model_hashes: Optional[List[str]] = None,
    criterion: Optional[Criterion] = None,
) -> CandidateSpace:
    """Search the model space for candidate models.

    A predecessor model is chosen from ``newly_calibrated_models`` if
    available, otherwise from ``calibrated_models``, and is used for
    applicable methods.

    Args:
        problem:
            A PEtab Select problem.
        candidate_space:
            The candidate space. Defaults to a new candidate space based on
            the method defined in the problem.
        limit:
            The maximum number of models to add to the candidate space.
        limit_sent:
            The maximum number of models sent to the candidate space (which
            are possibly rejected and excluded).
        calibrated_models:
            All calibrated models in the model selection.
        newly_calibrated_models:
            All calibrated models in the most recent iteration of model
            selection.
        excluded_models:
            Models that will be excluded from model subspaces during the
            search for candidates.
        excluded_model_hashes:
            Hashes of models that will be excluded from model subspaces
            during the search for candidates.
        criterion:
            The criterion by which models will be compared. Defaults to the
            criterion defined in the PEtab Select problem.

    Returns:
        The candidate space, which contains the candidate models.
    """
    do_search = True
    # FIXME might be difficult for a CLI tool to specify a specific
    #       predecessor model if their candidate space has models. Need a
    #       way to empty the candidate space of models... might be difficult
    #       with pickled candidate space objects/arguments?

    # Normalize the optional collection arguments to empty containers.
    if excluded_models is None:
        excluded_models = []
    if excluded_model_hashes is None:
        excluded_model_hashes = []
    if calibrated_models is None:
        calibrated_models = {}
    if newly_calibrated_models is None:
        newly_calibrated_models = {}
    # NOTE: mutates the caller-supplied `calibrated_models` dict in place.
    calibrated_models.update(newly_calibrated_models)
    if criterion is None:
        criterion = problem.criterion
    if candidate_space is None:
        candidate_space = problem.new_candidate_space(limit=limit)
    candidate_space.exclude_hashes(calibrated_models)

    # Set the predecessor model to the previous predecessor model.
    predecessor_model = candidate_space.previous_predecessor_model

    # If the predecessor model has not yet been calibrated, then calibrate it.
    if (
        predecessor_model is not None
        and predecessor_model != VIRTUAL_INITIAL_MODEL
    ):
        if (
            predecessor_model.get_criterion(
                criterion,
                raise_on_failure=False,
            )
            is None
        ):
            # Return early with only the (uncalibrated) predecessor model as
            # the candidate, so the caller calibrates it first.
            candidate_space.models = [copy.deepcopy(predecessor_model)]
            # Dummy zero likelihood, which the predecessor model will
            # improve on after it's actually calibrated.
            predecessor_model.set_criterion(Criterion.LH, 0.0)
            return candidate_space

        # Exclude the calibrated predecessor model.
        if not candidate_space.excluded(predecessor_model):
            candidate_space.exclude(predecessor_model)

    # Set the new predecessor_model from the initial model or
    # by calling ui.best to find the best model to jump to if
    # this is not the first step of the search.
    if newly_calibrated_models:
        predecessor_model = problem.get_best(
            newly_calibrated_models.values(),
            criterion=criterion,
        )
        # If the new predecessor model isn't better than the previous one,
        # keep the previous one.
        # If FAMoS jumped this will not be useful, since the jumped-to model
        # can be expected to be worse than the jumped-from model, in general.
        if not default_compare(
            model0=candidate_space.previous_predecessor_model,
            model1=predecessor_model,
            criterion=criterion,
        ):
            predecessor_model = candidate_space.previous_predecessor_model

        # Let the candidate space react to the latest calibration results.
        # `StopIteration` signals that the method has terminated.
        try:
            candidate_space.update_after_calibration(
                calibrated_models=calibrated_models,
                newly_calibrated_models=newly_calibrated_models,
                criterion=criterion,
            )
        except StopIteration:
            do_search = False

        # If candidate space not Famos then ignored.
        # Else, in case we jumped to most distant in this iteration, go into
        # calibration with only the model we've jumped to.
        if (
            isinstance(candidate_space, FamosCandidateSpace)
            and candidate_space.jumped_to_most_distant
        ):
            return candidate_space

    # No predecessor yet: for methods that need an initial model, start from
    # the best calibrated model found so far.
    if (
        predecessor_model is None
        and candidate_space.method in INITIAL_MODEL_METHODS
        and calibrated_models
    ):
        predecessor_model = problem.get_best(
            models=calibrated_models.values(),
            criterion=criterion,
        )
    if predecessor_model is not None:
        candidate_space.reset(predecessor_model)

    # TODO support excluding model IDs? should be faster but may have
    #      issues, e.g.:
    #      - duplicate model IDs among multiple model subspaces
    #      - perhaps less portable if model IDs are generated differently on
    #        different computers
    problem.model_space.exclude_models(models=excluded_models)
    problem.model_space.exclude_model_hashes(
        model_hashes=excluded_model_hashes
    )
    while do_search:
        problem.model_space.search(candidate_space, limit=limit_sent)

        write_summary_tsv(
            problem=problem,
            candidate_space=candidate_space,
            previous_predecessor_model=candidate_space.previous_predecessor_model,
            predecessor_model=predecessor_model,
        )

        if candidate_space.models:
            break

        # No models were found. Repeat the search with the same candidate
        # space, if the candidate space is able to switch methods.
        # N.B.: candidate spaces that switch methods must raise
        # `StopIteration` when they stop switching.
        if isinstance(candidate_space, FamosCandidateSpace):
            try:
                candidate_space.update_after_calibration(
                    calibrated_models=calibrated_models,
                    newly_calibrated_models={},
                    criterion=criterion,
                )
                continue
            except StopIteration:
                break

        # No models were found, and the method doesn't switch, so no further
        # models can be found.
        break

    candidate_space.previous_predecessor_model = predecessor_model

    return candidate_space
def model_to_petab(
    model: Model,
    output_path: Optional[TYPE_PATH] = None,
) -> Dict[str, Union[petab.Problem, TYPE_PATH]]:
    """Generate the PEtab problem for a single model.

    Args:
        model:
            The model.
        output_path:
            If specified, the PEtab problem will be output to files in this
            directory.

    Returns:
        The PEtab problem, and the path to the PEtab problem YAML file, if
        an output path is provided.
    """
    # Delegate entirely to the model's own PEtab export.
    petab_result = model.to_petab(output_path=output_path)
    return petab_result
def models_to_petab(
    models: List[Model],
    output_path_prefix: Optional[TYPE_PATH] = None,
) -> List[Dict[str, Union[petab.Problem, TYPE_PATH]]]:
    """Generate the PEtab problems for a list of models.

    Args:
        models:
            The list of models.
        output_path_prefix:
            If specified, the PEtab problem will be output to files in
            subdirectories of this path, where each subdirectory corresponds
            to a model (named after ``model.model_id``).

    Returns:
        The PEtab problems, and the paths to the PEtab problem YAML files,
        if an output path prefix is provided.
    """
    # FIX: previously `Path(output_path_prefix)` was called unconditionally,
    # so the documented default (`None` == "no file output") raised a
    # `TypeError`. Handle the in-memory-only case explicitly.
    if output_path_prefix is None:
        return [model_to_petab(model=model) for model in models]

    output_path_prefix = Path(output_path_prefix)
    result = []
    for model in models:
        # One subdirectory per model, named by its model ID.
        output_path = output_path_prefix / model.model_id
        result.append(model_to_petab(model=model, output_path=output_path))
    return result
def best(
    problem: Problem,
    models: List[Model],
    criterion: Optional[Union[str, Criterion]] = None,
) -> Model:
    """Get the best model from a list of models.

    Args:
        problem:
            The PEtab Select problem.
        models:
            The list of models.
        criterion:
            The criterion by which models will be compared. Defaults to
            ``problem.criterion``.

    Returns:
        The best model.
    """
    # FIX: the annotation was `Optional[Union[str, None]]`, which is
    # redundant (`Union[str, None]` already includes `None`) and omitted the
    # `Criterion` enum that the rest of this module passes for `criterion`.
    # Runtime behavior is unchanged; only the type hint is corrected.
    # TODO return list, when multiple models are equally "best"
    return problem.get_best(models=models, criterion=criterion)
def write_summary_tsv(
    problem: Problem,
    candidate_space: Optional[CandidateSpace] = None,
    previous_predecessor_model: Optional[Union[str, Model]] = None,
    predecessor_model: Optional[Model] = None,
) -> None:
    """Append a summary row for the current iteration to the summary TSV.

    The row records the search method, the number of candidate models, the
    parameter-set differences between the previous predecessor and the
    current predecessor, the predecessor's criterion value, its estimated
    parameter IDs, and the per-candidate parameter differences.

    Args:
        problem:
            The PEtab Select problem (used for its criterion).
        candidate_space:
            The candidate space whose ``summary_tsv`` is written to. If its
            ``summary_tsv`` is ``None``, nothing is written.
        previous_predecessor_model:
            The predecessor model of the previous iteration. May be a
            non-``Model`` placeholder (e.g. the virtual initial model), in
            which case it contributes no parameter IDs.
        predecessor_model:
            The predecessor model of the current iteration. Defaults to
            ``candidate_space.predecessor_model``.
    """
    # No summary file configured: nothing to do.
    if candidate_space.summary_tsv is None:
        return

    previous_predecessor_parameter_ids = set()
    if isinstance(previous_predecessor_model, Model):
        previous_predecessor_parameter_ids = set(
            previous_predecessor_model.get_estimated_parameter_ids_all()
        )

    if predecessor_model is None:
        predecessor_model = candidate_space.predecessor_model
    predecessor_parameter_ids = set()
    predecessor_criterion = None
    if isinstance(predecessor_model, Model):
        predecessor_parameter_ids = set(
            predecessor_model.get_estimated_parameter_ids_all()
        )
        predecessor_criterion = predecessor_model.get_criterion(
            problem.criterion
        )

    # Parameters that changed between the previous and current predecessor.
    diff_parameter_ids = (
        previous_predecessor_parameter_ids.symmetric_difference(
            predecessor_parameter_ids
        )
    )

    # For each candidate, the parameters that differ from the predecessor.
    diff_candidates_parameter_ids = []
    for candidate_model in candidate_space.models:
        candidate_parameter_ids = set(
            candidate_model.get_estimated_parameter_ids_all()
        )
        diff_candidates_parameter_ids.append(
            list(
                candidate_parameter_ids.symmetric_difference(
                    predecessor_parameter_ids
                )
            )
        )

    # FIXME remove once MostDistantCandidateSpace exists...
    # Heuristic: a FAMoS predecessor without a predecessor hash, after the
    # first row has been written, is presumably a "most distant" jump —
    # TODO confirm once a dedicated candidate space class exists.
    method = candidate_space.method
    if (
        isinstance(candidate_space, FamosCandidateSpace)
        and isinstance(candidate_space.predecessor_model, Model)
        and candidate_space.predecessor_model.predecessor_model_hash is None
    ):
        with open(candidate_space.summary_tsv, 'r') as f:
            # More than the header line already present.
            if sum(1 for _ in f) > 1:
                method = Method.MOST_DISTANT

    candidate_space.write_summary_tsv(
        [
            method,
            len(candidate_space.models),
            sorted(diff_parameter_ids),
            predecessor_criterion,
            sorted(predecessor_parameter_ids),
            sorted(
                diff_candidates_parameter_ids,
                key=lambda x: [x[i] for i in range(len(x))],
            ),
        ]
    )