Source code for petab_select.ui

import copy
import warnings
from pathlib import Path
from typing import Any

import numpy as np
import petab.v1 as petab

from . import analyze
from .candidate_space import CandidateSpace, FamosCandidateSpace
from .constants import (
    CANDIDATE_SPACE,
    MODELS,
    PREDECESSOR_MODEL,
    TERMINATE,
    TYPE_PATH,
    UNCALIBRATED_MODELS,
    Criterion,
    Method,
)
from .model import VIRTUAL_INITIAL_MODEL, Model, ModelHash, default_compare
from .models import Models
from .problem import Problem

__all__ = [
    "start_iteration",
    "end_iteration",
    "model_to_petab",
    "models_to_petab",
    "get_best",
    "write_summary_tsv",
]


def start_iteration_result(candidate_space: CandidateSpace) -> dict[str, Any]:
    """Get the state after starting the iteration.

    Args:
        candidate_space:
            The candidate space.

    Returns:
        The candidate space, the uncalibrated models, and the predecessor
        model.
    """
    # Set model iteration for the models that the calibration tool
    # will see. All models (user-supplied and newly-calibrated) will
    # have their iteration set (again) in `end_iteration`, via
    # `CandidateSpace.get_iteration_calibrated_models`
    # TODO use problem.state.iteration instead
    for model in candidate_space.models:
        model.iteration = candidate_space.iteration
    return {
        CANDIDATE_SPACE: candidate_space,
        UNCALIBRATED_MODELS: candidate_space.models,
        PREDECESSOR_MODEL: candidate_space.get_predecessor_model(),
    }



[docs]
def start_iteration(
    problem: Problem,
    candidate_space: CandidateSpace | None = None,
    limit: float | int = np.inf,
    limit_sent: float | int = np.inf,
    excluded_hashes: list[ModelHash] | None = None,
    criterion: Criterion | None = None,
    user_calibrated_models: Models | None = None,
) -> CandidateSpace:
    """Search the model space for candidate models.

    The predecessor model can be specified in the `candidate_space`
    (:func:`CandidateSpace.set_predecessor_model). If `candidate_space` is not
    provided, then the predecessor model can be specified in `problem`
    (:attr:`Problem.candidate_space_arguments`).

    Args:
        problem:
            A PEtab Select problem.
        candidate_space:
            The candidate space. Defaults to a new candidate space based on the method
            defined in the problem.
        limit:
            The maximum number of models to add to the candidate space.
        limit_sent:
            The maximum number of models sent to the candidate space (which are possibly
            rejected and excluded).
        excluded_hashes:
            Hashes of models that will be excluded from the candidate space.
        criterion:
            The criterion by which models will be compared. Defaults to the criterion
            defined in the PEtab Select problem.
        user_calibrated_models:
            Models that were already calibrated by the user. If a model in the
            candidates has the same hash as a model in
            `user_calibrated_models`, then the candidate will be replaced with
            the calibrated version. Calibration tools will only receive uncalibrated
            models from this method.

    Returns:
        A dictionary, with the following items:
            :const:`petab_select.constants.CANDIDATE_SPACE`:
                The candidate space.
            :const:`petab_select.constants.MODELS`:
                The uncalibrated models of the current iteration.
    """
    """
    FIXME(dilpath)
    - currently takes predecessor model from
      candidate_space.previous_predecessor_model
    - deprecate limit_sent? possibly unused by anyone
    - add `Iteration` class to manage an iteration, append to
      `CandidateSpace.iterations`?
    """
    if isinstance(user_calibrated_models, dict):
        warnings.warn(
            (
                "`calibrated_models` should be a `petab_select.Models` object. "
                "e.g. `calibrated_models = "
                "petab_select.Models(old_calibrated_models.values())`."
            ),
            DeprecationWarning,
            stacklevel=2,
        )
        user_calibrated_models = Models(user_calibrated_models.values())
    do_search = True
    # FIXME might be difficult for a CLI tool to specify a specific predecessor
    #       model if their candidate space has models. Need a way to empty
    #       the candidate space of models... might be difficult with pickled
    #       candidate space objects/arguments?
    if excluded_hashes is None:
        excluded_hashes = []
    if candidate_space is None:
        candidate_space = problem.new_candidate_space(limit=limit)

    if criterion is None:
        criterion = problem.criterion
    if criterion is None:
        raise ValueError("Please provide a criterion.")
    candidate_space.criterion = criterion

    # Start a new iteration
    problem.state.increment_iteration()
    candidate_space.iteration = problem.state.iteration

    # Set the predecessor model to the previous predecessor model.
    predecessor_model = candidate_space.previous_predecessor_model

    # If the predecessor model has not yet been calibrated, then calibrate it.
    if predecessor_model.hash != VIRTUAL_INITIAL_MODEL.hash:
        if (
            predecessor_model.get_criterion(
                criterion,
                raise_on_failure=False,
            )
            is None
        ):
            candidate_space.models = Models([copy.deepcopy(predecessor_model)])
            # Dummy zero likelihood, which the predecessor model will
            # improve on after it's actually calibrated.
            predecessor_model.set_criterion(Criterion.LH, 0.0)
            candidate_space.set_iteration_user_calibrated_models(
                user_calibrated_models=user_calibrated_models
            )
            return start_iteration_result(candidate_space=candidate_space)

        # Exclude the calibrated predecessor model.
        if not candidate_space.excluded(predecessor_model):
            candidate_space.set_excluded_hashes(
                predecessor_model,
                extend=True,
            )

    # Set the new predecessor_model from the initial model or
    # by calling ui.best to find the best model to jump to if
    # this is not the first step of the search.
    if candidate_space.latest_iteration_calibrated_models:
        predecessor_model = analyze.get_best(
            models=candidate_space.latest_iteration_calibrated_models,
            criterion=criterion,
            compare=problem.compare,
        )
        # If the new predecessor model isn't better than the previous one,
        # keep the previous one.
        # If FAMoS jumped this will not be useful, since the jumped-to model
        # can be expected to be worse than the jumped-from model, in general.
        if not default_compare(
            model0=candidate_space.previous_predecessor_model,
            model1=predecessor_model,
            criterion=criterion,
        ):
            predecessor_model = candidate_space.previous_predecessor_model

        # If candidate space not Famos then ignored.
        # Else, in case we jumped to most distant in this iteration, go into
        # calibration with only the model we've jumped to.
        # TODO handle as proper `MostDistantCandidateSpace`
        if (
            isinstance(candidate_space, FamosCandidateSpace)
            and candidate_space.jumped_to_most_distant
        ):
            return start_iteration_result(candidate_space=candidate_space)

    candidate_space.reset(predecessor_model)

    # FIXME store exclusions in candidate space only
    problem.model_space.exclude_model_hashes(model_hashes=excluded_hashes)
    while do_search:
        problem.model_space.search(candidate_space, limit=limit_sent)

        write_summary_tsv(
            problem=problem,
            candidate_space=candidate_space,
            previous_predecessor_model=candidate_space.previous_predecessor_model,
            predecessor_model=predecessor_model,
        )

        if candidate_space.models:
            break

        # No models were found. Repeat the search with the same candidate space,
        # if the candidate space is able to switch methods.
        # N.B.: candidate spaces that switch methods must raise `StopIteration`
        # when they stop switching.
        if isinstance(candidate_space, FamosCandidateSpace):
            try:
                candidate_space.update_after_calibration(
                    iteration_calibrated_models=Models(),
                )
                continue
            except StopIteration:
                break

        # No models were found, and the method doesn't switch, so no further
        # models can be found.
        break

    candidate_space.previous_predecessor_model = predecessor_model

    candidate_space.set_iteration_user_calibrated_models(
        user_calibrated_models=user_calibrated_models
    )
    return start_iteration_result(candidate_space=candidate_space)




[docs]
def end_iteration(
    problem: Problem,
    candidate_space: CandidateSpace,
    calibrated_models: Models,
) -> dict[str, Models | bool | CandidateSpace]:
    """Finalize model selection iteration.

    All models from the current iteration are provided to the calibration tool.
    This includes user-calibrated models that the tool did not see until now.

    A termination signal is also provided, if the model selection algorithm
    ends.

    Args:
        problem:
            The PEtab Select problem.
        candidate_space:
            The candidate space.
        calibrated_models:
            The calibration results for the uncalibrated models of this
            iteration.

    Returns:
        A dictionary, with the following items:
            :const:`petab_select.constants.MODELS`:
                All calibrated models for the current iteration.
            :const:`petab_select.constants.TERMINATE`:
                Whether PEtab Select has decided to end the model selection,
                as a boolean.
    """
    if isinstance(calibrated_models, dict):
        warnings.warn(
            (
                "`calibrated_models` should be a `petab_select.Models` object. "
                "e.g. `calibrated_models = "
                "petab_select.Models(old_calibrated_models.values())`."
            ),
            DeprecationWarning,
            stacklevel=2,
        )
        calibrated_models = Models(calibrated_models.values())
    iteration_results = {
        MODELS: candidate_space.get_iteration_calibrated_models(
            calibrated_models=calibrated_models,
            reset=True,
        )
    }

    terminate = not iteration_results[MODELS]
    try:
        candidate_space.update_after_calibration(
            iteration_calibrated_models=iteration_results[MODELS],
        )
    except StopIteration:
        # e.g. FAMoS switch_method encountered "None", indicating end of model
        # selection
        terminate = True
    iteration_results[TERMINATE] = terminate

    iteration_results[CANDIDATE_SPACE] = candidate_space

    problem.state.models.extend(iteration_results[MODELS])

    return iteration_results




[docs]
def model_to_petab(
    model: Model,
    output_path: TYPE_PATH | None = None,
) -> dict[str, petab.Problem | TYPE_PATH]:
    """Generate the PEtab problem for a model.

    Args:
        model:
            The model.
        output_path:
            If specified, the PEtab problem will be output to files in this directory.

    Returns:
        The PEtab problem, and the path to the PEtab problem YAML file, if an output
        path is provided.
    """
    return model.to_petab(output_path=output_path)




[docs]
def models_to_petab(
    models: Models,
    output_path_prefix: list[TYPE_PATH] | None = None,
) -> list[dict[str, petab.Problem | TYPE_PATH]]:
    """Generate the PEtab problems for a list of models.

    Args:
        models:
            The list of model.
        output_path_prefix:
            If specified, the PEtab problem will be output to files in subdirectories
            of this path, where each subdirectory corresponds to a model.

    Returns:
        The PEtab problems, and the paths to the PEtab problem YAML files, if an output
        path prefix is provided.
    """
    output_path_prefix = Path(output_path_prefix)
    result = []
    for model in models:
        output_path = output_path_prefix / model.model_id
        result.append(model_to_petab(model=model, output_path=output_path))
    return result




[docs]
def get_best(
    problem: Problem,
    models: list[Model],
    criterion: str | Criterion | None = None,
) -> Model:
    """Get the best model from a list of models.

    Args:
        problem:
            The PEtab Select problem.
        models:
            The list of models.
        criterion:
            The criterion by which models will be compared. Defaults to
            `problem.criterion`.

    Returns:
        The best model.
    """
    # TODO return list, when multiple models are equally "best"
    criterion = criterion or problem.criterion
    return analyze.get_best(
        models=models, criterion=criterion, compare=problem.compare
    )



def write_summary_tsv(
    problem: Problem,
    candidate_space: CandidateSpace | None = None,
    previous_predecessor_model: str | Model | None = None,
    predecessor_model: Model | None = None,
) -> None:
    if candidate_space.summary_tsv is None:
        return

    previous_predecessor_parameter_ids = set()
    if isinstance(previous_predecessor_model, Model):
        previous_predecessor_parameter_ids = set(
            previous_predecessor_model.get_estimated_parameter_ids()
        )

    if predecessor_model is None:
        predecessor_model = candidate_space.predecessor_model
    predecessor_parameter_ids = set()
    predecessor_criterion = None
    if isinstance(predecessor_model, Model):
        predecessor_parameter_ids = set(
            predecessor_model.get_estimated_parameter_ids()
        )
        predecessor_criterion = predecessor_model.get_criterion(
            problem.criterion
        )

    diff_parameter_ids = (
        previous_predecessor_parameter_ids.symmetric_difference(
            predecessor_parameter_ids
        )
    )

    diff_candidates_parameter_ids = []
    for candidate_model in candidate_space.models:
        candidate_parameter_ids = set(
            candidate_model.get_estimated_parameter_ids()
        )
        diff_candidates_parameter_ids.append(
            list(
                candidate_parameter_ids.symmetric_difference(
                    predecessor_parameter_ids
                )
            )
        )

    # FIXME remove once MostDistantCandidateSpace exists...
    #       which might be difficult to implement because the most
    #       distant is a hypothetical model, which is then used to find a
    #       real model in its neighborhood of the model space
    method = candidate_space.method
    if isinstance(candidate_space, FamosCandidateSpace):
        with open(candidate_space.summary_tsv) as f:
            if f.readlines()[-1].startswith("Jumped"):
                method = Method.MOST_DISTANT

    candidate_space.write_summary_tsv(
        [
            method,
            len(candidate_space.models),
            sorted(diff_parameter_ids),
            predecessor_criterion,
            sorted(predecessor_parameter_ids),
            sorted(
                diff_candidates_parameter_ids,
                key=lambda x: [x[i] for i in range(len(x))],
            ),
        ]
    )