Source code for petab_select.model_space

"""The `ModelSpace` class and related methods."""

from __future__ import annotations

import logging
import warnings
from collections.abc import Iterable
from pathlib import Path
from typing import Any

import numpy as np
import pandas as pd

from .candidate_space import CandidateSpace
from .constants import (
    MODEL_SUBSPACE_ID,
    TYPE_PATH,
)
from .model import Model
from .model_subspace import ModelSubspace

# Names exported by `from petab_select.model_space import *`.
__all__ = ["ModelSpace"]


class ModelSpace:
    """A model space, as a collection of model subspaces.

    Exclusions are not stored on the model space itself: the ``exclude_*``
    methods and :meth:`reset_exclusions` forward the request to every model
    subspace.

    Attributes:
        model_subspaces:
            The model subspaces, as a dictionary that maps each model
            subspace ID to its model subspace.
    """

    def __init__(
        self,
        model_subspaces: list[ModelSubspace],
    ):
        """Create a model space from model subspaces.

        Args:
            model_subspaces:
                The model subspaces. They are stored by their
                ``model_subspace_id``, so if several subspaces share an ID,
                only the last one is kept.
        """
        self.model_subspaces = {
            model_subspace.model_subspace_id: model_subspace
            for model_subspace in model_subspaces
        }

    @staticmethod
    def load(
        data: TYPE_PATH | pd.DataFrame | list[TYPE_PATH | pd.DataFrame],
        root_path: TYPE_PATH | None = None,
    ) -> ModelSpace:
        """Load a model space from dataframe(s) or file(s).

        Args:
            data:
                The data. TSV file(s) or pandas dataframe(s).
            root_path:
                Any paths in dataframe will be resolved relative to this path.
                Paths in TSV files will be resolved relative to the directory
                of the TSV file.

        Returns:
            The model space.
        """
        sources = data if isinstance(data, list) else [data]

        model_subspaces = []
        for source in sources:
            if isinstance(source, pd.DataFrame):
                source_root = root_path
                df = source
                # A dataframe produced by `save` has the subspace ID as its
                # index; move it back into a column so that each row carries
                # its ID.
                if df.index.name == MODEL_SUBSPACE_ID:
                    df = df.reset_index()
            else:
                source_root = Path(source).parent
                df = pd.read_csv(source, sep="\t")

            # Each row of the table defines one model subspace.
            model_subspaces.extend(
                ModelSubspace.from_definition(
                    definition=definition,
                    root_path=source_root,
                )
                for _, definition in df.iterrows()
            )

        return ModelSpace(model_subspaces=model_subspaces)

    def save(self, filename: TYPE_PATH | None = None) -> pd.DataFrame:
        """Export the model space to a dataframe (and TSV).

        Args:
            filename:
                If provided, the dataframe will be saved here as a TSV.
                Paths will be made relative to the parent directory of this
                filename.

        Returns:
            The dataframe, indexed by model subspace ID.
        """
        root_path = Path(filename).parent if filename else None
        definitions = [
            model_subspace.to_definition(root_path=root_path)
            for model_subspace in self.model_subspaces.values()
        ]
        df = pd.DataFrame(definitions).set_index(MODEL_SUBSPACE_ID)
        if filename:
            df.to_csv(filename, sep="\t")
        return df

    def search(
        self,
        candidate_space: CandidateSpace,
        limit: int | float = np.inf,
        exclude: bool = True,
    ):
        """Search all model subspaces according to a candidate space method.

        Args:
            candidate_space:
                The candidate space.
            limit:
                The maximum number of models to send to the candidate space
                (i.e. this limit is on the number of models considered, not
                necessarily approved as candidates). Note that using a limit
                may produce unexpected results. For example, it may bias
                candidate models to be chosen only from a subset of model
                subspaces.
            exclude:
                Whether to exclude the new candidates from the model
                subspaces.

        Returns:
            The models of the candidate space (``candidate_space.models``).

        Raises:
            ValueError:
                If the candidate space ends up with more models than
                ``limit``.
        """
        if candidate_space.limit.reached():
            warnings.warn(
                "The candidate space has already reached its limit of accepted models.",
                RuntimeWarning,
                stacklevel=2,
            )
            return candidate_space.models

        # The candidate space wraps the subspace search.
        # NOTE(review): presumably this lets the candidate space method decide
        # how the search is invoked (e.g. set `only_one_subspace`) -- confirm
        # against `CandidateSpace.wrap_search_subspaces`.
        @candidate_space.wrap_search_subspaces
        def search_subspaces(only_one_subspace: bool = False):
            # TODO change dict to list of subspaces. Each subspace should
            #      manage its own ID
            if only_one_subspace and len(self.model_subspaces) > 1:
                logging.warning(
                    f"There is more than one model subspace. This can lead to problems for candidate space {candidate_space}, especially if they have different PEtab YAML files."
                )
            for model_subspace in self.model_subspaces.values():
                model_subspace.search(
                    candidate_space=candidate_space, limit=limit
                )
                n_models = len(candidate_space.models)
                if n_models == limit:
                    # The limit is reached; skip the remaining subspaces.
                    break
                if n_models > limit:
                    raise ValueError(
                        "An unknown error has occurred. Too many models were "
                        f"generated. Requested limit: {limit}. Number of "
                        f"generated models: {len(candidate_space.models)}."
                    )

        search_subspaces()

        if exclude:
            self.exclude_models(candidate_space.models)

        return candidate_space.models

    def __len__(self):
        """Get the number of models in this space.

        This is the sum of the lengths of all model subspaces.
        """
        # `model_subspaces` is a dict keyed by subspace ID. Iterate over the
        # values; iterating the dict itself would measure the ID strings.
        return sum(
            len(model_subspace)
            for model_subspace in self.model_subspaces.values()
        )

    def exclude_model(self, model: Model):
        """Exclude a model from every model subspace.

        Args:
            model:
                The model to exclude.
        """
        # FIXME add Exclusions Mixin (or object) to handle exclusions on the
        #       subspace and space level.
        for model_subspace in self.model_subspaces.values():
            model_subspace.exclude_model(model)

    def exclude_models(self, models: Iterable[Model]):
        """Exclude several models from every model subspace.

        Args:
            models:
                The models to exclude.
        """
        # FIXME add Exclusions Mixin (or object) to handle exclusions on the
        #       subspace and space level.
        for model_subspace in self.model_subspaces.values():
            model_subspace.exclude_models(models)

    def exclude_model_hashes(self, model_hashes: Iterable[str]):
        """Exclude models, identified by their hashes, from every subspace.

        Args:
            model_hashes:
                The hashes of the models to exclude.
        """
        # FIXME add Exclusions Mixin (or object) to handle exclusions on the
        #       subspace and space level.
        for model_subspace in self.model_subspaces.values():
            model_subspace.exclude_model_hashes(model_hashes=model_hashes)

    def reset_exclusions(
        self,
        exclusions: list[Any] | None = None,
    ) -> None:
        """Reset the exclusions in the model subspaces.

        Args:
            exclusions:
                Passed on unchanged to ``reset_exclusions`` of every model
                subspace.
        """
        for model_subspace in self.model_subspaces.values():
            model_subspace.reset_exclusions(exclusions)