"""The `ModelSpace` class and related methods."""
import itertools
import logging
import warnings
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Any, Iterable, List, Optional, TextIO, Union, get_args

import numpy as np
import pandas as pd

from .candidate_space import CandidateSpace
from .constants import (
    HEADER_ROW,
    MODEL_ID_COLUMN,
    MODEL_SUBSPACE_ID,
    PARAMETER_DEFINITIONS_START,
    PARAMETER_VALUE_DELIMITER,
    PETAB_YAML_COLUMN,
    TYPE_PATH,
)
from .model import Model
from .model_subspace import ModelSubspace

__all__ = [
    "ModelSpace",
    "get_model_space_df",
    "read_model_space_file",
    "write_model_space_df",
]


def read_model_space_file(filename: str) -> TextIO:
    """Read a model space file.

    The model space specification is currently expanded and written to a
    temporary file.

    Args:
        filename:
            The name of the file to be unpacked.

    Returns:
        A temporary file object, which is the unpacked file.
    """
    # FIXME(dilpath)
    # Todo:
    # * Consider alternatives to `_{n}` suffix for model `modelId`
    # * How should the selected model be reported to the user? Remove the
    #   `_{n}` suffix and report the original `modelId` alongside the selected
    #   parameters? Generate a set of PEtab files with the chosen SBML file
    #   and the parameters specified in a parameter or condition file?
    # * Don't "unpack" file if it is already in the unpacked format
    # * Sort file after unpacking
    # * Remove duplicates?

    # FIXME rewrite to just generate models from the original file, instead of
    # expanding all and writing to a file.
    expanded_models_file = NamedTemporaryFile(mode="r+", delete=False)
    with open(filename) as fh:
        with open(expanded_models_file.name, "w") as ms_f:
            # could replace `else` condition with ms_f.readline() here, and
            # remove `if` statement completely
            for line_index, line in enumerate(fh):
                # Skip empty/whitespace-only lines
                if not line.strip():
                    continue
                if line_index != HEADER_ROW:
                    columns = line2row(line, unpacked=False)
                    parameter_definitions = [
                        definition.split(PARAMETER_VALUE_DELIMITER)
                        for definition in columns[PARAMETER_DEFINITIONS_START:]
                    ]
                    for index, selection in enumerate(
                        itertools.product(*parameter_definitions)
                    ):
                        # TODO change MODEL_ID_COLUMN and YAML_ID_COLUMN
                        # to just MODEL_ID and YAML_FILENAME?
                        ms_f.write(
                            "\t".join(
                                [
                                    columns[MODEL_ID_COLUMN] + f"_{index}",
                                    columns[PETAB_YAML_COLUMN],
                                    *selection,
                                ]
                            )
                            + "\n"
                        )
                else:
                    ms_f.write(line)
    # FIXME replace with some 'ModelSpaceManager' object
    return expanded_models_file
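
# Illustrative sketch of the unpacking that `read_model_space_file` performs
# (hypothetical values; assumes PARAMETER_VALUE_DELIMITER is ";"): a packed
# row such as
#
#     m1\tmodel.yaml\t0;0.1\t1
#
# expands via `itertools.product` into one row per combination of parameter
# values, with a `_{n}` suffix appended to the model ID:
#
#     m1_0\tmodel.yaml\t0\t1
#     m1_1\tmodel.yaml\t0.1\t1
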
def line2row(
    line: str,
    delimiter: str = "\t",
    unpacked: bool = True,
    convert_parameters_to_float: bool = True,
) -> List:
    """Parse a line from a model space file.

    Args:
        line:
            A line from a file with delimiter-separated columns.
        delimiter:
            The string that separates columns in the file.
        unpacked:
            Whether the line format is in the unpacked format. If ``False``,
            parameter values are not converted to ``float``.
        convert_parameters_to_float:
            Whether parameters should be converted to ``float``.

    Returns:
        A list of column values. Parameter values are converted to ``float``
        if the line is unpacked and conversion is requested.
    """
    columns = line.strip().split(delimiter)
    metadata = columns[:PARAMETER_DEFINITIONS_START]
    if unpacked and convert_parameters_to_float:
        parameters = [float(p) for p in columns[PARAMETER_DEFINITIONS_START:]]
    else:
        parameters = columns[PARAMETER_DEFINITIONS_START:]
    return metadata + parameters

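# Minimal usage sketch for `line2row` (hypothetical values; assumes
# `PARAMETER_DEFINITIONS_START == 2`, i.e. two metadata columns precede the
# parameter columns):
#
#     row = line2row("m1_0\tmodel.yaml\t0.1\t1\n")
#     # -> ["m1_0", "model.yaml", 0.1, 1.0]
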
class ModelSpace:
    """A model space, as a collection of model subspaces.

    Attributes:
        model_subspaces:
            The model subspaces, stored as a dictionary keyed by model
            subspace ID.
        exclusions:
            Hashes of models that are excluded from the model space.
    """

    def __init__(
        self,
        model_subspaces: List[ModelSubspace],
    ):
        self.model_subspaces = {
            model_subspace.model_subspace_id: model_subspace
            for model_subspace in model_subspaces
        }

    @staticmethod
    def from_files(
        filenames: List[TYPE_PATH],
    ):
        """Create a model space from model space files.

        Args:
            filenames:
                The locations of the model space files.

        Returns:
            The corresponding model space.
        """
        # TODO validate input?
        model_space_dfs = [
            get_model_space_df(filename) for filename in filenames
        ]
        model_subspaces = []
        for model_space_df, model_space_filename in zip(
            model_space_dfs, filenames
        ):
            for model_subspace_id, definition in model_space_df.iterrows():
                model_subspaces.append(
                    ModelSubspace.from_definition(
                        model_subspace_id=model_subspace_id,
                        definition=definition,
                        parent_path=Path(model_space_filename).parent,
                    )
                )
        model_space = ModelSpace(model_subspaces=model_subspaces)
        return model_space

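    # Usage sketch (hypothetical filename; assumes "model_space.tsv" is a
    # valid model space file with a `model_subspace_id` column):
    #
    #     model_space = ModelSpace.from_files(["model_space.tsv"])
    #     print(len(model_space))  # total number of models across subspaces
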
    @staticmethod
    def from_df(
        df: pd.DataFrame,
        parent_path: Optional[TYPE_PATH] = None,
    ):
        """Create a model space from a model space dataframe.

        Args:
            df:
                The model space dataframe.
            parent_path:
                The path against which relative paths in the dataframe
                (e.g. PEtab problem YAML files) are resolved.

        Returns:
            The corresponding model space.
        """
        model_subspaces = []
        for model_subspace_id, definition in df.iterrows():
            model_subspaces.append(
                ModelSubspace.from_definition(
                    model_subspace_id=model_subspace_id,
                    definition=definition,
                    parent_path=parent_path,
                )
            )
        model_space = ModelSpace(model_subspaces=model_subspaces)
        return model_space

    # TODO: `to_df` / `to_file`

    def search(
        self,
        candidate_space: CandidateSpace,
        limit: float = np.inf,
        exclude: bool = True,
    ):
        """Search all model subspaces according to a candidate space.

        Args:
            candidate_space:
                The candidate space.
            limit:
                The maximum number of models to send to the candidate space
                (i.e. this limit is on the number of models considered, not
                necessarily approved as candidates). Note that using a limit
                may produce unexpected results. For example, it may bias
                candidate models to be chosen only from a subset of model
                subspaces.
            exclude:
                Whether to exclude the new candidates from the model
                subspaces.

        Returns:
            The models that were accepted by the candidate space.
        """
        if candidate_space.limit.reached():
            warnings.warn(
                "The candidate space has already reached its limit of "
                "accepted models.",
                RuntimeWarning,
            )
            return candidate_space.models

        @candidate_space.wrap_search_subspaces
        def search_subspaces(only_one_subspace: bool = False):
            # TODO change dict to list of subspaces. Each subspace should
            # manage its own ID.
            if only_one_subspace and len(self.model_subspaces) > 1:
                logging.warning(
                    "There is more than one model subspace. This can lead to "
                    f"problems for candidate space {candidate_space}, "
                    "especially if they have different PEtab YAML files."
                )
            for model_subspace in self.model_subspaces.values():
                model_subspace.search(
                    candidate_space=candidate_space, limit=limit
                )
                if len(candidate_space.models) == limit:
                    break
                elif len(candidate_space.models) > limit:
                    raise ValueError(
                        "An unknown error has occurred. Too many models were "
                        f"generated. Requested limit: {limit}. Number of "
                        f"generated models: {len(candidate_space.models)}."
                    )

        search_subspaces()

        # FIXME implement source_path.. somewhere
        # if self.source_path is not None:
        #     for model in candidate_space.models:
        #         # TODO do this change elsewhere instead?
        #         # e.g. model subspace
        #         model.petab_yaml = self.source_path / model.petab_yaml

        if exclude:
            self.exclude_models(candidate_space.models)

        return candidate_space.models

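    # Usage sketch (hedged; assumes a forward candidate space such as
    # `petab_select.candidate_space.ForwardCandidateSpace`, and a
    # `model_space` created via `ModelSpace.from_files`):
    #
    #     candidate_space = ForwardCandidateSpace()
    #     candidates = model_space.search(candidate_space)
    #     # With the default `exclude=True`, the accepted candidates are
    #     # also excluded from subsequent searches.
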
    def __len__(self):
        """Get the number of models in this space."""
        # `model_subspaces` is a dict keyed by subspace ID, so sum the sizes
        # of the subspaces themselves, not the lengths of their ID strings.
        subspace_counts = [
            len(model_subspace)
            for model_subspace in self.model_subspaces.values()
        ]
        total_count = sum(subspace_counts)
        return total_count

    def exclude_model(self, model: Model):
        """Exclude a model from the model space."""
        # FIXME add Exclusions Mixin (or object) to handle exclusions on the
        # subspace and space level.
        for model_subspace in self.model_subspaces.values():
            model_subspace.exclude_model(model)

    def exclude_models(self, models: Iterable[Model]):
        """Exclude models from the model space."""
        # FIXME add Exclusions Mixin (or object) to handle exclusions on the
        # subspace and space level.
        for model_subspace in self.model_subspaces.values():
            model_subspace.exclude_models(models)
            # model_subspace.reset_exclusions()

    def exclude_model_hashes(self, model_hashes: Iterable[str]):
        """Exclude models from the model space, by model hash."""
        # FIXME add Exclusions Mixin (or object) to handle exclusions on the
        # subspace and space level.
        for model_subspace in self.model_subspaces.values():
            model_subspace.exclude_model_hashes(model_hashes=model_hashes)

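    # Exclusion sketch (hypothetical `candidates`, e.g. from a previous
    # `search` call with `exclude=False`; assumes models provide a hash via
    # `Model.get_hash`):
    #
    #     model_space.exclude_models(candidates)
    #     # or, equivalently, by hash:
    #     model_space.exclude_model_hashes(
    #         [model.get_hash() for model in candidates]
    #     )
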
    def reset_exclusions(
        self,
        exclusions: Optional[List[Any]] = None,
    ) -> None:
        """Reset the exclusions in the model subspaces."""
        for model_subspace in self.model_subspaces.values():
            model_subspace.reset_exclusions(exclusions)


def get_model_space_df(df: Union[TYPE_PATH, pd.DataFrame]) -> pd.DataFrame:
    """Get a model space dataframe.

    Args:
        df:
            The model space dataframe, or the location of a model space file
            to read it from.

    Returns:
        The model space dataframe, indexed by model subspace ID.
    """
    # model_space_df = pd.read_csv(filename, sep='\t', index_col=MODEL_SUBSPACE_ID)  # FIXME
    if isinstance(df, get_args(TYPE_PATH)):
        df = pd.read_csv(df, sep="\t")
    if df.index.name != MODEL_SUBSPACE_ID:
        df.set_index([MODEL_SUBSPACE_ID], inplace=True)
    return df


def write_model_space_df(df: pd.DataFrame, filename: TYPE_PATH) -> None:
    """Write a model space dataframe to a TSV file."""
    df.to_csv(filename, sep="\t", index=True)


# def get_model_space(
#     filename: TYPE_PATH,
# ) -> List[ModelSubspace]:
#     model_space_df = get_model_space_df(filename)
#     model_subspaces = []
#     for definition in model_space_df.iterrows():
#         model_subspaces.append(ModelSubspace.from_definition(definition))
#     model_space = ModelSpace(model_subspaces=model_subspaces)
#     return model_space
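

# Round-trip sketch (hypothetical filenames; assumes "model_space.tsv" has a
# `model_subspace_id` column): read a model space file into a dataframe
# indexed by model subspace ID, then write it back out as TSV.
#
#     df = get_model_space_df("model_space.tsv")
#     write_model_space_df(df, "model_space_copy.tsv")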