Source code for eulerpi.core.dense_grid

"""Module implementing the :py:func:`inference <eulerpi.core.inference.inference>` with a dense grid.

The inference with a dense grid approximates the (joint/marginal) parameter distribution(s) on a dense grid.

.. note::

    The functions in this module are mainly intended for internal use and are accessed by :func:`inference <eulerpi.core.inference>` function.
    Read the documentation of :func:`inference_dense_grid <eulerpi.core.dense_grid.inference_dense_grid>` to learn more
    about the available options for the inference with a dense grid.
"""

import typing
from itertools import repeat
from multiprocessing import get_context
from typing import Dict, Tuple, Union

import numpy as np
from numpy.polynomial.chebyshev import chebpts1

from eulerpi.core.data_transformations import DataTransformation
from eulerpi.core.dense_grid_types import DenseGridType
from eulerpi.core.inference_types import InferenceType
from eulerpi.core.models import BaseModel
from eulerpi.core.result_manager import ResultManager
from eulerpi.core.sampling import calc_kernel_width
from eulerpi.core.transformations import evaluate_density



[docs]
def generate_chebyshev_grid(
    num_grid_points: np.ndarray,
    limits: np.ndarray,
    flatten=False,
) -> Union[np.ndarray, list[np.ndarray]]:
    """Generate a grid with the given number of grid points for each dimension.

    Args:
        num_grid_points(np.ndarray): The number of grid points for each dimension.
        limits(np.ndarray): The limits for each dimension.
        flatten(bool): If True, the grid is returned as a flatten array. If False, the grid is returned as a list of arrays, one for each dimension. (Default value = False)

    Returns:
        np.ndarray: The grid containing the grid points.

    """
    ndim = num_grid_points.size
    axes = [
        chebpts1(num_grid_points[i]) * (limits[i][1] - limits[i][0]) / 2
        + (limits[i][1] + limits[i][0]) / 2
        for i in range(ndim)
    ]
    mesh = np.meshgrid(*axes, indexing="ij")
    if flatten:
        return np.array(mesh).reshape(ndim, -1).T
    else:
        return mesh




[docs]
def generate_regular_grid(
    num_grid_points: np.ndarray,
    limits: np.ndarray,
    flatten=False,
) -> Union[np.ndarray, list[np.ndarray]]:
    """Generate a grid with the given number of grid points for each dimension.

    Args:
        num_grid_points(np.ndarray): The number of grid points for each dimension.
        limits(np.ndarray): The limits for each dimension.
        flatten(bool): If True, the grid is returned as a flatten array. If False, the grid is returned as a list of arrays, one for each dimension. (Default value = False)

    Returns:
        np.ndarray: The grid containing the grid points.

    """
    ndim = num_grid_points.size
    axes = [
        np.linspace(limits[i][0], limits[i][1], num=num_grid_points[i])
        for i in range(ndim)
    ]
    mesh = np.meshgrid(*axes, indexing="ij")
    if flatten:
        return np.array(mesh).reshape(ndim, -1).T
    else:
        return mesh




[docs]
def evaluate_on_grid_chunk(
    args: typing.Tuple[
        np.ndarray,
        BaseModel,
        np.ndarray,
        DataTransformation,
        np.ndarray,
        np.ndarray,
    ],
) -> np.ndarray:
    """Define a function which evaluates the density for a given grid chunk. The input args contains the grid chunk, the model, the data, the data_stdevs and the slice.

    Args:
        grid_chunk(np.ndarray): The grid chunk contains the grid points (parameter vectors) for which the density should be evaluated.
        model(BaseModel): The model used for the inference.
        data(np.ndarray): The data points used for the inference.
        data_transformation (DataTransformation): The data transformation used to normalize the data.
        data_stdevs(np.ndarray): The standard deviations of the data points. (Currently the kernel width, #TODO!)
        slice(np.ndarray): The slice defines for which dimensions of the grid points / paramater vectors the marginal density should be evaluated.

    Returns:
        np.ndarray: The evaluation results for the given grid chunk. It is a vector, containing the parameters in the first columns, the simulation results in the second columns and the density evaluations in the last columns.
    """
    grid_chunk, model, data, data_transformation, data_stdevs, slice = args

    # Init the result array
    evaluation_results = np.zeros(
        (grid_chunk.shape[0], model.data_dim + slice.shape[0] + 1)
    )
    # Evaluate the grid points
    for i, gridPoint in enumerate(grid_chunk):
        density, param_sim_res_density = evaluate_density(
            gridPoint, model, data, data_transformation, data_stdevs, slice
        )
        evaluation_results[i] = param_sim_res_density
    return evaluation_results




[docs]
def run_dense_grid_evaluation(
    model: BaseModel,
    data: np.ndarray,
    data_transformation: DataTransformation,
    slice: np.ndarray,
    result_manager: ResultManager,
    num_grid_points: np.ndarray,
    dense_grid_type: DenseGridType,
    num_processes: int,
    load_balancing_safety_faktor: int,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """This function runs a dense grid evaluation for the given model and data.

    Args:
        model(BaseModel): The model for which the evaluation should be performed.
        data(np.ndarray): The data for which the evaluation should be performed.
        data_transformation (DataTransformation): The data transformation used to normalize the data.
        slice(np.ndarray): The slice for which the evaluation should be performed.
        result_manager(ResultManager): The result manager that should be used to save the results.
        num_grid_points(np.ndarray): The number of grid points for each dimension.
        dense_grid_type(DenseGridType): The type of grid that should be used. (Default value = DenseGridType.EQUIDISTANT)
        num_processes(int): The number of processes that should be used for the evaluation. (Default value = NUM_PROCESSES)
        load_balancing_safety_faktor(int): Split the grid into num_processes * load_balancing_safety_faktor chunks.
            This will ensure that each process can be loaded with a similar amount of work if the run time difference between the evaluations
            does not exceed the load_balancing_safety_faktor. (Default value = 4)

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray]: The parameter samples, the corresponding simulation results, the corresponding density
        evaluations for the given slice.

    Raises:
        ValueError: If the dimension of the numbers of grid points does not match the number of parameters in the slice.
        ValueError: If the grid type is unknown.

    """

    if slice.shape[0] != num_grid_points.shape[0]:
        raise ValueError(
            f"The dimension of the numbers of grid points {num_grid_points} does not match the number of parameters in the slice {slice}"
        )
    if model.param_limits.ndim == 1:
        limits = model.param_limits
    else:
        limits = model.param_limits[slice, :]

    if dense_grid_type == DenseGridType.CHEBYSHEV:
        grid = generate_chebyshev_grid(num_grid_points, limits, flatten=True)
    elif dense_grid_type == DenseGridType.EQUIDISTANT:
        grid = generate_regular_grid(num_grid_points, limits, flatten=True)
    else:
        raise ValueError(f"Unknown grid type: {dense_grid_type}")

    data_stdevs = calc_kernel_width(data)
    # Split the grid into chunks that can be evaluated by each process
    grid_chunks = np.array_split(
        grid, num_processes * load_balancing_safety_faktor
    )

    pool = get_context("spawn").Pool(processes=num_processes)
    tasks = zip(
        grid_chunks,
        repeat(model),
        repeat(data),
        repeat(data_transformation),
        repeat(data_stdevs),
        repeat(slice),
    )
    results = pool.map(evaluate_on_grid_chunk, tasks)
    pool.close()
    pool.join()
    results = np.concatenate(results)

    data_dim = model.data_dim

    result_manager.save_overall(
        slice,
        results[:, 0 : slice.shape[0]],
        results[:, slice.shape[0] : slice.shape[0] + data_dim],
        results[:, slice.shape[0] + data_dim :],
    )
    return (
        results[:, 0 : slice.shape[0]],
        results[:, slice.shape[0] : slice.shape[0] + data_dim],
        results[:, slice.shape[0] + data_dim :],
    )




[docs]
def inference_dense_grid(
    model: BaseModel,
    data: np.ndarray,
    data_transformation: DataTransformation,
    result_manager: ResultManager,
    slices: list[np.ndarray],
    num_processes: int,
    num_grid_points: Union[int, list[np.ndarray]] = 10,
    dense_grid_type: DenseGridType = DenseGridType.EQUIDISTANT,
    load_balancing_safety_faktor: int = 4,
) -> Tuple[
    Dict[str, np.ndarray],
    Dict[str, np.ndarray],
    Dict[str, np.ndarray],
    ResultManager,
]:
    """This function runs a dense grid inference for the given model and data.

    Args:
        model (BaseModel): The model describing the mapping from parameters to data.
        data (np.ndarray): The data to be used for the inference.
        data_transformation (DataTransformation): The data transformation used to normalize the data.
        result_manager (ResultManager): The result manager to be used for the inference.
        slices (np.ndarray): A list of slices to be used for the inference.
        num_processes (int): The number of processes to be used for the inference.
        num_grid_points (Union[int, list[np.ndarray]], optional): The number of grid points to be used for each parameter. If an int is given, it is assumed to be the same for all parameters. Defaults to 10.
        dense_grid_type (DenseGridType, optional): The type of grid that should be used. Defaults to DenseGridType.EQUIDISTANT.
        load_balancing_safety_faktor (int, optional): Split the grid into num_processes * load_balancing_safety_faktor chunks. Defaults to 4.

    Returns:
        Tuple[Dict[str, np.ndarray], Dict[str, np.ndarray], Dict[str, np.ndarray], ResultManager]: The parameter samples, the corresponding simulation results, the corresponding density
        evaluations for each slice and the result manager used for the inference.

    Raises:
        TypeError: If the num_grid_points argument has the wrong type.
    """

    # If the number of grid points is given as an int, we construct a list of arrays with the same number of grid points for each parameter in the slice
    if isinstance(num_grid_points, int):
        num_grid_points = [
            np.full(len(slice), num_grid_points) for slice in slices
        ]
    elif isinstance(num_grid_points, list[np.ndarray]):
        pass
    else:
        raise TypeError(
            f"The num_grid_points argument has to be either an int or a list of arrays. The passed argument was of type {type(num_grid_points)}"
        )
    # create the return dictionaries
    overall_params, overall_sim_results, overall_density_evals = {}, {}, {}

    for slice, n_points in zip(slices, num_grid_points):
        slice_name = result_manager.get_slice_name(slice)
        (
            overall_params[slice_name],
            overall_sim_results[slice_name],
            overall_density_evals[slice_name],
        ) = run_dense_grid_evaluation(
            model=model,
            data=data,
            data_transformation=data_transformation,
            slice=slice,
            result_manager=result_manager,
            num_grid_points=n_points,
            dense_grid_type=dense_grid_type,
            num_processes=num_processes,
            load_balancing_safety_faktor=load_balancing_safety_faktor,
        )
        result_manager.save_inference_information(
            slice=slice,
            model=model,
            inference_type=InferenceType.DENSE_GRID,
            num_processes=num_processes,
            load_balancing_safety_faktor=load_balancing_safety_faktor,
            num_grid_points=n_points,
            dense_grid_type=dense_grid_type,
        )
    return (
        overall_params,
        overall_sim_results,
        overall_density_evals,
        result_manager,
    )