# Source code for fruitbat.utils

"""
The collection of utility functions for Fruitbat.
"""
import os
import numpy as np
from scipy import interpolate

# Names exported by ``from fruitbat.utils import *``. Helpers defined in
# this module but not listed here (``get_path_to_file_from_here``,
# ``linear_interpolate_pdfs``) are left out of the wildcard export.
__all__ = ["check_keys_in_dict", "check_type", "calc_mean_from_pdf", 
    "calc_median_from_pdf", "calc_std_from_pdf", "calc_variance_from_pdf", 
    "calc_z_from_pdf_percentile", "normalise_to_pdf", "redshift_prior", 
    "sigma_to_pdf_percentiles", ]


def check_type(value_name, value, dtype, desire=True):
    """
    Checks the type of a variable and raises an error if the check fails.

    Parameters
    ----------
    value_name : str
        The name of the variable that will be printed in the error message.

    value :
        The value of the variable.

    dtype : type or tuple of types
        The data type(s) to compare with isinstance.

    desire : boolean, optional
        If `desire` is truthy, then the error will be raised if value does
        not have a data type of `dtype`. If `desire` is falsy, then the
        error will be raised if value does have a data type of `dtype`.
        Default: True

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the type of `value` does not satisfy the requested check.

    """
    # Compare truth values rather than identities: ``isinstance`` returns a
    # plain bool, so an identity test against a truthy non-bool flag such as
    # ``1`` or ``numpy.bool_(True)`` would never match and always raise.
    if isinstance(value, dtype) == bool(desire):
        return

    # Change the error message depending on if we did or did
    # not want a specific data type
    msg_add_in = "have" if desire else "not have"

    msg = ("The value of {0} should {3} type: {1}. "
           "Instead type({0}) = {2}".format(value_name, dtype, type(value),
                                            msg_add_in))

    raise ValueError(msg)

def check_keys_in_dict(dictionary, keys):
    """
    Checks that a list of keys exist in a dictionary.

    Parameters
    ----------
    dictionary : dict
        The input dictionary.

    keys: list of strings
        The keys that the dictionary must contain.

    Returns
    -------
    bool:
        Returns *True* if all required keys exist in the dictionary.
        Otherwise a KeyError is raised.

    Raises
    ------
    KeyError
        If at least one of `keys` is not in `dictionary`.
    """
    if not all(key in dictionary for key in keys):
        # The trailing space keeps the two sentences apart once the
        # adjacent string literals are concatenated.
        raise KeyError("Dictionary missing key values. "
                       "Requires: {}".format(keys))
    return True


def get_path_to_file_from_here(filename, subdirs=None):
    """
    Builds the path to a file located next to this module, or inside
    subdirectories of this module's directory.

    Parameters
    ----------
    filename : str
        The name of the file

    subdirs : list of strs, optional
        Subdirectory names, outermost first, to insert between this
        module's directory and `filename`.
        Default: None

    Returns
    -------
    str
        The directory of this module joined with any subdirectories
        and `filename`.

    Raises
    ------
    ValueError
        If `subdirs` is neither None nor a list.

    """
    if subdirs is None:
        intermediate = []
    elif isinstance(subdirs, list):
        intermediate = subdirs
    else:
        raise ValueError("subdirs must have type list. "
                         "If you want a single subdirectory, use subdirs=['data']")

    here = os.path.dirname(__file__)
    return os.path.join(here, *intermediate, filename)


def calc_mean_from_pdf(x, pdf, dx=None):
    """
    Calculates the mean of a probability density function

    The mean is evaluated as the discrete sum ``sum(pdf * x * dx)``.

    Parameters
    ----------
    x : np.ndarray
        The x values.

    pdf : np.ndarray
        The value of the PDF at x.

    dx : np.ndarray or None, optional
        The spacing between the x bins.
        If `None`, then the bins are assumed to be linearly spaced and
        the spacing is inferred from the first and last x values.

    Returns
    -------
    mean : float
        The mean of the PDF.

    """
    if dx is None:
        # If no dx is provided assume they are linearly spaced.
        # N linearly spaced points are separated by N - 1 gaps, so the
        # spacing is the full range divided by (N - 1), not N. The max()
        # guards against division by zero for a single point, which
        # then gets a width of zero.
        dx = (x[-1] - x[0]) / max(len(x) - 1, 1)

    return np.sum(pdf * x * dx)

def calc_variance_from_pdf(x, pdf, dx=None):
    """
    Calculates the variance from a probability density
    function.

    The variance is evaluated as the discrete sum
    ``sum(pdf * dx * (x - mean)**2)``.

    Parameters
    ----------
    x : np.ndarray
        The x values.

    pdf : np.ndarray
        The value of the PDF at x.

    dx : np.ndarray or None, optional
        The spacing between the x bins.
        If `None`, then the bins are assumed to be linearly spaced and
        the spacing is inferred from the first and last x values.

    Returns
    -------
    variance : float
        The variance of the PDF.

    """

    if dx is None:
        # If no dx is provided assume they are linearly spaced.
        # N linearly spaced points are separated by N - 1 gaps, so the
        # spacing is the full range divided by (N - 1), not N. The max()
        # guards against division by zero for a single point.
        dx = (x[-1] - x[0]) / max(len(x) - 1, 1)

    # Pass dx explicitly so the mean uses exactly the same bin widths.
    mean = calc_mean_from_pdf(x, pdf, dx)

    return np.sum(pdf * dx * (x - mean)**2)


def calc_std_from_pdf(x, pdf, dx=None):
    """
    Calculates the standard deviation from a probability
    density function.

    This is the square root of the value returned by
    `calc_variance_from_pdf`.

    Parameters
    ----------
    x : np.ndarray
        The x values.

    pdf : np.ndarray
        The value of the PDF at x.

    dx : np.ndarray or None, optional
        The spacing between the x bins.
        If `None`, then the bins are assumed to be linearly spaced and
        the spacing is inferred from the first and last x values.

    Returns
    -------
    std : float
        The standard deviation of the PDF.

    """
    if dx is None:
        # If no dx is provided assume they are linearly spaced.
        # N linearly spaced points are separated by N - 1 gaps, so the
        # spacing is the full range divided by (N - 1), not N. The max()
        # guards against division by zero for a single point.
        dx = (x[-1] - x[0]) / max(len(x) - 1, 1)

    return np.sqrt(calc_variance_from_pdf(x, pdf, dx))


def calc_z_from_pdf_percentile(x, pdf, percentile):
    """
    Calculates the x value (redshift) at a given percentile of a PDF.

    The PDF is accumulated with a cumulative sum, normalised so the
    final value is one, and then linearly interpolated to find the x
    value at which the cumulative distribution equals `percentile`.

    Parameters
    ----------
    x : np.ndarray
        The x values of the PDF.

    pdf : np.ndarray
        The value of the PDF at x.

    percentile : float
        The percentile of the PDF, expressed as a fraction of the
        normalised cumulative sum (e.g. 0.5 for the median).

    Returns
    -------
    redshift : float
        The redshift at the given percentile. Percentiles below the
        first cumulative value return ``x[0]`` and percentiles above
        the last return ``x[-1]``.

    """
    cumsum = np.cumsum(pdf)
    normed_cumsum = cumsum / cumsum[-1]

    # np.interp clamps to the end points, so a percentile in the extreme
    # lower tail (smaller than the first cumulative value) yields x[0]
    # rather than an out-of-bounds error. It also copes with repeated
    # cumulative values produced by zero-valued PDF bins.
    return np.interp(percentile, normed_cumsum, x)




def calc_median_from_pdf(x, pdf):
    """
    Calculates the median of a PDF.

    The median is taken to be the x value at the 0.5 percentile, as
    returned by `calc_z_from_pdf_percentile`.

    Parameters
    ----------
    x : np.ndarray
        The x values.

    pdf : np.ndarray
        The value of the PDF at x.

    Returns
    -------
    median: float
        The median of the PDF.

    """
    halfway = 0.5
    median = calc_z_from_pdf_percentile(x, pdf, percentile=halfway)
    return median


def normalise_to_pdf(hist, bin_widths):
    """
    Converts a histogram into a PDF.

    Each histogram value is divided by its bin width and by the sum of
    the whole histogram.

    Parameters
    ----------
    hist : np.ndarray
        The histogram values.

    bin_widths : np.ndarray or float
        The width of each histogram bin.

    Returns
    -------
    pdf : np.ndarray
        The normalised histogram. If the histogram sums to less than
        1e-16, an array of zeros with length ``len(hist)`` is returned
        instead.

    """
    total = np.sum(hist)

    # An (effectively) empty histogram cannot be normalised.
    if total < 1e-16:
        return np.zeros(len(hist))

    return hist / bin_widths / total


def linear_interpolate_pdfs(sample, xvals, pdfs):
    """
    Linearly interpolates between two PDFs.

    Parameters
    ----------
    sample : float
        The x value at which to evaluate the interpolated PDF.

    xvals : sequence of two values
        The two x values at which the PDFs in `pdfs` are known.

    pdfs : sequence of two values
        The PDFs at ``xvals[0]`` and ``xvals[1]`` respectively.

    Returns
    -------
    PDF: np.ndarray
        The PDF at sample.
    """
    (x_lo, x_hi), (pdf_lo, pdf_hi) = xvals, pdfs

    # Straight line through (x_lo, pdf_lo) and (x_hi, pdf_hi).
    slope = (pdf_hi - pdf_lo) / (x_hi - x_lo)
    offset = sample - x_lo

    return slope * offset + pdf_lo



def sigma_to_pdf_percentiles(sigma):
    """
    Looks up the lower and upper percentiles that bound the central
    region of a Gaussian for a given number of standard deviations.

    Parameters
    ----------
    sigma: [1, 2, 3, 4, 5]
        The number of standard deviations. The value is converted with
        ``int`` before the lookup.

    Returns
    -------
    Lower: float
        The lower percentile
    Higher: float
        The higher percentile

    Raises
    ------
    KeyError
        If ``int(sigma)`` is not one of 1, 2, 3, 4 or 5.

    Example
    -------
    >>> sigma_to_pdf_percentiles(1)
    (0.158655254, 0.841344746)

    """
    n_sigma = int(sigma)

    # Fraction of a Gaussian contained within +/- n standard deviations.
    enclosed_fraction = {
        1: 0.682689492,
        2: 0.954499736,
        3: 0.997300204,
        4: 0.99993666,
        5: 0.999999426697,
    }

    inside = enclosed_fraction[n_sigma]

    # The excluded probability is split evenly between the two tails.
    return ((1 - inside) / 2, (1 + inside) / 2)

def redshift_prior(zbins, prior="uniform"):
    """
    Evaluates a redshift prior on a set of redshift bins.

    Parameters
    ----------
    zbins : np.ndarray
        The redshift bins on which to evaluate the prior.

    prior : str, optional
        The name of the prior, either "uniform" or "volume". Only
        "uniform" is currently implemented.
        Default: "uniform"

    Returns
    -------
    Pz : np.ndarray
        The prior at each bin. For the uniform prior this is an array
        of ones with the same shape and type as `zbins`.

    Raises
    ------
    ValueError
        If `prior` is not one of the available priors.

    NotImplementedError
        If `prior` is "volume".
    """
    known_priors = ("uniform", "volume")

    if prior not in known_priors:
        raise ValueError(
            "'{}' is not in the list of available priors".format(prior))

    if prior == "volume":
        raise NotImplementedError(
            "The volume dependent prior has not been implimented yet")

    # Only the uniform prior remains: equal weight for every bin.
    return np.ones_like(zbins)