# Source code for humba.core

"""
module housing core library functionality
"""

import numpy as np
from typing import Optional, Tuple

import humba.jits as jits


def histogram(
    x: np.ndarray,
    bins: int = 10,
    range: Tuple[float, float] = (0, 10),
    weights: Optional[np.ndarray] = None,
    flow: bool = False,
) -> Tuple[np.ndarray, Optional[np.ndarray], np.ndarray]:
    """Calculate the histogram for the data ``x``.

    Parameters
    ----------
    x : :obj:`numpy.ndarray`
        data to histogram (dtype must be float32 or float64)
    bins : int
        number of bins
    range : (float, float)
        axis range
    weights : :obj:`numpy.ndarray`, optional
        array of weights for ``x``; must have the same shape as ``x``
    flow : bool
        include over and underflow content in first and last bins

    Returns
    -------
    count : :obj:`numpy.ndarray`
        The values of the histogram
    error : :obj:`numpy.ndarray` or None
        The Poisson uncertainty on the bin heights; ``None`` when no
        ``weights`` are given
    edges : :obj:`numpy.ndarray`
        The bin edges (``bins + 1`` evenly spaced values spanning ``range``)

    Raises
    ------
    ValueError
        If ``weights`` is given and its shape differs from that of ``x``.
    TypeError
        If the dtype of ``x`` is neither float32 nor float64.

    Notes
    -----
    If the dtype of the ``weights`` is not the same as ``x``, then it
    is converted to the dtype of ``x``.

    Examples
    --------
    >>> import numpy as np
    >>> from humba import histogram
    >>> x = np.random.randn(100000)
    >>> w = np.random.uniform(0.4, 0.5, x.shape[0])
    >>> hist1, _, edges = histogram(x, bins=50, range=(-5, 5))
    >>> hist2, _, edges = histogram(x, bins=50, range=(-5, 5), flow=True)
    >>> hist3, error, edges = histogram(x, bins=50, range=(-5, 5), weights=w)
    >>> hist4, error, edges = histogram(x, bins=50, range=(-3, 3), weights=w, flow=True)

    """
    edges = np.linspace(range[0], range[1], bins + 1)
    weighted = weights is not None

    # Explicit raise instead of ``assert``: assertions are removed under
    # ``python -O``, which would let mismatched arrays reach the kernels.
    if weighted and x.shape != weights.shape:
        raise ValueError("x and weights must have identical shape")

    # Select the kernel matching the dtype of ``x`` and the weighting mode.
    if x.dtype == np.float64:
        hfunc = jits._hfloat64_weighted if weighted else jits._hfloat64
    elif x.dtype == np.float32:
        hfunc = jits._hfloat32_weighted if weighted else jits._hfloat32
    else:
        raise TypeError("dtype of input must be float32 or float64")

    if not weighted:
        res = hfunc(x, bins, range[0], range[1], flow)
        return (res, None, edges)

    # The weights are cast to the dtype of ``x`` before the kernel call.
    res, err = hfunc(x, weights.astype(x.dtype), bins, range[0], range[1], flow)
    return (res, err, edges)


def mwv_histogram(
    x: np.ndarray,
    weights: np.ndarray,
    bins: int = 10,
    range: Tuple[float, float] = (0, 10),
    flow: bool = False,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Histogram the same data but with multiple weight variations.

    Parameters
    ----------
    x : :obj:`numpy.ndarray`
        data to histogram
    weights : :obj:`numpy.ndarray`
        multidimensional array of weights for ``x``; the first element
        of the ``shape`` attribute must be equal to the length of ``x``.
        The dtype must be float32 or float64.
    bins : int
        number of bins
    range : (float, float)
        axis range
    flow : bool
        include over and underflow content in first and last bins

    Returns
    -------
    count : :obj:`numpy.ndarray`
        The values of the histograms calculated from the weights.
        NOTE(review): the shape was previously documented as
        (bins, ``weights.shape[0]``); since ``weights.shape[0]`` must equal
        the length of ``x``, this is presumably meant to be
        (bins, ``weights.shape[1]``) -- confirm against ``humba.jits``.
    error : :obj:`numpy.ndarray`
        The Poisson uncertainty on the bin heights (shape will be
        the same as ``count``).
    edges : :obj:`numpy.ndarray`
        The bin edges (``bins + 1`` evenly spaced values spanning ``range``)

    Raises
    ------
    ValueError
        If the first dimension of ``weights`` does not match that of ``x``.
    TypeError
        If the dtype of ``weights`` is neither float32 nor float64.

    Notes
    -----
    If ``x`` is not the same dtype as ``weights``, then it is converted
    to the dtype of ``weights`` (for multi weight histograms we expect
    the weights array to be larger than the data array so we prefer to
    cast the smaller chunk of data).

    """
    edges = np.linspace(range[0], range[1], bins + 1)

    # Explicit raise instead of ``assert``: assertions are removed under
    # ``python -O``, which would let mismatched arrays reach the kernels.
    if x.shape[0] != weights.shape[0]:
        raise ValueError("weights shape is not compatible with x")

    # The kernel is chosen from the dtype of ``weights`` (not ``x``).
    if weights.dtype == np.float64:
        hfunc = jits._hfloat64_multiweights
    elif weights.dtype == np.float32:
        hfunc = jits._hfloat32_multiweights
    else:
        raise TypeError("dtype of input must be float32 or float64")

    # ``x`` is cast to the dtype of ``weights`` before the kernel call.
    res, err = hfunc(x.astype(weights.dtype), weights, bins, range[0], range[1], flow)
    return (res, err, edges)
# humba
#
# Navigation
#
# Related Topics
#
# Source code for humba.core