Source code for pyaerocom.mathutils

"""
Mathematical low level utility methods of pyaerocom
"""

import numpy as np
from scipy.stats import kendalltau, pearsonr, spearmanr

from pyaerocom._warnings import ignore_warnings

### LAMBDA FUNCTIONS
in_range = lambda x, low, high: low <= x <= high

### OTHER FUNCTIONS



[docs]
def is_strictly_monotonic(iter1d) -> bool:
    """
    Check if 1D iterble is strictly monotonic

    Parameters
    ----------
    iter1d
        1D iterable object to be tested

    Returns
    -------
    bool

    """
    return True if np.all(np.diff(iter1d) > 0) else False




[docs]
def make_binlist(vmin: float, vmax: float, num: int = None) -> list:
    """"""
    if num is None:
        num = 8
    return list(np.linspace(vmin, vmax, num + 1))




[docs]
def weighted_sum(data, weights):
    """Compute weighted sum using numpy dot product

    Parameters
    ----------
    data : ndarray
        data array that is supposed to be summed up
    weights : ndarray
        array containing weights for each point in `data`

    Returns
    -------
    float
        weighted sum of values in input array
    """
    return np.dot(data, weights)




[docs]
def sum(data, weights=None):
    """Summing operation with option to perform weighted sum

    Parameters
    ----------
    data : ndarray
        data array that is supposed to be summed up
    weights : ndarray, optional
        array containing weights for each point in `data`

    Returns
    -------
    float or int
        sum of values in input array
    """
    if weights is None:
        return np.sum(data)
    return weighted_sum(data, weights)




[docs]
def weighted_mean(data, weights):
    """Compute weighted mean

    Parameters
    ----------
    data : ndarray
        data array that is supposed to be averaged
    weights : ndarray
        array containing weights for each point in `data`

    Returns
    -------
    float or int
        weighted mean of data array
    """
    return np.sum(data * weights) / np.sum(weights)




[docs]
def weighted_cov(ref_data, data, weights):
    """Compute weighted covariance

    Parameters
    ----------
    data_ref : ndarray
        x data
    data : ndarray
        y data
    weights : ndarray
        array containing weights for each point in `data`

    Returns
    -------
    float
        covariance
    """
    avgx = weighted_mean(ref_data, weights)
    avgy = weighted_mean(data, weights)
    return np.sum(weights * (ref_data - avgx) * (data - avgy)) / np.sum(weights)




[docs]
def weighted_corr(ref_data, data, weights):
    """Compute weighted correlation

    Parameters
    ----------
    data_ref : ndarray
        x data
    data : ndarray
        y data
    weights : ndarray
        array containing weights for each point in `data`

    Returns
    -------
    float
       weighted correlation coefficient
    """
    wcovxy = weighted_cov(ref_data, data, weights)

    wcovxx = weighted_cov(ref_data, ref_data, weights)
    wcovyy = weighted_cov(data, data, weights)
    wsigmaxy = np.sqrt(wcovxx * wcovyy)
    return wcovxy / wsigmaxy




[docs]
@ignore_warnings(
    RuntimeWarning, "invalid value encountered in double_scalars", "An input array is constant"
)
def corr(ref_data, data, weights=None):
    """Compute correlation coefficient

    Parameters
    ----------
    data_ref : ndarray
        x data
    data : ndarray
        y data
    weights : ndarray, optional
        array containing weights for each point in `data`

    Returns
    -------
    float
       correlation coefficient
    """
    if weights is None:
        return pearsonr(ref_data, data)[0]
    return weighted_corr(ref_data, data, weights)




[docs]
def closest_index(num_array, value):
    """Returns index in number array that is closest to input value"""
    return np.argmin(np.abs(np.asarray(num_array) - value))




[docs]
def numbers_in_str(input_string):
    """This method finds all numbers in a string

    Note
    ----
    - Beta version, please use with care
    - Detects only integer numbers, dots are ignored

    Parameters
    ----------
    input_string : str
        string containing numbers

    Returns
    -------
    list
        list of strings specifying all numbers detected in string

    Example
    -------
    >>> numbers_in_str('Bla42Blub100')
    [42, 100]
    """
    numbers = []
    IN_NUM = False
    c_num = None
    for char in input_string:
        try:
            int(char)
            if not IN_NUM:
                IN_NUM = True
                c_num = char
            elif IN_NUM:
                c_num += char
        except Exception:
            if IN_NUM:
                numbers.append(c_num)
            IN_NUM = False
    if IN_NUM:
        numbers.append(c_num)
    return numbers




[docs]
def exponent(num):
    """Get exponent of input number

    Parameters
    ----------
    num : :obj:`float` or iterable
        input number

    Returns
    -------
    :obj:`int` or :obj:`ndarray` containing ints
        exponent of input number(s)

    Example
    -------
    >>> from pyaerocom.mathutils import exponent
    >>> exponent(2340)
    3
    """
    return np.floor(np.log10(abs(np.asarray(num)))).astype(int)




[docs]
def range_magnitude(low, high):
    """Returns magnitude of value range

    Parameters
    ----------
    low : float
        lower end of range
    high : float
        upper end of range

    Returns
    -------
    int
        magnitudes spanned by input numbers

    Example
    -------

    >>> range_magnitude(0.1, 100)
    3
    >>> range_magnitude(100, 0.1)
    -3
    >>> range_magnitude(1e-3, 1e6)
    9

    """
    return exponent(high) - exponent(low)




[docs]
def estimate_value_range(vmin, vmax, extend_percent=0):
    """
    Round and extend input range to estimate lower and upper bounds of range

    Parameters
    ----------
    vmin : float
        lower value of range
    vmax : float
        upper value of range
    extend_percent : int
        percentage specifying to which extent the input range is supposed to be
        extended.

    Returns
    -------
    float
        estimated lower end of range
    float
        estimated upper end of range


    """
    if not vmax > vmin:
        raise ValueError("vmax needs to exceed vmin")
    # extent value range by +/- 5%
    offs = (vmax - vmin) * extend_percent * 0.01
    vmin, vmax = vmin - offs, vmax + offs

    if vmin != 0:
        exp = float(exponent(vmin))
    else:
        exp = float(exponent(vmax))
    # round values
    vmin = np.floor(vmin * 10 ** (-exp)) * 10.0 ** (exp)
    vmax = np.ceil(vmax * 10 ** (-exp)) * 10.0 ** (exp)
    return vmin, vmax