# Source code for empulse.metrics.churn.deterministic

from functools import lru_cache

import numpy as np

from ..._types import FloatArrayLike, FloatNDArray
from ..common import _compute_profits
from ._validation import _validate_input_mp



# [docs]
def mpc_score(
    y_true: FloatArrayLike,
    y_score: FloatArrayLike,
    *,
    accept_rate: float = 0.3,
    clv: FloatArrayLike | float = 200,
    incentive_cost: float = 10,
    contact_cost: float = 1,
    check_input: bool = True,
) -> float:
    """
    :func:`~empulse.metrics.mpc` but only returning the MPC score.

    MPC presumes a situation where identified churners are contacted and offered an incentive to remain customers.
    Only a fraction of churners accepts the incentive offer.
    For detailed information, consult the paper [1]_.

    .. seealso::

        :func:`~empulse.metrics.mpc` : to also return the fraction of the customer base
        that should be targeted to maximize profit.

        :func:`~empulse.metrics.empc_score` : for a stochastic version of this metric.

    Parameters
    ----------
    y_true : 1D array-like, shape=(n_samples,)
        Binary target values ('churn': 1, 'no churn': 0).

    y_score : 1D array-like, shape=(n_samples,)
        Target scores, can either be probability estimates or non-thresholded decision values.

    accept_rate : float, default=0.3
        Probability of a customer accepting the retention offer (``0 < accept_rate < 1``).

    clv : float or 1D array-like, shape=(n_samples), default=200
        If ``float``: average customer lifetime value of retained customers (``clv > incentive_cost``).
        If ``array``: customer lifetime value of each customer when retained
        (``mean(clv) > incentive_cost``).

        .. note::
            Passing a CLV array is equivalent to passing a float with the average CLV of that array.

    incentive_cost : float, default=10
        Cost of incentive offered to a customer (``incentive_cost > 0``).

    contact_cost : float, default=1
        Cost of contacting a customer (``contact_cost > 0``).

    check_input : bool, default=True
        Perform input validation.
        Turning off improves performance, useful when using this metric as a loss function.

    Returns
    -------
    mpc : float
        Maximum Profit Measure for Customer Churn.

    Notes
    -----
    The MPC is defined as [1]_:

    .. math::  CLV (\\gamma (1 - \\delta) - \\phi) \\pi_0 F_0(T) - CLV (\\delta + \\phi) \\pi_1 F_1(T)

    The MPC requires that the churn class is encoded as 0, and it is NOT interchangeable (see [3]_ p37).
    However, this implementation assumes the standard notation ('churn': 1, 'no churn': 0).

    An equivalent R implementation is available in [2]_.

    References
    ----------
    .. [1] Verbraken, T., Verbeke, W. and Baesens, B. (2013).
        A Novel Profit Maximizing Metric for Measuring Classification
        Performance of Customer Churn Prediction Models. IEEE Transactions on
        Knowledge and Data Engineering, 25(5), 961-973. Available Online:
        http://ieeexplore.ieee.org/iel5/69/6486492/06165289.pdf?arnumber=6165289
    .. [2] Bravo, C. and Vanden Broucke, S. and Verbraken, T. (2019).
        EMP: Expected Maximum Profit Classification Performance Measure.
        R package version 2.0.5. Available Online:
        http://cran.r-project.org/web/packages/EMP/index.html
    .. [3] Verbraken, T. (2013). Business-Oriented Data Analytics:
        Theory and Case Studies. Ph.D. dissertation, Dept. LIRIS, KU Leuven,
        Leuven, Belgium, 2013.

    Examples
    --------
    >>> from empulse.metrics import mpc_score
    >>>
    >>> y_true = [0, 1, 0, 1, 0, 1, 0, 1]
    >>> y_score = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9]
    >>> mpc_score(y_true, y_score)
    23.874999999999996

    Using scorer:

    >>> import numpy as np
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.model_selection import cross_val_score, StratifiedKFold
    >>> from sklearn.metrics import make_scorer
    >>> from empulse.metrics import mpc_score
    >>>
    >>> X, y = make_classification(random_state=42)
    >>> model = LogisticRegression()
    >>> cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    >>> scorer = make_scorer(
    ...     mpc_score,
    ...     response_method='predict_proba',
    ...     clv=300,
    ...     incentive_cost=15,
    ... )
    >>> np.mean(cross_val_score(model, X, y, cv=cv, scoring=scorer))
    42.08999999999999
    """
    # ``mpc`` returns a (score, threshold) pair; only the score is exposed here.
    return mpc(
        y_true,
        y_score,
        clv=clv,
        incentive_cost=incentive_cost,
        contact_cost=contact_cost,
        accept_rate=accept_rate,
        check_input=check_input,
    )[0]




# [docs]
def mpc(
    y_true: FloatArrayLike,
    y_score: FloatArrayLike,
    *,
    accept_rate: float = 0.3,
    clv: FloatArrayLike | float = 200,
    incentive_cost: float = 10,
    contact_cost: float = 1,
    check_input: bool = True,
) -> tuple[float, float]:
    """
    Maximum Profit Measure for Customer Churn (MPC).

    The MPC models a retention campaign: customers identified as churners are contacted
    and offered an incentive to stay, and only a fraction of the churners accepts that offer.
    See the paper [1]_ for the full derivation.

    .. seealso::

        :func:`~empulse.metrics.mpc_score` : to only return the MPC score.

        :func:`~empulse.metrics.empc` : for a stochastic version of this metric.

    Parameters
    ----------
    y_true : 1D array-like, shape=(n_samples,)
        Binary target values ('churn': 1, 'no churn': 0).

    y_score : 1D array-like, shape=(n_samples,)
        Target scores, can either be probability estimates or non-thresholded decision values.

    accept_rate : float, default=0.3
        Probability of a customer accepting the retention offer (``0 < accept_rate < 1``).

    clv : float or 1D array-like, shape=(n_samples), default=200
        If ``float``: average customer lifetime value of retained customers (``clv > incentive_cost``).
        If ``array``: customer lifetime value of each customer when retained
        (``mean(clv) > incentive_cost``).

        .. note::
            Passing a CLV array is equivalent to passing a float with the average CLV of that array.

    incentive_cost : float, default=10
        Cost of incentive offered to a customer (``incentive_cost > 0``).

    contact_cost : float, default=1
        Cost of contacting a customer (``contact_cost > 0``).

    check_input : bool, default=True
        Perform input validation.
        Turning off improves performance, useful when using this metric as a loss function.

    Returns
    -------
    mpc : float
        Maximum Profit Measure for Customer Churn

    threshold : float
        Fraction of the customer base that should be targeted to maximize profit

    Notes
    -----
    The MPC is defined as [1]_:

    .. math::  CLV (\\gamma (1 - \\delta) - \\phi) \\pi_0 F_0(T) - CLV (\\delta + \\phi) \\pi_1 F_1(T)

    The MPC requires that the churn class is encoded as 0, and it is NOT interchangeable (see [3]_ p37).
    However, this implementation assumes the standard notation ('churn': 1, 'no churn': 0).

    An equivalent R implementation is available in [2]_.

    References
    ----------
    .. [1] Verbraken, T., Verbeke, W. and Baesens, B. (2013).
        A Novel Profit Maximizing Metric for Measuring Classification
        Performance of Customer Churn Prediction Models. IEEE Transactions on
        Knowledge and Data Engineering, 25(5), 961-973. Available Online:
        http://ieeexplore.ieee.org/iel5/69/6486492/06165289.pdf?arnumber=6165289
    .. [2] Bravo, C. and Vanden Broucke, S. and Verbraken, T. (2019).
        EMP: Expected Maximum Profit Classification Performance Measure.
        R package version 2.0.5. Available Online:
        http://cran.r-project.org/web/packages/EMP/index.html
    .. [3] Verbraken, T. (2013). Business-Oriented Data Analytics:
        Theory and Case Studies. Ph.D. dissertation, Dept. LIRIS, KU Leuven,
        Leuven, Belgium, 2013.

    Examples
    --------
    >>> from empulse.metrics import mpc
    >>>
    >>> y_true = [0, 1, 0, 1, 0, 1, 0, 1]
    >>> y_score = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9]
    >>> mpc(y_true, y_score)
    (23.874999999999996, 0.875)
    """
    # The helper uses the paper's short names: d = incentive cost,
    # f = contact cost, gamma = acceptance rate.
    profits, customer_thresholds = _compute_profit_churn(
        y_true,
        y_score,
        clv=clv,
        d=incentive_cost,
        f=contact_cost,
        gamma=accept_rate,
        check_input=check_input,
    )

    # Report the highest profit together with the threshold at the same position.
    best = np.argmax(profits)
    return profits[best], customer_thresholds[best]



def _compute_profit_churn(
    y_true: FloatArrayLike,
    y_score: FloatArrayLike,
    clv: FloatArrayLike | float = 200,
    d: float = 10,
    f: float = 1,
    gamma: float = 0.3,
    check_input: bool = True,
) -> tuple[FloatNDArray, FloatNDArray]:
    """
    Prepare the inputs and delegate the profit computation for the churn MPC.

    Parameters
    ----------
    y_true : array-like
        Target values, forwarded to ``_compute_profits``.
    y_score : array-like
        Target scores, forwarded to ``_compute_profits``.
    clv : float or array-like, default=200
        Customer lifetime value; an array is reduced to its mean before use.
    d : float, default=10
        Incentive cost (``mpc`` passes ``incentive_cost`` here).
    f : float, default=1
        Contact cost (``mpc`` passes ``contact_cost`` here).
    gamma : float, default=0.3
        Acceptance rate (``mpc`` passes ``accept_rate`` here).
    check_input : bool, default=True
        If true, run ``_validate_input_mp``; otherwise only coerce the three
        array-like inputs with ``np.asarray``.

    Returns
    -------
    tuple of two arrays
        Whatever ``_compute_profits`` returns; ``mpc`` unpacks it as
        ``(profits, customer_thresholds)``.
    """
    if check_input:
        y_true, y_score, clv = _validate_input_mp(y_true, y_score, gamma, clv, d, f)
    else:
        # No validation requested: just coerce to arrays.
        y_true, y_score, clv = map(np.asarray, (y_true, y_score, clv))

    # Only the average CLV enters the cost-benefit computation.
    average_clv = np.mean(clv) if isinstance(clv, np.ndarray) else clv

    return _compute_profits(y_true, y_score, _compute_cost_benefits(gamma, average_clv, d, f))


@lru_cache(maxsize=1)
def _compute_cost_benefits(gamma: float, clv: float, d: float, f: float) -> FloatNDArray:
    delta = d / clv
    phi = f / clv

    true_positive_benefit = clv * (gamma * (1 - delta) - phi)
    false_positive_cost = -1 * clv * (delta + phi)
    return np.array([true_positive_benefit, false_positive_cost])