Source code for empulse.metrics.churn.deterministic

from functools import lru_cache

import numpy as np

from ..._types import FloatArrayLike, FloatNDArray
from ..common import _compute_profits
from ._validation import _validate_input_mp


def mpc_score(
    y_true: FloatArrayLike,
    y_score: FloatArrayLike,
    *,
    accept_rate: float = 0.3,
    clv: FloatArrayLike | float = 200,
    incentive_cost: float = 10,
    contact_cost: float = 1,
    check_input: bool = True,
) -> float:
    """
    :func:`~empulse.metrics.mpc()` but only returning the MPC score.

    MPC presumes a situation where identified churners are contacted and offered an incentive to remain customers.
    Only a fraction of churners accepts the incentive offer.
    For detailed information, consult the paper [1]_.

    .. seealso::

        :func:`~empulse.metrics.mpc` : to also return the fraction of the customer base
        that should be targeted to maximize profit.

        :func:`~empulse.metrics.empc_score` : for a stochastic version of this metric.

    Parameters
    ----------
    y_true : 1D array-like, shape=(n_samples,)
        Binary target values ('churn': 1, 'no churn': 0).

    y_score : 1D array-like, shape=(n_samples,)
        Target scores, can either be probability estimates or non-thresholded decision values.

    accept_rate : float, default=0.3
        Probability of a customer accepting the retention offer (``0 < accept_rate < 1``).

    clv : float or 1D array-like, shape=(n_samples), default=200
        If ``float``: average customer lifetime value of retained customers (``clv > incentive_cost``).
        If ``array``: customer lifetime value of each customer when retained (``mean(clv) > incentive_cost``).

        .. note::
            Passing a CLV array is equivalent to passing a float with the average CLV of that array.

    incentive_cost : float, default=10
        Cost of incentive offered to a customer (``incentive_cost > 0``).

    contact_cost : float, default=1
        Cost of contacting a customer (``contact_cost > 0``).

    check_input : bool, default=True
        Perform input validation.
        Turning off improves performance, useful when using this metric as a loss function.

    Returns
    -------
    mpc : float
        Maximum Profit Measure for Customer Churn.

    Notes
    -----
    The MPC is defined as [1]_:

    .. math:: CLV (\\gamma (1 - \\delta) - \\phi) \\pi_0 F_0(T) - CLV (\\delta + \\phi) \\pi_1 F_1(T)

    The MPC requires that the churn class is encoded as 0, and it is NOT interchangeable (see [3]_ p37).
    However, this implementation assumes the standard notation ('churn': 1, 'no churn': 0).

    An equivalent R implementation is available in [2]_.

    References
    ----------
    .. [1] Verbraken, T., Verbeke, W. and Baesens, B. (2013).
        A Novel Profit Maximizing Metric for Measuring Classification
        Performance of Customer Churn Prediction Models. IEEE Transactions on
        Knowledge and Data Engineering, 25(5), 961-973. Available Online:
        http://ieeexplore.ieee.org/iel5/69/6486492/06165289.pdf?arnumber=6165289
    .. [2] Bravo, C. and Vanden Broucke, S. and Verbraken, T. (2019).
        EMP: Expected Maximum Profit Classification Performance Measure.
        R package version 2.0.5. Available Online:
        http://cran.r-project.org/web/packages/EMP/index.html
    .. [3] Verbraken, T. (2013). Business-Oriented Data Analytics:
        Theory and Case Studies. Ph.D. dissertation, Dept. LIRIS, KU Leuven,
        Leuven, Belgium, 2013.

    Examples
    --------
    >>> from empulse.metrics import mpc_score
    >>>
    >>> y_true = [0, 1, 0, 1, 0, 1, 0, 1]
    >>> y_score = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9]
    >>> mpc_score(y_true, y_score)
    23.874999999999996

    Using scorer:

    >>> import numpy as np
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.model_selection import cross_val_score, StratifiedKFold
    >>> from sklearn.metrics import make_scorer
    >>>
    >>> X, y = make_classification(random_state=42)
    >>> model = LogisticRegression()
    >>> cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    >>> scorer = make_scorer(
    ...     mpc_score,
    ...     response_method='predict_proba',
    ...     clv=300,
    ...     incentive_cost=15,
    ... )
    >>> np.mean(cross_val_score(model, X, y, cv=cv, scoring=scorer))
    42.08999999999999
    """
    score, _ = mpc(
        y_true,
        y_score,
        clv=clv,
        incentive_cost=incentive_cost,
        contact_cost=contact_cost,
        accept_rate=accept_rate,
        check_input=check_input,
    )
    # Coerce to a builtin float so the result matches the declared return type
    # even when ``mpc`` hands back a NumPy scalar.
    return float(score)
def mpc(
    y_true: FloatArrayLike,
    y_score: FloatArrayLike,
    *,
    accept_rate: float = 0.3,
    clv: FloatArrayLike | float = 200,
    incentive_cost: float = 10,
    contact_cost: float = 1,
    check_input: bool = True,
) -> tuple[float, float]:
    """
    Maximum Profit Measure for Customer Churn (MPC).

    MPC presumes a situation where identified churners are contacted and offered an incentive to remain customers.
    Only a fraction of churners accepts the incentive offer.
    For detailed information, consult the paper [1]_.

    .. seealso::

        :func:`~empulse.metrics.mpc_score` : to only return the MPC score.

        :func:`~empulse.metrics.empc` : for a stochastic version of this metric.

    Parameters
    ----------
    y_true : 1D array-like, shape=(n_samples,)
        Binary target values ('churn': 1, 'no churn': 0).

    y_score : 1D array-like, shape=(n_samples,)
        Target scores, can either be probability estimates or non-thresholded decision values.

    accept_rate : float, default=0.3
        Probability of a customer accepting the retention offer (``0 < accept_rate < 1``).

    clv : float or 1D array-like, shape=(n_samples), default=200
        If ``float``: average customer lifetime value of retained customers (``clv > incentive_cost``).
        If ``array``: customer lifetime value of each customer when retained (``mean(clv) > incentive_cost``).

        .. note::
            Passing a CLV array is equivalent to passing a float with the average CLV of that array.

    incentive_cost : float, default=10
        Cost of incentive offered to a customer (``incentive_cost > 0``).

    contact_cost : float, default=1
        Cost of contacting a customer (``contact_cost > 0``).

    check_input : bool, default=True
        Perform input validation.
        Turning off improves performance, useful when using this metric as a loss function.

    Returns
    -------
    mpc : float
        Maximum Profit Measure for Customer Churn

    threshold : float
        Fraction of the customer base that should be targeted to maximize profit

    Notes
    -----
    The MPC is defined as [1]_:

    .. math:: CLV (\\gamma (1 - \\delta) - \\phi) \\pi_0 F_0(T) - CLV (\\delta + \\phi) \\pi_1 F_1(T)

    The MPC requires that the churn class is encoded as 0, and it is NOT interchangeable (see [3]_ p37).
    However, this implementation assumes the standard notation ('churn': 1, 'no churn': 0).

    An equivalent R implementation is available in [2]_.

    References
    ----------
    .. [1] Verbraken, T., Verbeke, W. and Baesens, B. (2013).
        A Novel Profit Maximizing Metric for Measuring Classification
        Performance of Customer Churn Prediction Models. IEEE Transactions on
        Knowledge and Data Engineering, 25(5), 961-973. Available Online:
        http://ieeexplore.ieee.org/iel5/69/6486492/06165289.pdf?arnumber=6165289
    .. [2] Bravo, C. and Vanden Broucke, S. and Verbraken, T. (2019).
        EMP: Expected Maximum Profit Classification Performance Measure.
        R package version 2.0.5. Available Online:
        http://cran.r-project.org/web/packages/EMP/index.html
    .. [3] Verbraken, T. (2013). Business-Oriented Data Analytics:
        Theory and Case Studies. Ph.D. dissertation, Dept. LIRIS, KU Leuven,
        Leuven, Belgium, 2013.

    Examples
    --------
    >>> from empulse.metrics import mpc
    >>>
    >>> y_true = [0, 1, 0, 1, 0, 1, 0, 1]
    >>> y_score = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9]
    >>> mpc(y_true, y_score)
    (23.874999999999996, 0.875)
    """
    profits, customer_thresholds = _compute_profit_churn(
        y_true,
        y_score,
        clv,
        incentive_cost,
        contact_cost,
        accept_rate,
        check_input,
    )
    # Pick the candidate with the highest profit; on ties argmax keeps the first one.
    best = np.argmax(profits)
    # Convert the NumPy scalars to builtin floats: this matches the annotated return
    # type and keeps the printed result identical across NumPy versions.
    return float(profits[best]), float(customer_thresholds[best])
def _compute_profit_churn(
    y_true: FloatArrayLike,
    y_score: FloatArrayLike,
    clv: FloatArrayLike | float = 200,
    d: float = 10,
    f: float = 1,
    gamma: float = 0.3,
    check_input: bool = True,
) -> tuple[FloatNDArray, FloatNDArray]:
    """
    Compute the churn profit values that :func:`mpc` maximizes over.

    Parameters
    ----------
    y_true : 1D array-like
        Target values, forwarded to ``_compute_profits``.
    y_score : 1D array-like
        Target scores, forwarded to ``_compute_profits``.
    clv : float or 1D array-like, default=200
        Customer lifetime value. An array is reduced to its mean before use.
    d : float, default=10
        Incentive cost (``mpc`` passes ``incentive_cost`` here).
    f : float, default=1
        Contact cost (``mpc`` passes ``contact_cost`` here).
    gamma : float, default=0.3
        Acceptance rate of the offer (``mpc`` passes ``accept_rate`` here).
    check_input : bool, default=True
        When true, the inputs go through ``_validate_input_mp``;
        otherwise they are only converted with ``np.asarray``.

    Returns
    -------
    tuple of ndarray
        Whatever ``_compute_profits`` returns; ``mpc`` unpacks it as
        ``(profits, customer_thresholds)``.
    """
    if check_input:
        y_true, y_score, clv = _validate_input_mp(y_true, y_score, gamma, clv, d, f)
    else:
        y_true = np.asarray(y_true)
        y_score = np.asarray(y_score)
        clv = np.asarray(clv)

    # Only the average CLV enters the cost-benefit computation.
    if isinstance(clv, np.ndarray):
        clv = np.mean(clv)

    cost_benefits = _compute_cost_benefits(gamma, clv, d, f)
    return _compute_profits(y_true, y_score, cost_benefits)


@lru_cache(maxsize=1)
def _cached_cost_benefit_pair(gamma: float, clv: float, d: float, f: float) -> tuple[float, float]:
    """Return ``(true_positive_benefit, false_positive_cost)`` as an immutable, cache-safe tuple."""
    delta = d / clv  # incentive cost as a fraction of CLV
    phi = f / clv  # contact cost as a fraction of CLV
    true_positive_benefit = clv * (gamma * (1 - delta) - phi)
    false_positive_cost = -1 * clv * (delta + phi)
    return true_positive_benefit, false_positive_cost


def _compute_cost_benefits(gamma: float, clv: float, d: float, f: float) -> FloatNDArray:
    """
    Return ``[true_positive_benefit, false_positive_cost]`` as a new array.

    The arithmetic is memoized as a tuple rather than as an ``ndarray``: a cached
    array would be one shared mutable object, so an in-place write by any consumer
    would silently change the result of every later call with the same arguments.
    Building the array per call keeps each caller's copy independent.

    Parameters
    ----------
    gamma : float
        Acceptance rate of the offer.
    clv : float
        (Average) customer lifetime value; used as the divisor for ``d`` and ``f``.
    d : float
        Incentive cost.
    f : float
        Contact cost.

    Returns
    -------
    ndarray of shape (2,)
        ``clv * (gamma * (1 - d / clv) - f / clv)`` followed by
        ``-clv * (d / clv + f / clv)``.
    """
    return np.array(_cached_cost_benefit_pair(gamma, clv, d, f))