# Source code for empulse.metrics.max_profit

import numpy as np
from numpy.typing import ArrayLike

from ._cy_convex_hull import convex_hull


def max_profit_score(
    y_true: ArrayLike,
    y_score: ArrayLike,
    *,
    tp_benefit: float = 0.0,
    tn_benefit: float = 0.0,
    fn_cost: float = 0.0,
    fp_cost: float = 0.0,
) -> float:
    """
    :func:`~empulse.metrics.max_profit()` but only returning the MP score.

    .. seealso::

        :func:`~empulse.metrics.max_profit` : To also return the threshold at which the maximum profit is achieved.

        :func:`~empulse.metrics.Metric` : To create a stochastic version of the maximum profit.

    Parameters
    ----------
    y_true : 1D array-like, shape=(n_samples,)
        Binary target values ('positive': 1, 'negative': 0).

    y_score : 1D array-like, shape=(n_samples,)
        Target scores, can either be probability estimates or non-thresholded decision values.

    tp_benefit : float, default=0.0
        Benefit attributed to true positive predictions.

    tn_benefit : float, default=0.0
        Benefit attributed to true negative predictions.

    fn_cost : float, default=0.0
        Cost attributed to false negative predictions.

    fp_cost : float, default=0.0
        Cost attributed to false positive predictions.

    Returns
    -------
    mp : float
        Maximum Profit, i.e. the first element of the tuple returned by
        :func:`~empulse.metrics.max_profit` for the same arguments.

    Notes
    -----
    The MP is defined as [1]_:

    .. math::  \\text{MP} = b_0 \\pi_0 F_0(T) + b_1 \\pi_1 (1 - F_1(T)) - c_0 \\pi_0 (1 - F_0(T)) - c_1 \\pi_1 F_1(T)

    where :math:`T` is the threshold at which the maximum profit is achieved.
    In terms of the arguments of this function, :math:`b_0` is ``tp_benefit``, :math:`b_1` is ``tn_benefit``,
    :math:`c_0` is ``fn_cost`` and :math:`c_1` is ``fp_cost``.

    The MP requires that the positive class is encoded as 0, and negative class as 1.
    However, this implementation assumes the standard notation ('positive': 1, 'negative': 0).

    References
    ----------
    .. [1] Verbraken, T., Verbeke, W. and Baesens, B. (2013).
        A Novel Profit Maximizing Metric for Measuring Classification
        Performance of Customer Churn Prediction Models. IEEE Transactions on
        Knowledge and Data Engineering, 25(5), 961-973.
        Available Online: http://ieeexplore.ieee.org/iel5/69/6486492/06165289.pdf?arnumber=6165289

    Examples
    --------
    Reimplement MPC:

    >>> from empulse.metrics import max_profit_score
    >>>
    >>> y_true = [0, 1, 0, 1, 0, 1, 0, 1]
    >>> y_score = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9]
    >>>
    >>> clv = 200
    >>> d = 10
    >>> f = 1
    >>> gamma = 0.3
    >>> tp_benefit = clv * (gamma * (1 - (d / clv)) - (f / clv))
    >>> fp_cost = d + f
    >>>
    >>> max_profit_score(y_true, y_score, tp_benefit=tp_benefit, fp_cost=fp_cost)
    23.87...
    """
    # max_profit returns (maximum_profit, threshold); only the profit is wanted here.
    return max_profit(
        y_true,
        y_score,
        tp_benefit=tp_benefit,
        tn_benefit=tn_benefit,
        fn_cost=fn_cost,
        fp_cost=fp_cost,
    )[0]
def max_profit(
    y_true: ArrayLike,
    y_score: ArrayLike,
    *,
    tp_benefit: float = 0.0,
    tn_benefit: float = 0.0,
    fn_cost: float = 0.0,
    fp_cost: float = 0.0,
) -> tuple[float, float]:
    """
    Maximum Profit Measure (MP).

    .. seealso::

        :func:`~empulse.metrics.max_profit_score` : To only return the maximum profit score.

        :func:`~empulse.metrics.Metric` : To create a stochastic version of the maximum profit.

    Parameters
    ----------
    y_true : 1D array-like, shape=(n_samples,)
        Binary target values ('positive': 1, 'negative': 0).

    y_score : 1D array-like, shape=(n_samples,)
        Target scores, can either be probability estimates or non-thresholded decision values.

    tp_benefit : float, default=0.0
        Benefit attributed to true positive predictions.

    tn_benefit : float, default=0.0
        Benefit attributed to true negative predictions.

    fn_cost : float, default=0.0
        Cost attributed to false negative predictions.

    fp_cost : float, default=0.0
        Cost attributed to false positive predictions.

    Returns
    -------
    mp : float
        Maximum Profit

    threshold : float
        Threshold at which the maximum profit is achieved.
        It is computed as :math:`\\pi_0 F_0 + \\pi_1 F_1` at the profit-maximizing point, so it is
        presumably expressed as a fraction of the samples rather than as a cut-off on ``y_score``.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_score`` do not have the same shape, or if they contain no samples.

    Notes
    -----
    The MP is defined as [1]_:

    .. math::  \\text{MP} = b_0 \\pi_0 F_0(T) + b_1 \\pi_1 (1 - F_1(T)) - c_0 \\pi_0 (1 - F_0(T)) - c_1 \\pi_1 F_1(T)

    where :math:`T` is the threshold at which the maximum profit is achieved.
    In terms of the arguments of this function, :math:`b_0` is ``tp_benefit``, :math:`b_1` is ``tn_benefit``,
    :math:`c_0` is ``fn_cost`` and :math:`c_1` is ``fp_cost``; :math:`\\pi_0` is the mean of ``y_true``
    (the fraction of positives) and :math:`\\pi_1 = 1 - \\pi_0`.

    The MP requires that the positive class is encoded as 0, and negative class as 1.
    However, this implementation assumes the standard notation ('positive': 1, 'negative': 0).

    References
    ----------
    .. [1] Verbraken, T., Verbeke, W. and Baesens, B. (2013).
        A Novel Profit Maximizing Metric for Measuring Classification
        Performance of Customer Churn Prediction Models. IEEE Transactions on
        Knowledge and Data Engineering, 25(5), 961-973.
        Available Online: http://ieeexplore.ieee.org/iel5/69/6486492/06165289.pdf?arnumber=6165289

    Examples
    --------
    Reimplement MPC:

    >>> from empulse.metrics import max_profit
    >>>
    >>> y_true = [0, 1, 0, 1, 0, 1, 0, 1]
    >>> y_score = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9]
    >>>
    >>> clv = 200
    >>> d = 10
    >>> f = 1
    >>> gamma = 0.3
    >>> tp_benefit = clv * (gamma * (1 - (d / clv)) - (f / clv))
    >>> fp_cost = d + f
    >>>
    >>> max_profit(y_true, y_score, tp_benefit=tp_benefit, fp_cost=fp_cost)
    (23.87..., 0.875)
    """
    y_true = np.asarray(y_true, dtype=np.int32, order='C')
    y_score = np.asarray(y_score, dtype=np.float64, order='C')

    # Reject inconsistent input up front: convex_hull is a compiled helper and how it
    # reacts to mismatched or empty arrays is not visible from this module.
    if y_true.shape != y_score.shape:
        raise ValueError(
            f'y_true and y_score must have the same shape, got {y_true.shape} and {y_score.shape}.'
        )
    if y_true.size == 0:
        raise ValueError('y_true and y_score must contain at least one sample.')

    # Class priors: pi0 is the share of positives (label 1), pi1 the share of negatives.
    pi0 = float(np.mean(y_true))
    pi1 = 1 - pi0

    # Candidate operating points (F0, F1) supplied by the convex hull helper.
    f0, f1 = convex_hull(y_true, y_score)

    # Algebraically equal to
    #   tp_benefit*pi0*F0 + tn_benefit*pi1*(1 - F1) - fn_cost*pi0*(1 - F0) - fp_cost*pi1*F1
    # with the terms grouped per F0 / F1 so each array is only multiplied once.
    profits = (tp_benefit + fn_cost) * pi0 * f0 - (tn_benefit + fp_cost) * pi1 * f1 + tn_benefit * pi1 - fn_cost * pi0

    best_index = np.argmax(profits)
    maximum_profit = float(profits[best_index])
    customer_threshold = float(f0[best_index] * pi0 + f1[best_index] * pi1)

    return maximum_profit, customer_threshold