import numpy as np
from numpy.typing import ArrayLike
from ._cy_convex_hull import convex_hull
[docs]
def max_profit_score(
    y_true: ArrayLike,
    y_score: ArrayLike,
    *,
    tp_benefit: float = 0.0,
    tn_benefit: float = 0.0,
    fn_cost: float = 0.0,
    fp_cost: float = 0.0,
) -> float:
    """
    :func:`~empulse.metrics.max_profit()` but only returning the MP score.

    .. seealso::

        :func:`~empulse.metrics.max_profit` : To also return the threshold at which the maximum profit is achieved.

        :func:`~empulse.metrics.Metric` : To create a stochastic version of the maximum profit.

    Parameters
    ----------
    y_true : 1D array-like, shape=(n_samples,)
        Binary target values ('positive': 1, 'negative': 0).

    y_score : 1D array-like, shape=(n_samples,)
        Target scores, can either be probability estimates or non-thresholded decision values.

    tp_benefit : float, default=0.0
        Benefit attributed to true positive predictions.

    tn_benefit : float, default=0.0
        Benefit attributed to true negative predictions.

    fn_cost : float, default=0.0
        Cost attributed to false negative predictions.

    fp_cost : float, default=0.0
        Cost attributed to false positive predictions.

    Returns
    -------
    mp : float
        Maximum Profit.

    Notes
    -----
    The MP is defined as [1]_:

    .. math::

        \\text{MP} = b_0 \\pi_0 F_0(T) + b_1 \\pi_1 (1 - F_1(T)) - c_0 \\pi_0 (1 - F_0(T)) - c_1 \\pi_1 F_1(T)

    where :math:`T` is the threshold at which the maximum profit is achieved.

    The MP requires that the positive class is encoded as 0, and negative class as 1.
    However, this implementation assumes the standard notation ('positive': 1, 'negative': 0).

    References
    ----------
    .. [1] Verbraken, T., Verbeke, W. and Baesens, B. (2013).
        A Novel Profit Maximizing Metric for Measuring Classification
        Performance of Customer Churn Prediction Models. IEEE Transactions on
        Knowledge and Data Engineering, 25(5), 961-973. Available Online:
        http://ieeexplore.ieee.org/iel5/69/6486492/06165289.pdf?arnumber=6165289

    Examples
    --------
    Reimplement MPC:

    >>> from empulse.metrics import max_profit_score
    >>>
    >>> y_true = [0, 1, 0, 1, 0, 1, 0, 1]
    >>> y_score = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9]
    >>>
    >>> clv = 200
    >>> d = 10
    >>> f = 1
    >>> gamma = 0.3
    >>> tp_benefit = clv * (gamma * (1 - (d / clv)) - (f / clv))
    >>> fp_cost = d + f
    >>>
    >>> max_profit_score(y_true, y_score, tp_benefit=tp_benefit, fp_cost=fp_cost)
    23.87...
    """
    # max_profit returns (maximum profit, threshold); only the profit is exposed here.
    score, _threshold = max_profit(
        y_true,
        y_score,
        tp_benefit=tp_benefit,
        tn_benefit=tn_benefit,
        fn_cost=fn_cost,
        fp_cost=fp_cost,
    )
    return score
[docs]
def max_profit(
    y_true: ArrayLike,
    y_score: ArrayLike,
    *,
    tp_benefit: float = 0.0,
    tn_benefit: float = 0.0,
    fn_cost: float = 0.0,
    fp_cost: float = 0.0,
) -> tuple[float, float]:
    """
    Maximum Profit Measure (MP).

    .. seealso::

        :func:`~empulse.metrics.max_profit_score` : To only return the maximum profit score.

        :func:`~empulse.metrics.Metric` : To create a stochastic version of the maximum profit.

    Parameters
    ----------
    y_true : 1D array-like, shape=(n_samples,)
        Binary target values ('positive': 1, 'negative': 0).

    y_score : 1D array-like, shape=(n_samples,)
        Target scores, can either be probability estimates or non-thresholded decision values.

    tp_benefit : float, default=0.0
        Benefit attributed to true positive predictions.

    tn_benefit : float, default=0.0
        Benefit attributed to true negative predictions.

    fn_cost : float, default=0.0
        Cost attributed to false negative predictions.

    fp_cost : float, default=0.0
        Cost attributed to false positive predictions.

    Returns
    -------
    mp : float
        Maximum Profit

    threshold : float
        Threshold at which the maximum profit is achieved

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_score`` do not have the same shape.

    Notes
    -----
    The MP is defined as [1]_:

    .. math::

        \\text{MP} = b_0 \\pi_0 F_0(T) + b_1 \\pi_1 (1 - F_1(T)) - c_0 \\pi_0 (1 - F_0(T)) - c_1 \\pi_1 F_1(T)

    where :math:`T` is the threshold at which the maximum profit is achieved.
    In terms of this function's arguments, :math:`b_0` is ``tp_benefit``,
    :math:`b_1` is ``tn_benefit``, :math:`c_0` is ``fn_cost``, :math:`c_1` is ``fp_cost``,
    :math:`\\pi_0` is the proportion of samples labelled 1 and :math:`\\pi_1 = 1 - \\pi_0`.

    The MP requires that the positive class is encoded as 0, and negative class as 1.
    However, this implementation assumes the standard notation ('positive': 1, 'negative': 0).

    References
    ----------
    .. [1] Verbraken, T., Verbeke, W. and Baesens, B. (2013).
        A Novel Profit Maximizing Metric for Measuring Classification
        Performance of Customer Churn Prediction Models. IEEE Transactions on
        Knowledge and Data Engineering, 25(5), 961-973. Available Online:
        http://ieeexplore.ieee.org/iel5/69/6486492/06165289.pdf?arnumber=6165289

    Examples
    --------
    Reimplement MPC:

    >>> from empulse.metrics import max_profit
    >>>
    >>> y_true = [0, 1, 0, 1, 0, 1, 0, 1]
    >>> y_score = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9]
    >>>
    >>> clv = 200
    >>> d = 10
    >>> f = 1
    >>> gamma = 0.3
    >>> tp_benefit = clv * (gamma * (1 - (d / clv)) - (f / clv))
    >>> fp_cost = d + f
    >>>
    >>> max_profit(y_true, y_score, tp_benefit=tp_benefit, fp_cost=fp_cost)
    (23.87..., 0.875)
    """
    # Coerce to fixed dtypes and C-contiguous layout before calling the compiled helper.
    y_true = np.asarray(y_true, dtype=np.int32, order='C')
    y_score = np.asarray(y_score, dtype=np.float64, order='C')

    # Fail with a clear message instead of passing mismatched buffers to convex_hull.
    if y_true.shape != y_score.shape:
        raise ValueError(
            f'y_true and y_score must have the same shape, got {y_true.shape} and {y_score.shape}.'
        )

    # Class priors: pi0 is the share of samples labelled 1, pi1 the share labelled 0.
    pi0 = float(np.mean(y_true))
    pi1 = 1 - pi0

    # NOTE(review): f0/f1 are presumably the F_0/F_1 values of the formula evaluated
    # at the convex-hull points of the ROC curve -- confirm against _cy_convex_hull.
    f0, f1 = convex_hull(y_true, y_score)

    # Expanded form of the MP formula from the docstring, evaluated for every hull point.
    profits = (tp_benefit + fn_cost) * pi0 * f0 - (tn_benefit + fp_cost) * pi1 * f1 + tn_benefit * pi1 - fn_cost * pi0

    best_index = np.argmax(profits)
    maximum_profit = float(profits[best_index])
    # The threshold is the prior-weighted mix of f0 and f1 at the most profitable point.
    customer_threshold = float(f0[best_index] * pi0 + f1[best_index] * pi1)
    return maximum_profit, customer_threshold