Source code for empulse.metrics.credit_scoring.stochastic

import warnings

import numpy as np

from ..._types import FloatArrayLike, FloatNDArray
from .._cy_convex_hull import convex_hull
from ..common import _compute_prior_class_probabilities, _compute_tpr_fpr_diffs
from ._validation import _validate_input_emp


[docs] def empcs_score( y_true: FloatArrayLike, y_score: FloatArrayLike, *, success_rate: float = 0.55, default_rate: float = 0.1, roi: float = 0.2644, check_input: bool = True, ) -> float: """ :func:`~empulse.metrics.empcs()` but only returning the EMPCS score. EMPCS presumes a situation where a company is considering whether to grant a loan to a customer. Correctly identifying defaulters results in receiving a return on investment (ROI), while incorrectly identifying non-defaulters as defaulters results in a loss of the loan amount. The degree to which the loan is lost is determined by the probability that the entire loan is lost (``default_rate``), probability that the entire loan is paid back (``success_rate``), and a uniform distribution of partial loan losses (``1 - default_rate - success_rate``). For detailed information, consult the paper [1]_. .. seealso:: :func:`~empulse.metrics.empcs` : to also return the fraction of loan applications that should be accepted to maximize profit. :func:`~empulse.metrics.mpcs_score` : for a deterministic version of this metric. Parameters ---------- y_true : 1D array-like, shape=(n_samples,) Binary target values ('acquisition': 1, 'no acquisition': 0). y_score : 1D array-like, shape=(n_samples,) Target scores, can either be probability estimates or non-thresholded decision values. success_rate : float, default=0.55 Probability that the entire loan is paid back (``0 ≤ succes_rate ≤ 1``). default_rate : float, default=0.1 Probability that the entire loan is lost (``0 ≤ default_rate ≤ 1``). roi : float, default=0.2644 Return on investment on the loan (``roi ≥ 0``). check_input : bool, default=True Perform input validation. Turning off improves performance, useful when using this metric as a loss function. Returns ------- empcs : float Expected Maximum Profit measure for customer Credit Scoring. Notes ----- The EMP measure for Credit Scoring is defined as [1]_: .. math:: \\int_0^1 \\lambda \\pi_0 F_0(T) - ROI \\pi_1 F_1(T) \\cdot h(\\lambda) d\\lambda The EMP measure for Credit Scoring requires that the default class is encoded as 0, and it is NOT interchangeable. However, this implementation assumes the standard notation ('default': 1, 'no default': 0). Code adapted from [2]_. References ---------- .. [1] Verbraken, T., Bravo, C., Weber, R., & Baesens, B. (2014). Development and application of consumer credit scoring models using profit-based classification measures. European Journal of Operational Research, 238(2), 505-513. .. [2] https://github.com/Banking-Analytics-Lab/EMP-Py/blob/main/EMP/metrics.py Examples -------- >>> from empulse.metrics import empcs_score >>> >>> y_true = [0, 1, 0, 1, 0, 1, 0, 1] >>> y_score = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9] >>> empcs_score(y_true, y_score) 0.09747017050000001 Using scorer: >>> import numpy as np >>> from sklearn.datasets import make_classification >>> from sklearn.linear_model import LogisticRegression >>> from sklearn.model_selection import cross_val_score, StratifiedKFold >>> from sklearn.metrics import make_scorer >>> from empulse.metrics import empcs_score >>> >>> X, y = make_classification(random_state=42) >>> model = LogisticRegression() >>> cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42) >>> scorer = make_scorer( ... empcs_score, ... response_method='predict_proba', ... roi=0.2, ... success_rate=0.5, ... default_rate=0.1, ... ) >>> np.mean(cross_val_score(model, X, y, cv=cv, scoring=scorer)) 0.14904 """ return empcs( y_true, y_score, success_rate=success_rate, default_rate=default_rate, roi=roi, check_input=check_input, )[0]
[docs] def empcs( y_true: FloatArrayLike, y_score: FloatArrayLike, *, success_rate: float = 0.55, default_rate: float = 0.1, roi: float = 0.2644, check_input: bool = True, ) -> tuple[float, float]: """ Expected Maximum Profit measure for Credit Scoring. EMPCS presumes a situation where a company is considering whether to grant a loan to a customer. Correctly identifying defaulters results in receiving a return on investment (ROI), while incorrectly identifying non-defaulters as defaulters results in a loss of the loan amount. The degree to which the loan is lost is determined by the probability that the entire loan is lost (``default_rate``), probability that the entire loan is paid back (``success_rate``), and a uniform distribution of partial loan losses (``1 - default_rate - success_rate``). For detailed information, consult the paper [1]_. .. seealso:: :func:`~empulse.metrics.empcs_score` : to only return the EMPCS score. :func:`~empulse.metrics.mpcs` : for a deterministic version of this metric. Parameters ---------- y_true : 1D array-like, shape=(n_samples,) Binary target values ('acquisition': 1, 'no acquisition': 0). y_score : 1D array-like, shape=(n_samples,) Target scores, can either be probability estimates or non-thresholded decision values. success_rate : float, default=0.55 Probability that the entire loan is paid back (``0 ≤ succes_rate ≤ 1``). default_rate : float, default=0.1 Probability that the entire loan is lost (``0 ≤ default_rate ≤ 1``). roi : float, default=0.2644 Return on investment on the loan (``roi ≥ 0``). check_input : bool, default=True Perform input validation. Turning off improves performance, useful when using this metric as a loss function. Returns ------- empcs : float Expected Maximum Profit measure for customer Credit Scoring threshold : float Fraction of loan applications that should be accepted to maximize profit Notes ----- The EMP measure for Credit Scoring is defined as [1]_: .. math:: \\int_0^1 \\lambda \\pi_0 F_0(T) - ROI \\pi_1 F_1(T) \\cdot h(\\lambda) d\\lambda The EMP measure for Credit Scoring requires that the default class is encoded as 0, and it is NOT interchangeable. However, this implementation assumes the standard notation ('default': 1, 'no default': 0). References ---------- .. [1] Verbraken, T., Bravo, C., Weber, R., & Baesens, B. (2014). Development and application of consumer credit scoring models using profit-based classification measures. European Journal of Operational Research, 238(2), 505-513. Examples -------- >>> from empulse.metrics import empcs >>> >>> y_true = [0, 1, 0, 1, 0, 1, 0, 1] >>> y_score = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9] >>> empcs(y_true, y_score) (0.09747017050000001, 0.32434500000000005) """ # noqa: D401 if check_input: y_true, y_score = _validate_input_emp(y_true, y_score, success_rate, default_rate, roi) else: y_true = np.asarray(y_true) y_score = np.asarray(y_score) y_true = y_true.astype(np.int32) y_score = y_score.astype(np.float64) alpha = 1 - success_rate - default_rate positive_class_prob, negative_class_prob = _compute_prior_class_probabilities(y_true) true_positive_rates, false_positive_rates = convex_hull(y_true, y_score) tpr_diff, fpr_diff = _compute_tpr_fpr_diffs(true_positive_rates, false_positive_rates) lambda_cdf_diff, lambda_cdf_sum = _compute_lambda_cdf( roi, tpr_diff, fpr_diff, positive_class_prob, negative_class_prob ) cutoff = len(true_positive_rates) - len(lambda_cdf_diff) if cutoff > 0: true_positive_rates = true_positive_rates[:-cutoff] false_positive_rates = false_positive_rates[:-cutoff] temp_1 = positive_class_prob * true_positive_rates * lambda_cdf_sum / 2 temp_2 = roi * false_positive_rates * negative_class_prob partial_default_term: float = np.sum(alpha * lambda_cdf_diff * (temp_1 - temp_2)) full_default_term = default_rate * ( positive_class_prob * true_positive_rates[-1] - roi * negative_class_prob * false_positive_rates[-1] ) empcs = partial_default_term + full_default_term customer_threshold = np.sum( alpha * lambda_cdf_diff * (positive_class_prob * true_positive_rates + negative_class_prob * false_positive_rates) ) + default_rate * (positive_class_prob * true_positive_rates[-1] + negative_class_prob * false_positive_rates[-1]) return empcs, customer_threshold
def _compute_lambda_cdf( roi: float, tpr_diff: FloatNDArray, fpr_diff: FloatNDArray, positive_class_prob: float, negative_class_prob: float ) -> tuple[FloatNDArray, FloatNDArray]: # ignore division by zero warning with warnings.catch_warnings(): warnings.filterwarnings('ignore', category=RuntimeWarning) lambda_bounds = negative_class_prob * roi / positive_class_prob * (fpr_diff / tpr_diff) # type: ignore[operator] lambda_bounds = np.append(0, lambda_bounds) lambda_bounds = np.append(lambda_bounds[lambda_bounds < 1], 1) return np.diff(lambda_bounds), lambda_bounds[1:] + lambda_bounds[:-1]