Source code for empulse.metrics.common

import numpy as np

from .._types import FloatArrayLike, FloatNDArray


def classification_threshold(y_true: FloatArrayLike, y_score: FloatArrayLike, customer_threshold: float) -> float:
    """
    Convert a customer threshold into the corresponding classification threshold.

    The customer threshold is the fraction of samples that should be targeted.
    The returned value is the score cut-off whose targeted fraction lies closest
    to that requested fraction.

    Parameters
    ----------
    y_true : 1D array-like, shape=(n_samples,)
        Binary target values ('positive': 1, 'negative': 0).

    y_score : 1D array-like, shape=(n_samples,)
        Target scores, can either be probability estimates or non-thresholded decision values.

    customer_threshold : float
        Customer threshold determined by value-driven metric.

    Returns
    -------
    threshold : float
        Classification threshold for given customer threshold.

    Examples
    --------
    >>> from empulse.metrics import classification_threshold
    >>> from empulse.metrics import empc
    >>> y_true = [0, 1, 0, 1, 0, 1, 0, 1]
    >>> y_score = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9]
    >>> score, threshold = empc(y_true, y_score)
    >>> classification_threshold(y_true, y_score, threshold)
    0.2
    """
    labels = np.asarray(y_true)
    scores = np.asarray(y_score)
    counts, order, tied_positions = _compute_confusion_matrix(labels, scores)

    # Candidate cut-offs: a leading 1 (paired with the "nobody targeted" column of the counts),
    # followed by the scores from highest to lowest.
    candidates = np.pad(scores[order], pad_width=(1, 0), constant_values=1)
    # Drop the same positions that were merged for tied scores so both arrays stay aligned.
    candidates = np.delete(candidates, tied_positions)  # type: ignore[arg-type]

    # Fraction of all samples targeted at each candidate cut-off (TP + FP over n_samples).
    targeted_fractions = counts.sum(axis=0) / scores.shape[0]
    closest = np.argmin(np.abs(targeted_fractions - customer_threshold))
    return float(candidates[closest])


def _compute_confusion_matrix(
    y_true: FloatNDArray, y_pred: FloatNDArray
) -> tuple[FloatNDArray, FloatNDArray, FloatNDArray]:
    """
    Count cumulative true and false positives while walking down the ranked predictions.

    Returns a tuple of:

    - an array stacking the true-positive counts (row 0) and false-positive counts (row 1),
      each starting with a 0 entry for "nothing targeted yet";
    - the indices that order ``y_pred`` from highest to lowest;
    - the positions removed from the padded count arrays because consecutive predictions were equal.
    """
    # rank samples from the highest to the lowest predicted score
    order = np.argsort(y_pred)[::-1]
    ranked_labels = y_true[order]
    ranked_scores = y_pred[order]

    # running TP / FP totals after each additional sample, with a leading zero
    tp_counts = np.pad(np.cumsum(ranked_labels), pad_width=(1, 0))
    fp_counts = np.pad(np.cumsum(ranked_labels == 0), pad_width=(1, 0))

    # equal consecutive scores cannot be separated by a threshold, so collapse them into one entry
    tied_positions = np.where(np.diff(ranked_scores) == 0)[0] + 1
    counts = np.array([np.delete(tp_counts, tied_positions), np.delete(fp_counts, tied_positions)])
    return counts, order, tied_positions


def _compute_prior_class_probabilities(y_true: FloatNDArray) -> tuple[float, float]:
    """Return the mean of the target values and its complement, in that order."""
    prior_positive = float(np.mean(y_true))  # pi_0
    return prior_positive, 1 - prior_positive  # (pi_0, pi_1)


def _compute_tpr_fpr_diffs(
    true_positive_rates: FloatNDArray, false_positive_rates: FloatNDArray
) -> tuple[FloatNDArray, FloatNDArray]:
    """Return the first-order differences along axis 0 of the true and false positive rates."""
    return (
        np.diff(true_positive_rates, axis=0),  # F_0(T_i) - F_0(T_{i-1})
        np.diff(false_positive_rates, axis=0),  # F_1(T_i) - F_1(T_{i-1})
    )


def _compute_profits(
    y_true: FloatNDArray, y_pred: FloatNDArray, cost_benefits: FloatNDArray
) -> tuple[FloatNDArray, FloatNDArray]:
    """
    Compute the per-sample profit and the targeted fraction at every distinct prediction cut-off.

    ``cost_benefits`` is combined with the (true positives, false positives) counts through a dot product,
    so its first axis presumably holds the TP weight followed by the FP weight -- confirm against callers.
    """
    counts, _order, _tied_positions = _compute_confusion_matrix(y_true, y_pred)
    n_samples = y_pred.shape[0]

    profits = np.dot(counts.T, cost_benefits) / n_samples
    targeted_fractions = np.sum(counts, axis=0) / n_samples
    return profits, targeted_fractions