# Source code for empulse.metrics.acquisition.cost

from collections.abc import Callable, Sequence
from functools import partial, update_wrapper
from typing import TYPE_CHECKING, Literal, TypeVar, overload

import numpy as np
from scipy.special import expit

from ..._types import FloatArrayLike, FloatNDArray

# ``Matrix`` is the TypeVar for the data-matrix type. Its bound depends on
# whether the file is being type-checked and on which optional packages are
# available.
if TYPE_CHECKING:  # pragma: no cover
    # Only evaluated by static type checkers, so lightgbm and xgboost are never
    # imported by this block at runtime.
    try:
        from lightgbm import Dataset
        from xgboost import DMatrix

        # Both optional packages are available: widen the bound to their
        # native dataset containers.
        Matrix = TypeVar('Matrix', bound=FloatNDArray | DMatrix | Dataset)
    except ImportError:
        # At least one optional package is missing: fall back to plain arrays.
        Matrix = TypeVar('Matrix', bound=FloatNDArray)  # type: ignore[misc]
else:
    # Runtime definition: bound to float arrays only.
    Matrix = TypeVar('Matrix', bound=FloatNDArray)


from ._validation import _validate_input_deterministic


@overload
def make_objective_acquisition(
    model: Literal['catboost'],
    *,
    contribution: float = 7_000,
    contact_cost: float = 50,
    sales_cost: float = 500,
    direct_selling: float = 1,
    commission: float = 0.1,
) -> tuple['AECObjectiveAcquisition', 'AECMetricAcquisition']: ...


@overload
def make_objective_acquisition(
    model: Literal['xgboost', 'lightgbm'],
    *,
    contribution: float = 7_000,
    contact_cost: float = 50,
    sales_cost: float = 500,
    direct_selling: float = 1,
    commission: float = 0.1,
) -> Callable[[FloatNDArray, FloatNDArray], tuple[FloatNDArray, FloatNDArray]]: ...


[docs] def make_objective_acquisition( model: Literal['xgboost', 'lightgbm', 'catboost'], *, contribution: float = 7_000, contact_cost: float = 50, sales_cost: float = 500, direct_selling: float = 1, commission: float = 0.1, ) -> ( tuple['AECObjectiveAcquisition', 'AECMetricAcquisition'] | Callable[[FloatNDArray, FloatNDArray], tuple[FloatNDArray, FloatNDArray]] ): """ Create an objective function for the Expected Cost measure for customer acquisition. The objective function presumes a situation where leads are targeted either directly or indirectly. Directly targeted leads are contacted and handled by the internal sales team. Indirectly targeted leads are contacted and then referred to intermediaries, which receive a commission. The company gains a contribution from a successful acquisition. Read more in the :ref:`User Guide <cost_functions>`. Parameters ---------- model : {'xgboost', 'lightgbm', 'catboost'} The model for which the objective function is created. - 'xgboost' : :class:`xgboost:xgboost.XGBClassifier` - 'lightgbm' : :class:`lightgbm:lightgbm.LGBMClassifier` - 'catboost' : :class:`catboost.CatBoostClassifier` contribution : float, default=7000 Average contribution of a new customer (``contribution ≥ 0``). sales_cost : float, default=500 Average sale conversion cost of targeted leads handled by the company (``sales_cost ≥ 0``). contact_cost : float, default=50 Average contact cost of targeted leads (``contact_cost ≥ 0``). direct_selling : float, default=1 Fraction of leads sold to directly (``0 ≤ direct_selling ≤ 1``). ``direct_selling = 0`` for indirect channel. ``direct_selling = 1`` for direct channel. commission : float, default=0.1 Fraction of contribution paid to the intermediaries (``0 ≤ commission ≤ 1``). .. note:: The commission is only relevant when there is an indirect channel (``direct_selling < 1``). Returns ------- objective : Callable A custom objective function for XGBoost. Examples -------- .. 
code-block:: python from xgboost import XGBClassifier from empulse.metrics import make_objective_acquisition objective = make_objective_acquisition(model='xgboost') clf = XGBClassifier(objective=objective, n_estimators=100, max_depth=3) References ---------- .. [1] Janssens, B., Bogaert, M., Bagué, A., & Van den Poel, D. (2022). B2Boost: Instance-dependent profit-driven modelling of B2B churn. Annals of Operations Research, 1-27. """ if model == 'xgboost': objective: Callable[[FloatNDArray, FloatNDArray], tuple[FloatNDArray, FloatNDArray]] = partial( _objective, contribution=contribution, contact_cost=contact_cost, sales_cost=sales_cost, direct_selling=direct_selling, commission=commission, ) update_wrapper(objective, _objective) elif model == 'lightgbm': def objective(y_true: FloatNDArray, y_score: FloatNDArray) -> tuple[FloatNDArray, FloatNDArray]: """ Create an objective function for the churn AEC measure. Parameters ---------- y_true : np.ndarray Ground truth labels. y_score : np.ndarray Predicted values. Returns ------- gradient : np.ndarray Gradient of the objective function. hessian : np.ndarray Hessian of the objective function. """ return _objective( y_true, y_score, contribution=contribution, contact_cost=contact_cost, sales_cost=sales_cost, direct_selling=direct_selling, commission=commission, ) elif model == 'catboost': return ( AECObjectiveAcquisition( contribution=contribution, contact_cost=contact_cost, sales_cost=sales_cost, direct_selling=direct_selling, commission=commission, ), AECMetricAcquisition( contribution=contribution, contact_cost=contact_cost, sales_cost=sales_cost, direct_selling=direct_selling, commission=commission, ), ) else: raise ValueError(f"Expected model to be 'xgboost' or 'lightgbm', got {model} instead.") return objective
def _objective( y_true: FloatNDArray, y_score: FloatNDArray, contribution: float = 7_000, contact_cost: float = 50, sales_cost: float = 500, direct_selling: float = 1, commission: float = 0.1, ) -> tuple[FloatNDArray, FloatNDArray]: """ Create an objective function for `XGBoostClassifier` for customer acquisition. Parameters ---------- y_true : np.ndarray Ground truth labels. y_score : np.ndarray Predicted values. Returns ------- gradient : np.ndarray Gradient of the objective function. hessian : np.ndarray Hessian of the objective function. """ y_proba = expit(y_score) cost = ( y_true * ( direct_selling * (contact_cost + sales_cost - contribution) + (1 - direct_selling) * (contact_cost - (1 - commission) * contribution) ) + (1 - y_true) * contact_cost ) gradient = y_proba * (1 - y_proba) * cost hessian = np.abs((1 - 2 * y_proba) * gradient) return gradient, hessian class AECObjectiveAcquisition: """AEC acquisition objective for catboost.""" def __init__( self, contribution: float = 7_000, contact_cost: float = 50, sales_cost: float = 500, direct_selling: float = 1, commission: float = 0.1, ): self.contribution = contribution self.sales_cost = sales_cost self.contact_cost = contact_cost self.direct_selling = direct_selling self.commission = commission def calc_ders_range( self, predictions: Sequence[float], targets: FloatNDArray, weights: Sequence[float] ) -> list[tuple[float, float]]: """ Compute first and second derivative of the loss function with respect to the predicted value for each object. Parameters ---------- predictions : indexed container of floats Current predictions for each object. targets : indexed container of floats Target values you provided with the dataset. weights : float, optional (default=None) Instance weight. 
Returns ------- der1 : list-like object of float der2 : list-like object of float """ y_proba = expit(predictions) cost = ( targets * ( self.direct_selling * (self.contact_cost + self.sales_cost - self.contribution) + (1 - self.direct_selling) * (self.contact_cost - (1 - self.commission) * self.contribution) ) + (1 - targets) * self.contact_cost ) gradient = y_proba * (1 - y_proba) * cost hessian = np.abs((1 - 2 * y_proba) * gradient) return list(zip(-gradient, -hessian, strict=False)) class AECMetricAcquisition: """AEC acquisition metric for catboost.""" def __init__( self, contribution: float = 7_000, contact_cost: float = 50, sales_cost: float = 500, direct_selling: float = 1, commission: float = 0.1, ): self.contribution = contribution self.sales_cost = sales_cost self.contact_cost = contact_cost self.direct_selling = direct_selling self.commission = commission def is_max_optimal(self) -> bool: """Return whether great values of metric are better.""" return False def evaluate( self, predictions: Sequence[float], targets: Sequence[float], weights: Sequence[float] ) -> tuple[float, float]: """ Evaluate metric value. Parameters ---------- predictions : list of indexed containers (containers with only __len__ and __getitem__ defined) of float Vectors of approx labels. targets : one dimensional indexed container of float Vectors of true labels. weights : one dimensional indexed container of float, optional (default=None) Weight for each instance. Returns ------- weighted error : float total weight : float """ y_proba = expit(predictions) return expected_cost_loss_acquisition( targets, y_proba, contribution=self.contribution, contact_cost=self.contact_cost, sales_cost=self.sales_cost, direct_selling=self.direct_selling, commission=self.commission, normalize=True, check_input=False, ), 1 def get_final_error(self, error: float, weight: float) -> float: """ Return final value of metric based on error and weight. 
Parameters ---------- error : float Sum of errors in all instances. weight : float Sum of weights of all instances. Returns ------- metric value : float """ return error
def expected_cost_loss_acquisition(
    y_true: FloatArrayLike,
    y_proba: FloatArrayLike,
    *,
    contribution: float = 7_000,
    contact_cost: float = 50,
    sales_cost: float = 500,
    direct_selling: float = 1,
    commission: float = 0.1,
    normalize: bool = False,
    check_input: bool = True,
) -> float:
    """
    Expected cost of a classifier for customer acquisition.

    The cost function presumes a situation where leads are targeted either directly or indirectly.
    Directly targeted leads are contacted and handled by the internal sales team.
    Indirectly targeted leads are contacted and then referred to intermediaries,
    which receive a commission.
    The company gains a contribution from a successful acquisition.

    Parameters
    ----------
    y_true : 1D array-like, shape=(n_samples,)
        Binary target values ('acquisition': 1, 'no acquisition': 0).

    y_proba : 1D array-like, shape=(n_samples,)
        Target probabilities, should lie between 0 and 1.

    contribution : float, default=7000
        Average contribution of a new customer (``contribution ≥ 0``).

    sales_cost : float, default=500
        Average sale conversion cost of targeted leads handled by the company (``sales_cost ≥ 0``).

    contact_cost : float, default=50
        Average contact cost of targeted leads (``contact_cost ≥ 0``).

    direct_selling : float, default=1
        Fraction of leads sold to directly (``0 ≤ direct_selling ≤ 1``).
        ``direct_selling = 0`` for indirect channel.
        ``direct_selling = 1`` for direct channel.

    commission : float, default=0.1
        Fraction of contribution paid to the intermediaries (``0 ≤ commission ≤ 1``).

        .. note::
            The commission is only relevant when there is an indirect channel (``direct_selling < 1``).

    normalize : bool, default=False
        Normalize the cost function by the number of samples.
        If ``True``, return the average expected cost for customer acquisition;
        otherwise return the summed expected cost.

    check_input : bool, default=True
        Perform input validation.
        Turning off improves performance, useful when using this metric as a loss function.

    Returns
    -------
    expected_cost : float
        Summed (or, with ``normalize=True``, averaged) expected cost over all samples.
    """  # noqa: D401
    if not check_input:
        # Fast path: only coerce to arrays, no validation.
        y_true = np.asarray(y_true)
        y_proba = np.asarray(y_proba)
    else:
        y_true, y_proba = _validate_input_deterministic(
            y_true, y_proba, contribution, contact_cost, sales_cost, direct_selling, commission
        )

    # Cost of targeting a lead that converts, blended over both sales channels.
    direct_cost = sales_cost + contact_cost - contribution
    indirect_cost = contact_cost - (1 - commission) * contribution
    acquired_cost = direct_selling * direct_cost + (1 - direct_selling) * indirect_cost

    # Each lead is targeted with probability y_proba; a non-converting lead only costs the contact.
    costs = y_true * y_proba * acquired_cost + (1 - y_true) * y_proba * contact_cost

    aggregate = np.mean if normalize else np.sum
    return float(aggregate(costs))