# Source code for empulse.metrics.metric.cost_matrix

from collections.abc import MutableMapping
from typing import Any, Self

import sympy


class CostMatrix:
    """
    Class to create a custom value/cost-sensitive cost matrix.

    You add the costs and benefits that make up the cost matrix for each case
    (true positive, true negative, false positive, false negative).
    The costs and benefits are specified using sympy symbols or expressions.
    Stochastic variables are supported and can be specified using sympy.stats random variables.
    Stochastic variables are assumed to be independent of each other.

    Read more in the :ref:`User Guide <user_defined_value_metric>`.

    Attributes
    ----------
    tp_benefit : sympy.Expr
        The benefit of a true positive.
        See :meth:`~empulse.metrics.CostMatrix.add_tp_benefit` for more details.

    tn_benefit : sympy.Expr
        The benefit of a true negative.
        See :meth:`~empulse.metrics.CostMatrix.add_tn_benefit` for more details.

    fp_benefit : sympy.Expr
        The benefit of a false positive.
        See :meth:`~empulse.metrics.CostMatrix.add_fp_benefit` for more details.

    fn_benefit : sympy.Expr
        The benefit of a false negative.
        See :meth:`~empulse.metrics.CostMatrix.add_fn_benefit` for more details.

    tp_cost : sympy.Expr
        The cost of a true positive.
        See :meth:`~empulse.metrics.CostMatrix.add_tp_cost` for more details.

    tn_cost : sympy.Expr
        The cost of a true negative.
        See :meth:`~empulse.metrics.CostMatrix.add_tn_cost` for more details.

    fp_cost : sympy.Expr
        The cost of a false positive.
        See :meth:`~empulse.metrics.CostMatrix.add_fp_cost` for more details.

    fn_cost : sympy.Expr
        The cost of a false negative.
        See :meth:`~empulse.metrics.CostMatrix.add_fn_cost` for more details.

    Examples
    --------
    Reimplementing the :func:`~empulse.metrics.empc_score` cost matrix.

    .. code-block:: python

        import sympy as sp
        from empulse.metrics import CostMatrix

        clv, d, f, alpha, beta = sp.symbols('clv d f alpha beta')  # define deterministic variables
        gamma = sp.stats.Beta('gamma', alpha, beta)  # define gamma to follow a Beta distribution

        cost_matrix = (
            CostMatrix()
            .add_tp_benefit(gamma * (clv - d - f))  # when churner accepts offer
            .add_tp_benefit((1 - gamma) * -f)  # when churner does not accept offer
            .add_fp_cost(d + f)  # when you send an offer to a non-churner
            .alias({'incentive_cost': 'd', 'contact_cost': 'f'})
        )
    """

    def __init__(self) -> None:
        # Only four expressions are stored: benefits for the correct predictions (TP, TN)
        # and costs for the errors (FP, FN). The opposite view of each cell is derived by
        # negation in the properties below, so adding a "cost" to a benefit cell subtracts.
        self._tp_benefit: sympy.Expr = sympy.S.Zero
        self._tn_benefit: sympy.Expr = sympy.S.Zero
        self._fp_cost: sympy.Expr = sympy.S.Zero
        self._fn_cost: sympy.Expr = sympy.S.Zero
        # alias name -> symbol (or symbol name) it stands for
        self._aliases: MutableMapping[str, str | sympy.Symbol] = {}
        # symbol name -> default value
        self._defaults: dict[str, Any] = {}
        self._outlier_sensitive_symbols: set[sympy.Symbol] = set()

    @staticmethod
    def _as_expr(term: sympy.Expr | str) -> sympy.Expr:
        """Return ``term`` unchanged, or parse it into a sympy expression if it is a string."""
        if isinstance(term, str):
            # NOTE: sympy.sympify evaluates the string it is given; do not pass untrusted input.
            return sympy.sympify(term)
        return term

    @property
    def tp_benefit(self) -> sympy.Expr:  # noqa: D102
        return self._tp_benefit

    @property
    def tn_benefit(self) -> sympy.Expr:  # noqa: D102
        return self._tn_benefit

    @property
    def fp_benefit(self) -> sympy.Expr:  # noqa: D102
        return -self._fp_cost

    @property
    def fn_benefit(self) -> sympy.Expr:  # noqa: D102
        return -self._fn_cost

    @property
    def tp_cost(self) -> sympy.Expr:  # noqa: D102
        return -self._tp_benefit

    @property
    def tn_cost(self) -> sympy.Expr:  # noqa: D102
        return -self._tn_benefit

    @property
    def fp_cost(self) -> sympy.Expr:  # noqa: D102
        return self._fp_cost

    @property
    def fn_cost(self) -> sympy.Expr:  # noqa: D102
        return self._fn_cost

    def add_tp_benefit(self, term: sympy.Expr | str) -> Self:
        """
        Add a term to the benefit of classifying a true positive.

        Parameters
        ----------
        term : sympy.Expr | str
            The term to add to the benefit of classifying a true positive.
            Strings are parsed with :func:`sympy.sympify`.

        Returns
        -------
        CostMatrix
        """
        self._tp_benefit += self._as_expr(term)
        return self

    def add_tn_benefit(self, term: sympy.Expr | str) -> Self:
        """
        Add a term to the benefit of classifying a true negative.

        Parameters
        ----------
        term : sympy.Expr | str
            The term to add to the benefit of classifying a true negative.
            Strings are parsed with :func:`sympy.sympify`.

        Returns
        -------
        CostMatrix
        """
        self._tn_benefit += self._as_expr(term)
        return self

    def add_fp_benefit(self, term: sympy.Expr | str) -> Self:
        """
        Add a term to the benefit of classifying a false positive.

        Parameters
        ----------
        term : sympy.Expr | str
            The term to add to the benefit of classifying a false positive.
            Strings are parsed with :func:`sympy.sympify`.

        Returns
        -------
        CostMatrix
        """
        # A benefit on an error cell lowers the stored cost.
        self._fp_cost -= self._as_expr(term)
        return self

    def add_fn_benefit(self, term: sympy.Expr | str) -> Self:
        """
        Add a term to the benefit of classifying a false negative.

        Parameters
        ----------
        term : sympy.Expr | str
            The term to add to the benefit of classifying a false negative.
            Strings are parsed with :func:`sympy.sympify`.

        Returns
        -------
        CostMatrix
        """
        # A benefit on an error cell lowers the stored cost.
        self._fn_cost -= self._as_expr(term)
        return self

    def add_tp_cost(self, term: sympy.Expr | str) -> Self:
        """
        Add a term to the cost of classifying a true positive.

        Parameters
        ----------
        term : sympy.Expr | str
            The term to add to the cost of classifying a true positive.
            Strings are parsed with :func:`sympy.sympify`.

        Returns
        -------
        CostMatrix
        """
        # A cost on a correct-prediction cell lowers the stored benefit.
        self._tp_benefit -= self._as_expr(term)
        return self

    def add_tn_cost(self, term: sympy.Expr | str) -> Self:
        """
        Add a term to the cost of classifying a true negative.

        Parameters
        ----------
        term : sympy.Expr | str
            The term to add to the cost of classifying a true negative.
            Strings are parsed with :func:`sympy.sympify`.

        Returns
        -------
        CostMatrix
        """
        # A cost on a correct-prediction cell lowers the stored benefit.
        self._tn_benefit -= self._as_expr(term)
        return self

    def add_fp_cost(self, term: sympy.Expr | str) -> Self:
        """
        Add a term to the cost of classifying a false positive.

        Parameters
        ----------
        term : sympy.Expr | str
            The term to add to the cost of classifying a false positive.
            Strings are parsed with :func:`sympy.sympify`.

        Returns
        -------
        CostMatrix
        """
        self._fp_cost += self._as_expr(term)
        return self

    def add_fn_cost(self, term: sympy.Expr | str) -> Self:
        """
        Add a term to the cost of classifying a false negative.

        Parameters
        ----------
        term : sympy.Expr | str
            The term to add to the cost of classifying a false negative.
            Strings are parsed with :func:`sympy.sympify`.

        Returns
        -------
        CostMatrix
        """
        self._fn_cost += self._as_expr(term)
        return self

    def alias(self, alias: str | MutableMapping[str, sympy.Symbol | str], symbol: sympy.Symbol | None = None) -> Self:
        """
        Add an alias for a symbol.

        Parameters
        ----------
        alias : str | MutableMapping[str, sympy.Symbol | str]
            The alias to add.
            If a MutableMapping (e.g., dictionary) is passed,
            the keys are the aliases and the values are the symbols.
            In that case ``symbol`` is not used.
        symbol : sympy.Symbol, optional
            The symbol to alias to. Required when ``alias`` is a single name;
            it is stored by its string name.

        Returns
        -------
        CostMatrix

        Raises
        ------
        ValueError
            If ``alias`` is not a mapping and no ``symbol`` is given.

        Examples
        --------
        .. code-block:: python

            import sympy as sp
            from empulse.metrics import Metric, Cost, CostMatrix

            clv, delta, f, gamma = sp.symbols('clv delta f gamma')

            cost_matrix = (
                CostMatrix()
                .add_tp_benefit(gamma * (clv - delta * clv - f))  # when churner accepts offer
                .add_tp_benefit((1 - gamma) * -f)  # when churner does not accept offer
                .add_fp_cost(delta * clv + f)  # when you send an offer to a non-churner
                .alias({'incentive_fraction': 'delta', 'contact_cost': 'f', 'accept_rate': 'gamma'})
            )
            cost_loss = Metric(cost_matrix, Cost())

            y_true = [1, 0, 1, 0, 1]
            y_proba = [0.9, 0.1, 0.8, 0.2, 0.7]
            cost_loss(
                y_true, y_proba, clv=100, incentive_fraction=0.05, contact_cost=1, accept_rate=0.3
            )
        """
        if isinstance(alias, MutableMapping):
            self._aliases.update(alias)
        elif symbol is not None:
            self._aliases[alias] = str(symbol)
        else:
            raise ValueError('Either a dictionary or both an alias and a symbol should be provided')
        return self

    def set_default(self, **defaults: float) -> Self:
        """
        Set default values for symbols or their aliases.

        Aliases are resolved to symbol names using the aliases registered at the
        time of the call, so register aliases with
        :meth:`~empulse.metrics.CostMatrix.alias` *before* setting defaults for them.
        Keys that are not a known alias are stored unchanged.

        Parameters
        ----------
        defaults : float
            Default values for symbols or their aliases.
            These default values will be used if not provided in __call__.

        Returns
        -------
        CostMatrix

        Examples
        --------
        .. code-block:: python

            import sympy as sp
            from empulse.metrics import Metric, Cost, CostMatrix

            clv, delta, f, gamma = sp.symbols('clv delta f gamma')

            cost_matrix = (
                CostMatrix()
                .add_tp_benefit(gamma * (clv - delta * clv - f))  # when churner accepts offer
                .add_tp_benefit((1 - gamma) * -f)  # when churner does not accept offer
                .add_fp_cost(delta * clv + f)  # when you send an offer to a non-churner
                .alias({'incentive_fraction': 'delta', 'contact_cost': 'f', 'accept_rate': 'gamma'})
                .set_default(incentive_fraction=0.05, contact_cost=1, accept_rate=0.3)
            )
            cost_loss = Metric(cost_matrix, Cost())

            y_true = [1, 0, 1, 0, 1]
            y_proba = [0.9, 0.1, 0.8, 0.2, 0.7]
            cost_loss(y_true, y_proba, clv=100, incentive_fraction=0.1)
        """
        # Store every default under the symbol name; an alias maps to its symbol,
        # anything else maps to itself.
        self._defaults.update({str(self._aliases.get(key, key)): value for key, value in defaults.items()})
        return self

    def mark_outlier_sensitive(self, symbol: str | sympy.Symbol) -> Self:
        """
        Mark a symbol as outlier-sensitive.

        This is used to indicate that the symbol is sensitive to outliers.
        When the metric is used as a loss function or criterion for training a model,
        :class:`~empulse.models.RobustCSClassifier` will impute outliers for this symbol's value.
        This is ignored when not using a :class:`~empulse.models.RobustCSClassifier` model.

        Parameters
        ----------
        symbol : str | sympy.Symbol
            The symbol to mark as outlier-sensitive.
            A string that is a plain identifier (e.g. ``'a'``, ``'beta'``) is turned into
            ``sympy.Symbol(name)`` directly; any other string is parsed with
            :func:`sympy.sympify`.

        Returns
        -------
        CostMatrix

        Raises
        ------
        TypeError
            If ``symbol`` is not a ``sympy.Symbol`` and cannot be converted to one.

        Examples
        --------
        .. code-block:: python

            import numpy as np
            import sympy as sp
            from empulse.metrics import Metric, Cost, CostMatrix
            from empulse.models import CSLogitClassifier, RobustCSClassifier
            from sklearn.datasets import make_classification

            X, y = make_classification()

            a, b = sp.symbols('a b')
            cost_matrix = CostMatrix().add_fp_cost(a).add_fn_cost(b).mark_outlier_sensitive(a)
            cost_loss = Metric(cost_matrix, Cost())

            model = RobustCSClassifier(CSLogitClassifier(loss=cost_loss))
            model.fit(X, y, a=np.random.rand(y.size), b=5)
        """
        if isinstance(symbol, str):
            name = symbol.strip()
            # Build plain identifiers directly: sympify() would resolve names such as
            # 'beta', 'gamma', 'E' or 'pi' to SymPy's built-in functions/constants and the
            # Symbol check below would then reject a perfectly valid symbol name.
            symbol = sympy.Symbol(name) if name.isidentifier() else sympy.sympify(symbol)
        if not isinstance(symbol, sympy.Symbol):
            raise TypeError('The symbol must be a sympy.Symbol or a string that can be converted to a sympy.Symbol')
        self._outlier_sensitive_symbols.add(symbol)
        return self

    def __repr__(self) -> str:
        # All four cells are shown from the cost point of view.
        return (
            f'{self.__class__.__name__}('
            f'tp_cost={self.tp_cost}, tn_cost={self.tn_cost}, '
            f'fp_cost={self.fp_cost}, fn_cost={self.fn_cost})'
        )

    def _repr_latex_(self) -> str:
        # Renders the matrix as a LaTeX array with predictions as rows and true labels as
        # columns, every cell expressed as a cost. Each cell is wrapped in \text{...},
        # presumably so the delimited LaTeX returned by SymPy can be nested inside the array.
        return (  # type: ignore[no-any-return]
            r"""
        \begin{array}{c|cc}
            & y=0 & y=1 \\
            \hline
            \hat y=0 & \text{"""
            + self.tn_cost._repr_latex_()
            + r"""} & \text{"""
            + self.fn_cost._repr_latex_()
            + r"""} \\
            \hat y=1 & \text{"""
            + self.fp_cost._repr_latex_()
            + r"""} & \text{"""
            + self.tp_cost._repr_latex_()
            + r"""} \\
        \end{array}
        """
        )