Source code for daal4py.mb.logistic_regression_builders

# Copyright contributors to the oneDAL project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import sys

import numpy as np

from .. import (
    classifier_prediction_result,
    logistic_regression_model_builder,
    logistic_regression_prediction,
)

# Shared "Parameters" entry describing ``X``. It is spliced into the prediction
# methods' docstrings wherever the ``%docstring_X%`` placeholder appears.
_docstring_X = """Parameters
----------
X : array-like(n_samples, n_features)
    The features / covariates for each row. Can be passed as either a NumPy array
    or as a sparse CSR array/matrix from SciPy. For faster results, use the same
    dtype as what this object was built for."""
# On Python 3.12 and earlier, prefix every line with eight spaces and strip the
# outer whitespace, so only the continuation lines end up indented.
# NOTE(review): presumably this matches the indentation of the method
# docstrings, which Python 3.13+ dedents at compile time -- confirm.
if sys.version_info[0] == 3 and sys.version_info[1] <= 12:
    _docstring_X = (
        re.compile("^", flags=re.MULTILINE).sub(" " * 8, _docstring_X).strip()
    )


class LogisticDAALModel:
    """
    Logistic Regression Predictor

    Creates a logistic regression or multinomial logistic regression model object
    which can calculate fast predictions of different types (classes, probabilities,
    logarithms of probabilities), from fitted coefficients and intercepts obtained
    elsewhere (such as from :obj:`sklearn.linear_model.LogisticRegression`), making
    the predictions either in double (``np.float64``) or single (``np.float32``)
    precision.

    See Also
    --------
    :obj:`sklearn.linear_model.LogisticRegression`, :obj:`sklearn.linear_model.SGDClassifier`,
    :obj:`daal4py.classifier_prediction_result`.

    Parameters
    ----------
    coefs : array(n_classes, n_features) or array(n_features,)
        The fitted model coefficients. Note that only dense arrays are supported.
        In the case of binary classification, can be passed as a 1D array or as a
        2D array having a single row.
    intercepts : array(n_classes) or float
        The fitted intercepts. In the case of binary classification, must be passed
        as either a scalar, or as a 1D array with a single entry.
    dtype : np.float32 or np.float64
        The dtype to use for the object.

    Attributes
    ----------
    n_classes_ : int
        Number of classes in the model.
    n_features_in_ : int
        Number of features in the model.
    dtype_ : np.dtype
        The dtype of the model
    coef_ : array(n_classes, n_features)
        The model coefficients
    intercept_ : array(n_classes)
        The model intercepts
    """

    def __init__(self, coefs, intercepts, dtype=np.float64):
        """
        Build the underlying model and the prediction algorithm objects.

        Raises
        ------
        ValueError
            If ``dtype`` is not ``np.float32`` / ``np.float64``, or if the number
            of intercepts does not match the shape of ``coefs``.
        """
        # Explicit raises rather than ``assert``: assertions are stripped under
        # ``python -O``, which would let invalid inputs reach the model builder.
        if dtype not in (np.float32, np.float64):
            raise ValueError("'dtype' must be one of np.float32 or np.float64.")

        coefs = np.require(coefs, requirements=["ENSUREARRAY"])
        if coefs.ndim == 1:
            # A 1D coefficient vector is treated as a single-row matrix.
            coefs = coefs.reshape((1, -1))
        self.n_features_in_ = coefs.shape[1]
        # A single row of coefficients describes a binary (two-class) model.
        self.n_classes_ = max(2, coefs.shape[0])

        intercepts = np.require(intercepts, requirements=["ENSUREARRAY"]).reshape(-1)
        if self.n_classes_ == 2:
            if len(intercepts) != 1:
                raise ValueError(
                    "Binary classification models take exactly one intercept, "
                    f"got {len(intercepts)}."
                )
        elif intercepts.shape[0] != coefs.shape[0]:
            raise ValueError(
                f"Number of intercepts ({intercepts.shape[0]}) must match the "
                f"number of rows in 'coefs' ({coefs.shape[0]})."
            )

        self._fptype = "float" if dtype == np.float32 else "double"
        self.dtype_ = dtype
        # Convert only when needed so inputs already in the target dtype are
        # passed through as-is.
        if coefs.dtype != self.dtype_:
            coefs = coefs.astype(self.dtype_)
        if intercepts.dtype != self.dtype_:
            intercepts = intercepts.astype(self.dtype_)

        builder = logistic_regression_model_builder(
            n_classes=self.n_classes_, n_features=coefs.shape[1]
        )
        builder.set_beta(coefs, intercepts)
        self._model = builder.model

        # One pre-configured prediction algorithm per output type, created once
        # here so the ``predict*`` methods can reuse them on every call.
        self._alg_pred_class = logistic_regression_prediction(
            nClasses=self.n_classes_,
            fptype=self._fptype,
            resultsToEvaluate="computeClassLabels",
        )
        self._alg_pred_prob = logistic_regression_prediction(
            nClasses=self.n_classes_,
            fptype=self._fptype,
            resultsToEvaluate="computeClassProbabilities",
        )
        self._alg_pred_logprob = logistic_regression_prediction(
            nClasses=self.n_classes_,
            fptype=self._fptype,
            resultsToEvaluate="computeClassLogProbabilities",
        )

    @property
    def coef_(self):
        """Model coefficients: every column of the model's ``Beta`` except the first."""
        return self._model.Beta[:, 1:]

    @property
    def intercept_(self):
        """Model intercepts: the first column of the model's ``Beta``."""
        return self._model.Beta[:, 0]

    def predict(self, X) -> np.ndarray:
        """
        Predict most probable class

        %docstring_X%

        Returns
        -------
        classes : array(n_samples,)
            The most probable class, as integer indexes
        """
        result = self._alg_pred_class.compute(X, self._model)
        # Flatten the prediction to 1D and cast to integer class indexes.
        return result.prediction.reshape(-1).astype(int)

    def predict_proba(self, X) -> np.ndarray:
        """
        Predict probabilities of belonging to each class

        %docstring_X%

        Returns
        -------
        proba : array(n_samples, n_classes)
            The predicted probabilities for each class.
        """
        return self._alg_pred_prob.compute(X, self._model).probabilities

    def predict_log_proba(self, X) -> np.ndarray:
        """
        Predict log-probabilities of belonging to each class

        %docstring_X%

        Returns
        -------
        log_proba : array(n_samples, n_classes)
            The logarithms of the predicted probabilities for each class.
        """
        return self._alg_pred_logprob.compute(X, self._model).logProbabilities

    def predict_multiple(
        self, X, classes: bool = True, proba: bool = True, log_proba: bool = True
    ) -> classifier_prediction_result:
        """
        Make multiple prediction types at once

        A method that can output the results from ``predict``, ``predict_proba``, and ``predict_log_proba``
        all together in the same call more efficiently than computing them independently.

        %docstring_X%
        classes : bool
            Whether to output class predictions (what is obtained from :meth:`predict`).
        proba : bool
            Whether to output per-class probability predictions (what is obtained from
            :meth:`predict_proba`).
        log_proba : bool
            Whether to output per-class logarithms of probabilities (what is obtained
            from :meth:`predict_log_proba`).

        Returns
        -------
        predictions : classifier_prediction_result
            An object of class :obj:`daal4py.classifier_prediction_result` with the requested
            prediction types for the same ``X`` data.

        Raises
        ------
        ValueError
            If none of ``classes``, ``proba``, ``log_proba`` is requested.
        """
        requested = [
            result_name
            for result_name, wanted in (
                ("computeClassLabels", classes),
                ("computeClassProbabilities", proba),
                ("computeClassLogProbabilities", log_proba),
            )
            if wanted
        ]
        if not requested:
            raise ValueError(
                "Must request at least one of 'classes', 'proba', 'log_proba'."
            )
        # The requested result types are passed as a single '|'-separated string.
        return logistic_regression_prediction(
            nClasses=self.n_classes_,
            fptype=self._fptype,
            resultsToEvaluate="|".join(requested),
        ).compute(X, self._model)

    # Splice the shared description of ``X`` into each method docstring.
    # Docstrings are ``None`` under ``python -OO``; skipping them there keeps
    # the module importable instead of failing with ``AttributeError``.
    for _method in (predict, predict_proba, predict_log_proba, predict_multiple):
        if _method.__doc__ is not None:
            _method.__doc__ = _method.__doc__.replace(
                r"%docstring_X%", _docstring_X
            )
    del _method