Source code for sklearnex.basic_statistics.basic_statistics

# ==============================================================================
# Copyright 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from sklearn.base import BaseEstimator

from daal4py.sklearn._n_jobs_support import control_n_jobs
from daal4py.sklearn._utils import daal_check_version, is_sparse, sklearn_check_version
from onedal.basic_statistics import BasicStatistics as onedal_BasicStatistics
from onedal.utils.validation import _is_csr

from .._device_offload import dispatch
from .._utils import PatchingConditionsChain
from ..base import oneDALEstimator
from ..utils._array_api import enable_array_api, get_namespace
from ..utils.validation import _check_sample_weight, validate_data

if sklearn_check_version("1.2"):
    from sklearn.utils._param_validation import StrOptions


@enable_array_api
@control_n_jobs(decorated_methods=["fit"])
class BasicStatistics(oneDALEstimator, BaseEstimator):
    """
    Estimator for basic statistics.

    Compute low order moments and related statistics for given data.

    Parameters
    ----------
    result_options : str or list, default='all'
        Used to set statistics to calculate. Possible values are ``'min'``,
        ``'max'``, ``'sum'``, ``'mean'``, ``'variance'``, ``'variation'``,
        ``'sum_squares'``, ``'sum_squares_centered'``,
        ``'standard_deviation'``, ``'second_order_raw_moment'`` or a list
        containing any of these values. If set to ``'all'`` then all possible
        statistics will be calculated.

    Attributes
    ----------
    min_ : ndarray of shape (n_features,)
        Minimum of each feature over all samples.
    max_ : ndarray of shape (n_features,)
        Maximum of each feature over all samples.
    sum_ : ndarray of shape (n_features,)
        Sum of each feature over all samples.
    mean_ : ndarray of shape (n_features,)
        Mean of each feature over all samples.
    variance_ : ndarray of shape (n_features,)
        Variance of each feature over all samples. Bessel's correction is used.
    variation_ : ndarray of shape (n_features,)
        Variation of each feature over all samples. Bessel's correction is used.
    sum_squares_ : ndarray of shape (n_features,)
        Sum of squares for each feature over all samples.
    standard_deviation_ : ndarray of shape (n_features,)
        Unbiased standard deviation of each feature over all samples. Bessel's
        correction is used.
    sum_squares_centered_ : ndarray of shape (n_features,)
        Centered sum of squares for each feature over all samples.
    second_order_raw_moment_ : ndarray of shape (n_features,)
        Second order moment of each feature over all samples.

    Notes
    -----
    Attribute exists only if corresponding result option has been provided.

    Some results can exhibit small variations due to
    floating point error accumulation and multithreading.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearnex.basic_statistics import BasicStatistics
    >>> bs = BasicStatistics(result_options=['sum', 'min', 'max'])
    >>> X = np.array([[1, 2], [3, 4]])
    >>> bs.fit(X)
    >>> bs.sum_
    np.array([4., 6.])
    >>> bs.min_
    np.array([1., 2.])
    """

    def __init__(self, result_options="all"):
        self.result_options = result_options

    # Backend class, wrapped in ``staticmethod`` so that attribute access on an
    # instance yields the class itself rather than a bound method.
    _onedal_basic_statistics = staticmethod(onedal_BasicStatistics)

    if sklearn_check_version("1.2"):
        _parameter_constraints: dict = {
            "result_options": [
                StrOptions(
                    {
                        "all",
                        "min",
                        "max",
                        "sum",
                        "mean",
                        "variance",
                        "variation",
                        "sum_squares",
                        "standard_deviation",
                        "sum_squares_centered",
                        "second_order_raw_moment",
                    }
                ),
                list,
            ],
        }

    def _save_attributes(self):
        """Copy computed statistics from the backend onto this estimator.

        For every name in the backend's ``options`` the backend attribute
        ``<name>_`` is read and stored on ``self`` under the same name.
        """
        # Internal invariant: only called after ``_onedal_fit`` created the backend.
        assert hasattr(self, "_onedal_estimator")
        for option in self._onedal_estimator.options:
            attr_name = f"{option}_"
            setattr(self, attr_name, getattr(self._onedal_estimator, attr_name))

    def _onedal_cpu_supported(self, method_name, *data):
        """Return the patching conditions chain for CPU execution.

        No conditions are registered on the chain here; it is returned as
        constructed.
        """
        patching_status = PatchingConditionsChain(
            f"sklearnex.basic_statistics.{self.__class__.__name__}.{method_name}"
        )
        return patching_status

    def _onedal_gpu_supported(self, method_name, *data):
        """Return the patching conditions chain for GPU execution.

        ``data`` must unpack to ``(X, sample_weight)``. Two conditions are
        registered: sample weights may not be combined with sparse ``X``, and
        sparse ``X`` must be CSR with oneDAL version >= 2025.2.0.
        """
        patching_status = PatchingConditionsChain(
            f"sklearnex.basic_statistics.{self.__class__.__name__}.{method_name}"
        )
        X, sample_weight = data

        is_data_supported = not is_sparse(X) or (
            _is_csr(X) and daal_check_version((2025, "P", 200))
        )

        is_sample_weight_supported = sample_weight is None or not is_sparse(X)

        patching_status.and_conditions(
            [
                (
                    is_sample_weight_supported,
                    "Sample weights are not supported for CSR data format",
                ),
                (
                    is_data_supported,
                    "Supported data formats: Dense, CSR (oneDAL version >= 2025.2.0).",
                ),
            ]
        )
        return patching_status

    def _onedal_fit(self, X, sample_weight=None, queue=None):
        """Validate the inputs, run the oneDAL backend and store its results.

        Parameters
        ----------
        X : array-like
            Input data; validated as float64/float32, CSR sparse accepted,
            1-D input allowed (``ensure_2d=False``).
        sample_weight : array-like or None, default=None
            Optional weights, checked against ``X`` when given.
        queue : object or None, default=None
            Forwarded unchanged to the backend ``fit`` call.
        """
        xp, _ = get_namespace(X, sample_weight)
        X = validate_data(
            self,
            X,
            dtype=[xp.float64, xp.float32],
            ensure_2d=False,
            accept_sparse="csr",
        )

        if sample_weight is not None:
            sample_weight = _check_sample_weight(
                sample_weight, X, dtype=[xp.float64, xp.float32]
            )

        # Build the backend on every fit. Re-using one created by an earlier
        # fit would silently ignore a ``result_options`` value changed in the
        # meantime (e.g. through ``set_params``).
        self._onedal_estimator = self._onedal_basic_statistics(
            result_options=self.result_options
        )
        self._onedal_estimator.fit(X, sample_weight, queue=queue)
        self._save_attributes()
        # 1-D input is counted as a single feature.
        self.n_features_in_ = X.shape[1] if len(X.shape) > 1 else 1

    def fit(self, X, y=None, sample_weight=None):
        """Calculate statistics of X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data for compute, where ``n_samples`` is the number of samples and
            ``n_features`` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        sample_weight : array-like of shape (n_samples,), default=None
            Weights for compute weighted statistics, where ``n_samples`` is the
            number of samples.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        if sklearn_check_version("1.2"):
            self._validate_params()

        # There is no stock scikit-learn implementation to fall back to,
        # hence ``"sklearn": None``.
        dispatch(
            self,
            "fit",
            {
                "onedal": self.__class__._onedal_fit,
                "sklearn": None,
            },
            X,
            sample_weight,
        )
        return self