| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263 |
- from contextlib import suppress
- import numpy as np
- from scipy import sparse as sp
- from . import is_scalar_nan
- from .fixes import _object_dtype_isnan
def _get_dense_mask(X, value_to_mask):
    """Compute the boolean mask of entries in a dense array matching a value.

    Parameters
    ----------
    X : ndarray
        Dense data to inspect. It must expose ``dtype`` and ``shape``.

    value_to_mask : scalar
        The value to look for in X. ``pandas.NA`` and NaN are handled
        specially; any other value is compared with ``==``.

    Returns
    -------
    mask : ndarray of bool
        Result of the element-wise test. For ``pandas.NA`` this is whatever
        ``pandas.isna(X)`` returns; for other non-NaN values it is the result
        of ``X == value_to_mask``.
    """
    # pandas is an optional dependency (ImportError) and older releases do
    # not define `pandas.NA` (AttributeError); in both situations the
    # pandas-specific branch is skipped silently.
    with suppress(ImportError, AttributeError):
        import pandas

        if value_to_mask is pandas.NA:
            return pandas.isna(X)

    if not is_scalar_nan(value_to_mask):
        # Ordinary value: a plain element-wise comparison is enough.
        return X == value_to_mask

    # From here on we are looking for NaN, which never compares equal to
    # itself, so the strategy depends on the dtype kind of X.
    kind = X.dtype.kind
    if kind == "f":
        return np.isnan(X)
    if kind in ("i", "u"):
        # Integer arrays cannot hold NaN, so nothing is masked.
        return np.zeros(X.shape, dtype=bool)
    # np.isnan does not work on object dtypes; use the dedicated helper.
    return _object_dtype_isnan(X)
def _get_mask(X, value_to_mask):
    """Compute the boolean mask X == value_to_mask.

    Parameters
    ----------
    X : {ndarray, sparse matrix} of shape (n_samples, n_features)
        Input data, where ``n_samples`` is the number of samples and
        ``n_features`` is the number of features.

    value_to_mask : {int, float}
        The value which is to be masked in X.

    Returns
    -------
    X_mask : {ndarray, sparse matrix} of shape (n_samples, n_features)
        Missing mask. For sparse input, only the explicitly stored entries
        of X are tested and the mask is returned as a sparse matrix with the
        same shape: CSR when ``X.format == "csr"``, CSC otherwise.
    """
    if sp.issparse(X):
        # Only the stored values are compared; the mask then reuses the
        # sparsity structure of X.
        stored_mask = _get_dense_mask(X.data, value_to_mask)

        # Any format other than "csr" is rebuilt as CSC.
        # NOTE(review): presumably callers only pass CSR or CSC input --
        # confirm, since `indices`/`indptr` are read below.
        if X.format == "csr":
            make_sparse = sp.csr_matrix
        else:
            make_sparse = sp.csc_matrix

        # The index arrays are copied so the mask does not share them with X.
        return make_sparse(
            (stored_mask, X.indices.copy(), X.indptr.copy()),
            shape=X.shape,
            dtype=bool,
        )

    # Dense input: no sparse output has to be reconstructed.
    return _get_dense_mask(X, value_to_mask)
|