_mask.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. from contextlib import suppress
  2. import numpy as np
  3. from scipy import sparse as sp
  4. from . import is_scalar_nan
  5. from .fixes import _object_dtype_isnan
  6. def _get_dense_mask(X, value_to_mask):
  7. with suppress(ImportError, AttributeError):
  8. # We also suppress `AttributeError` because older versions of pandas do
  9. # not have `NA`.
  10. import pandas
  11. if value_to_mask is pandas.NA:
  12. return pandas.isna(X)
  13. if is_scalar_nan(value_to_mask):
  14. if X.dtype.kind == "f":
  15. Xt = np.isnan(X)
  16. elif X.dtype.kind in ("i", "u"):
  17. # can't have NaNs in integer array.
  18. Xt = np.zeros(X.shape, dtype=bool)
  19. else:
  20. # np.isnan does not work on object dtypes.
  21. Xt = _object_dtype_isnan(X)
  22. else:
  23. Xt = X == value_to_mask
  24. return Xt
  25. def _get_mask(X, value_to_mask):
  26. """Compute the boolean mask X == value_to_mask.
  27. Parameters
  28. ----------
  29. X : {ndarray, sparse matrix} of shape (n_samples, n_features)
  30. Input data, where ``n_samples`` is the number of samples and
  31. ``n_features`` is the number of features.
  32. value_to_mask : {int, float}
  33. The value which is to be masked in X.
  34. Returns
  35. -------
  36. X_mask : {ndarray, sparse matrix} of shape (n_samples, n_features)
  37. Missing mask.
  38. """
  39. if not sp.issparse(X):
  40. # For all cases apart of a sparse input where we need to reconstruct
  41. # a sparse output
  42. return _get_dense_mask(X, value_to_mask)
  43. Xt = _get_dense_mask(X.data, value_to_mask)
  44. sparse_constructor = sp.csr_matrix if X.format == "csr" else sp.csc_matrix
  45. Xt_sparse = sparse_constructor(
  46. (Xt, X.indices.copy(), X.indptr.copy()), shape=X.shape, dtype=bool
  47. )
  48. return Xt_sparse