| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350 |
- import warnings
- import numpy as np
- from ..base import BaseEstimator, TransformerMixin, _fit_context
- from ..utils._param_validation import StrOptions
- from ..utils.metaestimators import available_if
- from ..utils.validation import (
- _allclose_dense_sparse,
- _check_feature_names_in,
- check_array,
- )
- def _identity(X):
- """The identity function."""
- return X
class FunctionTransformer(TransformerMixin, BaseEstimator):
    """Constructs a transformer from an arbitrary callable.

    A FunctionTransformer forwards its X (and optionally y) arguments to a
    user-defined function or function object and returns the result of this
    function. This is useful for stateless transformations such as taking the
    log of frequencies, doing custom scaling, etc.

    Note: If a lambda is used as the function, then the resulting
    transformer will not be pickleable.

    .. versionadded:: 0.17

    Read more in the :ref:`User Guide <function_transformer>`.

    Parameters
    ----------
    func : callable, default=None
        The callable to use for the transformation. This will be passed
        the same arguments as transform, with args and kwargs forwarded.
        If func is None, then func will be the identity function.

    inverse_func : callable, default=None
        The callable to use for the inverse transformation. This will be
        passed the same arguments as inverse transform, with args and
        kwargs forwarded. If inverse_func is None, then inverse_func
        will be the identity function.

    validate : bool, default=False
        Indicate that the input X array should be checked before calling
        ``func``. The possibilities are:

        - If False, there is no input validation.
        - If True, then X will be converted to a 2-dimensional NumPy array or
          sparse matrix. If the conversion is not possible an exception is
          raised.

        .. versionchanged:: 0.22
           The default of ``validate`` changed from True to False.

    accept_sparse : bool, default=False
        Indicate that func accepts a sparse matrix as input. If validate is
        False, this has no effect. Otherwise, if accept_sparse is false,
        sparse matrix inputs will cause an exception to be raised.

    check_inverse : bool, default=True
        Whether to check that ``func`` followed by ``inverse_func`` leads to
        the original inputs. It can be used for a sanity check, raising a
        warning when the condition is not fulfilled.

        .. versionadded:: 0.20

    feature_names_out : callable, 'one-to-one' or None, default=None
        Determines the list of feature names that will be returned by the
        `get_feature_names_out` method. If it is 'one-to-one', then the output
        feature names will be equal to the input feature names. If it is a
        callable, then it must take two positional arguments: this
        `FunctionTransformer` (`self`) and an array-like of input feature names
        (`input_features`). It must return an array-like of output feature
        names. The `get_feature_names_out` method is only defined if
        `feature_names_out` is not None.

        See ``get_feature_names_out`` for more details.

        .. versionadded:: 1.1

    kw_args : dict, default=None
        Dictionary of additional keyword arguments to pass to func.

        .. versionadded:: 0.18

    inv_kw_args : dict, default=None
        Dictionary of additional keyword arguments to pass to inverse_func.

        .. versionadded:: 0.18

    Attributes
    ----------
    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X` has feature
        names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    MaxAbsScaler : Scale each feature by its maximum absolute value.
    StandardScaler : Standardize features by removing the mean and
        scaling to unit variance.
    LabelBinarizer : Binarize labels in a one-vs-all fashion.
    MultiLabelBinarizer : Transform between iterable of iterables
        and a multilabel format.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.preprocessing import FunctionTransformer
    >>> transformer = FunctionTransformer(np.log1p)
    >>> X = np.array([[0, 1], [2, 3]])
    >>> transformer.transform(X)
    array([[0.       , 0.6931...],
           [1.0986..., 1.3862...]])
    """

    _parameter_constraints: dict = {
        "func": [callable, None],
        "inverse_func": [callable, None],
        "validate": ["boolean"],
        "accept_sparse": ["boolean"],
        "check_inverse": ["boolean"],
        "feature_names_out": [callable, StrOptions({"one-to-one"}), None],
        "kw_args": [dict, None],
        "inv_kw_args": [dict, None],
    }

    def __init__(
        self,
        func=None,
        inverse_func=None,
        *,
        validate=False,
        accept_sparse=False,
        check_inverse=True,
        feature_names_out=None,
        kw_args=None,
        inv_kw_args=None,
    ):
        self.func = func
        self.inverse_func = inverse_func
        self.validate = validate
        self.accept_sparse = accept_sparse
        self.check_inverse = check_inverse
        self.feature_names_out = feature_names_out
        self.kw_args = kw_args
        self.inv_kw_args = inv_kw_args

    def _check_input(self, X, *, reset):
        """Validate `X` if requested, otherwise only record feature metadata.

        Parameters
        ----------
        X : object
            Input passed to `fit` or `transform`.

        reset : bool
            Forwarded to the validation helpers. When `validate=False`, the
            feature-count and feature-name helpers are only invoked if
            `reset` is True.

        Returns
        -------
        X : object
            The result of `_validate_data` when `validate=True`, otherwise
            the input object itself.
        """
        if self.validate:
            return self._validate_data(X, accept_sparse=self.accept_sparse, reset=reset)
        elif reset:
            # Set feature_names_in_ and n_features_in_ even if validate=False
            # We run this only when reset==True to store the attributes but not
            # validate them, because validate=False
            self._check_n_features(X, reset=reset)
            self._check_feature_names(X, reset=reset)
        return X

    def _check_inverse_transform(self, X):
        """Check that func and inverse_func are the inverse.

        A strided subsample of `X` (every ``max(1, n_samples // 100)``-th row)
        is passed through `transform` and then `inverse_transform`, and the
        result is compared with the subsample.

        Parameters
        ----------
        X : object exposing `shape` and slice indexing
            Data used for the round trip.

        Raises
        ------
        ValueError
            If the dtypes of `X` are not all numerical, or cannot be
            determined because `X` exposes neither `dtype` nor `dtypes`.

        Warns
        -----
        UserWarning
            If the round trip does not reproduce the subsample.
        """
        idx_selected = slice(None, None, max(1, X.shape[0] // 100))
        X_round_trip = self.inverse_transform(self.transform(X[idx_selected]))

        if hasattr(X, "dtype"):
            dtypes = [X.dtype]
        elif hasattr(X, "dtypes"):
            # Dataframes can have multiple dtypes
            dtypes = X.dtypes
        else:
            # Without either attribute the element types cannot be confirmed
            # to be numerical; report it through the ValueError below rather
            # than failing on an unbound local variable.
            dtypes = None

        if dtypes is None or not all(np.issubdtype(d, np.number) for d in dtypes):
            raise ValueError(
                "'check_inverse' is only supported when all the elements in `X` is"
                " numerical."
            )

        if not _allclose_dense_sparse(X[idx_selected], X_round_trip):
            warnings.warn(
                (
                    "The provided functions are not strictly"
                    " inverse of each other. If you are sure you"
                    " want to proceed regardless, set"
                    " 'check_inverse=False'."
                ),
                UserWarning,
            )

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit transformer by checking X.

        If ``validate`` is ``True``, ``X`` will be checked.

        Parameters
        ----------
        X : {array-like, sparse-matrix} of shape (n_samples, n_features) \
                if `validate=True` else any object that `func` can handle
            Input array.

        y : Ignored
            Not used, present here for API consistency by convention.

        Returns
        -------
        self : object
            FunctionTransformer class instance.
        """
        X = self._check_input(X, reset=True)
        # The round-trip check only makes sense when both directions are
        # user-supplied callables.
        if self.check_inverse and not (self.func is None or self.inverse_func is None):
            self._check_inverse_transform(X)
        return self

    def transform(self, X):
        """Transform X using the forward function.

        Parameters
        ----------
        X : {array-like, sparse-matrix} of shape (n_samples, n_features) \
                if `validate=True` else any object that `func` can handle
            Input array.

        Returns
        -------
        X_out : array-like, shape (n_samples, n_features)
            Transformed input.
        """
        X = self._check_input(X, reset=False)
        return self._transform(X, func=self.func, kw_args=self.kw_args)

    def inverse_transform(self, X):
        """Transform X using the inverse function.

        Parameters
        ----------
        X : {array-like, sparse-matrix} of shape (n_samples, n_features) \
                if `validate=True` else any object that `inverse_func` can handle
            Input array.

        Returns
        -------
        X_out : array-like, shape (n_samples, n_features)
            Transformed input.
        """
        if self.validate:
            X = check_array(X, accept_sparse=self.accept_sparse)
        return self._transform(X, func=self.inverse_func, kw_args=self.inv_kw_args)

    @available_if(lambda self: self.feature_names_out is not None)
    def get_feature_names_out(self, input_features=None):
        """Get output feature names for transformation.

        This method is only defined if `feature_names_out` is not None.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Input feature names.

            - If `input_features` is None, then `feature_names_in_` is
              used as the input feature names. If `feature_names_in_` is not
              defined, then names are generated:
              `[x0, x1, ..., x(n_features_in_ - 1)]`.
            - If `input_features` is array-like, then `input_features` must
              match `feature_names_in_` if `feature_names_in_` is defined.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.

            - If `feature_names_out` is 'one-to-one', the input feature names
              are returned (see `input_features` above). This requires
              `feature_names_in_` and/or `n_features_in_` to be defined, which
              is done automatically if `validate=True`. Alternatively, you can
              set them in `func`.
            - If `feature_names_out` is a callable, then it is called with two
              arguments, `self` and `input_features`, and its return value is
              returned by this method.
        """
        # Only normalise the names when there is something to check them
        # against or when the caller supplied them explicitly.
        if hasattr(self, "n_features_in_") or input_features is not None:
            input_features = _check_feature_names_in(self, input_features)
        if self.feature_names_out == "one-to-one":
            names_out = input_features
        elif callable(self.feature_names_out):
            names_out = self.feature_names_out(self, input_features)
        else:
            raise ValueError(
                f"feature_names_out={self.feature_names_out!r} is invalid. "
                'It must either be "one-to-one" or a callable with two '
                "arguments: the function transformer and an array-like of "
                "input feature names. The callable must return an array-like "
                "of output feature names."
            )
        return np.asarray(names_out, dtype=object)

    def _transform(self, X, func=None, kw_args=None):
        """Apply `func` to `X`, forwarding `kw_args` as keyword arguments.

        Parameters
        ----------
        X : object
            Data handed to `func` as its single positional argument.

        func : callable, default=None
            Function to apply. The identity function is used when None.

        kw_args : dict, default=None
            Extra keyword arguments for `func`. None or an empty dict means
            no extra arguments.

        Returns
        -------
        out : object
            Whatever `func` returns.
        """
        if func is None:
            func = _identity

        return func(X, **(kw_args if kw_args else {}))

    def __sklearn_is_fitted__(self):
        """Return True since FunctionTransformer is stateless."""
        return True

    def _more_tags(self):
        """Return estimator tags.

        `no_validation` is the negation of `validate`; `stateless` is always
        True.
        """
        return {"no_validation": not self.validate, "stateless": True}

    def set_output(self, *, transform=None):
        """Set output container.

        See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`
        for an example on how to use the API.

        Parameters
        ----------
        transform : {"default", "pandas"}, default=None
            Configure output of `transform` and `fit_transform`.

            - `"default"`: Default output format of a transformer
            - `"pandas"`: DataFrame output
            - `None`: Transform configuration is unchanged

        Returns
        -------
        self : estimator instance
            Estimator instance.
        """
        # NOTE(review): the parent `set_output` is looked up dynamically,
        # presumably because the mixin only exposes it conditionally - confirm.
        # When it is missing we fall through and only warn.
        if hasattr(super(), "set_output"):
            return super().set_output(transform=transform)

        if transform == "pandas" and self.feature_names_out is None:
            warnings.warn(
                'With transform="pandas", `func` should return a DataFrame to follow'
                " the set_output API."
            )

        return self
|