# Generated content DO NOT EDIT
class Normalizer:
    """
    Base class for all normalizers

    This class is not supposed to be instantiated directly. Instead, any implementation of a
    Normalizer will return an instance of this class when instantiated.
    """

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class BertNormalizer(Normalizer):
    """
    BertNormalizer

    Takes care of normalizing raw text before giving it to a Bert model.
    This includes cleaning the text, handling accents, chinese chars and lowercasing

    Args:
        clean_text (:obj:`bool`, `optional`, defaults to :obj:`True`):
            Whether to clean the text, by removing any control characters
            and replacing all whitespaces by the classic one.

        handle_chinese_chars (:obj:`bool`, `optional`, defaults to :obj:`True`):
            Whether to handle chinese chars by putting spaces around them.

        strip_accents (:obj:`bool`, `optional`):
            Whether to strip all accents. If this option is not specified (ie == None),
            then it will be determined by the value for `lowercase` (as in the original Bert).

        lowercase (:obj:`bool`, `optional`, defaults to :obj:`True`):
            Whether to lowercase.
    """

    def __init__(self, clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True):
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class ByteLevel(Normalizer):
    """
    Bytelevel Normalizer
    """

    def __init__(self):
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class Lowercase(Normalizer):
    """
    Lowercase Normalizer
    """

    def __init__(self):
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class NFC(Normalizer):
    """
    NFC Unicode Normalizer
    """

    def __init__(self):
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class NFD(Normalizer):
    """
    NFD Unicode Normalizer
    """

    def __init__(self):
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class NFKC(Normalizer):
    """
    NFKC Unicode Normalizer
    """

    def __init__(self):
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class NFKD(Normalizer):
    """
    NFKD Unicode Normalizer
    """

    def __init__(self):
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class Nmt(Normalizer):
    """
    Nmt normalizer
    """

    def __init__(self):
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class Precompiled(Normalizer):
    """
    Precompiled normalizer

    Do not use this manually; it exists for compatibility with SentencePiece.
    """

    # NOTE(review): `precompiled_charsmap` is not documented in this stub;
    # presumably it is the serialized character map coming from a
    # SentencePiece model -- confirm against the implementation.
    def __init__(self, precompiled_charsmap):
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class Prepend(Normalizer):
    """
    Prepend normalizer
    """

    # NOTE(review): `prepend` is not documented in this stub; presumably it is
    # the string placed in front of the input -- confirm against the
    # implementation.
    def __init__(self, prepend):
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class Replace(Normalizer):
    """
    Replace normalizer
    """

    # NOTE(review): `pattern` and `content` are not documented in this stub;
    # presumably matches of `pattern` are replaced by `content` -- confirm
    # the accepted pattern types against the implementation.
    def __init__(self, pattern, content):
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class Sequence(Normalizer):
    """
    Allows concatenating multiple other Normalizer as a Sequence.
    All the normalizers run in sequence in the given order

    Args:
        normalizers (:obj:`List[Normalizer]`):
            A list of Normalizer to be run as a sequence
    """

    # NOTE(review): the docstring documents a `normalizers` argument, but this
    # stub declares no `__init__`; presumably the constructor signature is
    # provided by the underlying implementation -- confirm before adding one.

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class Strip(Normalizer):
    """
    Strip normalizer
    """

    # NOTE(review): `left` and `right` both default to True and are not
    # documented in this stub; presumably they select which side(s) of the
    # input get stripped -- confirm against the implementation.
    def __init__(self, left=True, right=True):
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass
class StripAccents(Normalizer):
    """
    StripAccents normalizer
    """

    def __init__(self):
        pass

    def normalize(self, normalized):
        """
        Normalize a :class:`~tokenizers.NormalizedString` in-place

        This method allows modifying a :class:`~tokenizers.NormalizedString` while
        keeping track of the alignment information. If you just want to see the result
        of the normalization on a raw string, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize_str`

        Args:
            normalized (:class:`~tokenizers.NormalizedString`):
                The normalized string on which to apply this
                :class:`~tokenizers.normalizers.Normalizer`
        """
        pass

    def normalize_str(self, sequence):
        """
        Normalize the given string

        This method provides a way to visualize the effect of a
        :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
        information. If you need to get/convert offsets, you can use
        :meth:`~tokenizers.normalizers.Normalizer.normalize`

        Args:
            sequence (:obj:`str`):
                A string to normalize

        Returns:
            :obj:`str`: A string after normalization
        """
        pass