__init__.py 841 B

1234567891011121314151617181920212223242526272829
  1. from .. import normalizers
  2. Normalizer = normalizers.Normalizer
  3. BertNormalizer = normalizers.BertNormalizer
  4. NFD = normalizers.NFD
  5. NFKD = normalizers.NFKD
  6. NFC = normalizers.NFC
  7. NFKC = normalizers.NFKC
  8. Sequence = normalizers.Sequence
  9. Lowercase = normalizers.Lowercase
  10. Prepend = normalizers.Prepend
  11. Strip = normalizers.Strip
  12. StripAccents = normalizers.StripAccents
  13. Nmt = normalizers.Nmt
  14. Precompiled = normalizers.Precompiled
  15. Replace = normalizers.Replace
  16. ByteLevel = normalizers.ByteLevel
  17. NORMALIZERS = {"nfc": NFC, "nfd": NFD, "nfkc": NFKC, "nfkd": NFKD}
  18. def unicode_normalizer_from_str(normalizer: str) -> Normalizer:
  19. if normalizer not in NORMALIZERS:
  20. raise ValueError(
  21. "{} is not a known unicode normalizer. Available are {}".format(normalizer, NORMALIZERS.keys())
  22. )
  23. return NORMALIZERS[normalizer]()