# __init__.pyi (20 KB)
# Generated content DO NOT EDIT
  2. class Normalizer:
  3. """
  4. Base class for all normalizers
  5. This class is not supposed to be instantiated directly. Instead, any implementation of a
  6. Normalizer will return an instance of this class when instantiated.
  7. """
  8. def normalize(self, normalized):
  9. """
  10. Normalize a :class:`~tokenizers.NormalizedString` in-place
  11. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  12. keep track of the alignment information. If you just want to see the result
  13. of the normalization on a raw string, you can use
  14. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  15. Args:
  16. normalized (:class:`~tokenizers.NormalizedString`):
  17. The normalized string on which to apply this
  18. :class:`~tokenizers.normalizers.Normalizer`
  19. """
  20. pass
  21. def normalize_str(self, sequence):
  22. """
  23. Normalize the given string
  24. This method provides a way to visualize the effect of a
  25. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  26. information. If you need to get/convert offsets, you can use
  27. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  28. Args:
  29. sequence (:obj:`str`):
  30. A string to normalize
  31. Returns:
  32. :obj:`str`: A string after normalization
  33. """
  34. pass
  35. class BertNormalizer(Normalizer):
  36. """
  37. BertNormalizer
  38. Takes care of normalizing raw text before giving it to a Bert model.
  39. This includes cleaning the text, handling accents, chinese chars and lowercasing
  40. Args:
  41. clean_text (:obj:`bool`, `optional`, defaults to :obj:`True`):
  42. Whether to clean the text, by removing any control characters
  43. and replacing all whitespaces by the classic one.
  44. handle_chinese_chars (:obj:`bool`, `optional`, defaults to :obj:`True`):
  45. Whether to handle chinese chars by putting spaces around them.
  46. strip_accents (:obj:`bool`, `optional`):
  47. Whether to strip all accents. If this option is not specified (ie == None),
  48. then it will be determined by the value for `lowercase` (as in the original Bert).
  49. lowercase (:obj:`bool`, `optional`, defaults to :obj:`True`):
  50. Whether to lowercase.
  51. """
  52. def __init__(self, clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True):
  53. pass
  54. def normalize(self, normalized):
  55. """
  56. Normalize a :class:`~tokenizers.NormalizedString` in-place
  57. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  58. keep track of the alignment information. If you just want to see the result
  59. of the normalization on a raw string, you can use
  60. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  61. Args:
  62. normalized (:class:`~tokenizers.NormalizedString`):
  63. The normalized string on which to apply this
  64. :class:`~tokenizers.normalizers.Normalizer`
  65. """
  66. pass
  67. def normalize_str(self, sequence):
  68. """
  69. Normalize the given string
  70. This method provides a way to visualize the effect of a
  71. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  72. information. If you need to get/convert offsets, you can use
  73. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  74. Args:
  75. sequence (:obj:`str`):
  76. A string to normalize
  77. Returns:
  78. :obj:`str`: A string after normalization
  79. """
  80. pass
  81. class ByteLevel(Normalizer):
  82. """
  83. Bytelevel Normalizer
  84. """
  85. def __init__(self):
  86. pass
  87. def normalize(self, normalized):
  88. """
  89. Normalize a :class:`~tokenizers.NormalizedString` in-place
  90. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  91. keep track of the alignment information. If you just want to see the result
  92. of the normalization on a raw string, you can use
  93. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  94. Args:
  95. normalized (:class:`~tokenizers.NormalizedString`):
  96. The normalized string on which to apply this
  97. :class:`~tokenizers.normalizers.Normalizer`
  98. """
  99. pass
  100. def normalize_str(self, sequence):
  101. """
  102. Normalize the given string
  103. This method provides a way to visualize the effect of a
  104. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  105. information. If you need to get/convert offsets, you can use
  106. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  107. Args:
  108. sequence (:obj:`str`):
  109. A string to normalize
  110. Returns:
  111. :obj:`str`: A string after normalization
  112. """
  113. pass
  114. class Lowercase(Normalizer):
  115. """
  116. Lowercase Normalizer
  117. """
  118. def __init__(self):
  119. pass
  120. def normalize(self, normalized):
  121. """
  122. Normalize a :class:`~tokenizers.NormalizedString` in-place
  123. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  124. keep track of the alignment information. If you just want to see the result
  125. of the normalization on a raw string, you can use
  126. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  127. Args:
  128. normalized (:class:`~tokenizers.NormalizedString`):
  129. The normalized string on which to apply this
  130. :class:`~tokenizers.normalizers.Normalizer`
  131. """
  132. pass
  133. def normalize_str(self, sequence):
  134. """
  135. Normalize the given string
  136. This method provides a way to visualize the effect of a
  137. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  138. information. If you need to get/convert offsets, you can use
  139. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  140. Args:
  141. sequence (:obj:`str`):
  142. A string to normalize
  143. Returns:
  144. :obj:`str`: A string after normalization
  145. """
  146. pass
  147. class NFC(Normalizer):
  148. """
  149. NFC Unicode Normalizer
  150. """
  151. def __init__(self):
  152. pass
  153. def normalize(self, normalized):
  154. """
  155. Normalize a :class:`~tokenizers.NormalizedString` in-place
  156. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  157. keep track of the alignment information. If you just want to see the result
  158. of the normalization on a raw string, you can use
  159. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  160. Args:
  161. normalized (:class:`~tokenizers.NormalizedString`):
  162. The normalized string on which to apply this
  163. :class:`~tokenizers.normalizers.Normalizer`
  164. """
  165. pass
  166. def normalize_str(self, sequence):
  167. """
  168. Normalize the given string
  169. This method provides a way to visualize the effect of a
  170. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  171. information. If you need to get/convert offsets, you can use
  172. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  173. Args:
  174. sequence (:obj:`str`):
  175. A string to normalize
  176. Returns:
  177. :obj:`str`: A string after normalization
  178. """
  179. pass
  180. class NFD(Normalizer):
  181. """
  182. NFD Unicode Normalizer
  183. """
  184. def __init__(self):
  185. pass
  186. def normalize(self, normalized):
  187. """
  188. Normalize a :class:`~tokenizers.NormalizedString` in-place
  189. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  190. keep track of the alignment information. If you just want to see the result
  191. of the normalization on a raw string, you can use
  192. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  193. Args:
  194. normalized (:class:`~tokenizers.NormalizedString`):
  195. The normalized string on which to apply this
  196. :class:`~tokenizers.normalizers.Normalizer`
  197. """
  198. pass
  199. def normalize_str(self, sequence):
  200. """
  201. Normalize the given string
  202. This method provides a way to visualize the effect of a
  203. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  204. information. If you need to get/convert offsets, you can use
  205. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  206. Args:
  207. sequence (:obj:`str`):
  208. A string to normalize
  209. Returns:
  210. :obj:`str`: A string after normalization
  211. """
  212. pass
  213. class NFKC(Normalizer):
  214. """
  215. NFKC Unicode Normalizer
  216. """
  217. def __init__(self):
  218. pass
  219. def normalize(self, normalized):
  220. """
  221. Normalize a :class:`~tokenizers.NormalizedString` in-place
  222. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  223. keep track of the alignment information. If you just want to see the result
  224. of the normalization on a raw string, you can use
  225. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  226. Args:
  227. normalized (:class:`~tokenizers.NormalizedString`):
  228. The normalized string on which to apply this
  229. :class:`~tokenizers.normalizers.Normalizer`
  230. """
  231. pass
  232. def normalize_str(self, sequence):
  233. """
  234. Normalize the given string
  235. This method provides a way to visualize the effect of a
  236. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  237. information. If you need to get/convert offsets, you can use
  238. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  239. Args:
  240. sequence (:obj:`str`):
  241. A string to normalize
  242. Returns:
  243. :obj:`str`: A string after normalization
  244. """
  245. pass
  246. class NFKD(Normalizer):
  247. """
  248. NFKD Unicode Normalizer
  249. """
  250. def __init__(self):
  251. pass
  252. def normalize(self, normalized):
  253. """
  254. Normalize a :class:`~tokenizers.NormalizedString` in-place
  255. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  256. keep track of the alignment information. If you just want to see the result
  257. of the normalization on a raw string, you can use
  258. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  259. Args:
  260. normalized (:class:`~tokenizers.NormalizedString`):
  261. The normalized string on which to apply this
  262. :class:`~tokenizers.normalizers.Normalizer`
  263. """
  264. pass
  265. def normalize_str(self, sequence):
  266. """
  267. Normalize the given string
  268. This method provides a way to visualize the effect of a
  269. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  270. information. If you need to get/convert offsets, you can use
  271. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  272. Args:
  273. sequence (:obj:`str`):
  274. A string to normalize
  275. Returns:
  276. :obj:`str`: A string after normalization
  277. """
  278. pass
  279. class Nmt(Normalizer):
  280. """
  281. Nmt normalizer
  282. """
  283. def __init__(self):
  284. pass
  285. def normalize(self, normalized):
  286. """
  287. Normalize a :class:`~tokenizers.NormalizedString` in-place
  288. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  289. keep track of the alignment information. If you just want to see the result
  290. of the normalization on a raw string, you can use
  291. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  292. Args:
  293. normalized (:class:`~tokenizers.NormalizedString`):
  294. The normalized string on which to apply this
  295. :class:`~tokenizers.normalizers.Normalizer`
  296. """
  297. pass
  298. def normalize_str(self, sequence):
  299. """
  300. Normalize the given string
  301. This method provides a way to visualize the effect of a
  302. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  303. information. If you need to get/convert offsets, you can use
  304. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  305. Args:
  306. sequence (:obj:`str`):
  307. A string to normalize
  308. Returns:
  309. :obj:`str`: A string after normalization
  310. """
  311. pass
  312. class Precompiled(Normalizer):
  313. """
  314. Precompiled normalizer
  315. Don't use manually it is used for compatiblity for SentencePiece.
  316. """
  317. def __init__(self, precompiled_charsmap):
  318. pass
  319. def normalize(self, normalized):
  320. """
  321. Normalize a :class:`~tokenizers.NormalizedString` in-place
  322. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  323. keep track of the alignment information. If you just want to see the result
  324. of the normalization on a raw string, you can use
  325. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  326. Args:
  327. normalized (:class:`~tokenizers.NormalizedString`):
  328. The normalized string on which to apply this
  329. :class:`~tokenizers.normalizers.Normalizer`
  330. """
  331. pass
  332. def normalize_str(self, sequence):
  333. """
  334. Normalize the given string
  335. This method provides a way to visualize the effect of a
  336. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  337. information. If you need to get/convert offsets, you can use
  338. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  339. Args:
  340. sequence (:obj:`str`):
  341. A string to normalize
  342. Returns:
  343. :obj:`str`: A string after normalization
  344. """
  345. pass
  346. class Prepend(Normalizer):
  347. """
  348. Prepend normalizer
  349. """
  350. def __init__(self, prepend):
  351. pass
  352. def normalize(self, normalized):
  353. """
  354. Normalize a :class:`~tokenizers.NormalizedString` in-place
  355. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  356. keep track of the alignment information. If you just want to see the result
  357. of the normalization on a raw string, you can use
  358. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  359. Args:
  360. normalized (:class:`~tokenizers.NormalizedString`):
  361. The normalized string on which to apply this
  362. :class:`~tokenizers.normalizers.Normalizer`
  363. """
  364. pass
  365. def normalize_str(self, sequence):
  366. """
  367. Normalize the given string
  368. This method provides a way to visualize the effect of a
  369. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  370. information. If you need to get/convert offsets, you can use
  371. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  372. Args:
  373. sequence (:obj:`str`):
  374. A string to normalize
  375. Returns:
  376. :obj:`str`: A string after normalization
  377. """
  378. pass
  379. class Replace(Normalizer):
  380. """
  381. Replace normalizer
  382. """
  383. def __init__(self, pattern, content):
  384. pass
  385. def normalize(self, normalized):
  386. """
  387. Normalize a :class:`~tokenizers.NormalizedString` in-place
  388. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  389. keep track of the alignment information. If you just want to see the result
  390. of the normalization on a raw string, you can use
  391. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  392. Args:
  393. normalized (:class:`~tokenizers.NormalizedString`):
  394. The normalized string on which to apply this
  395. :class:`~tokenizers.normalizers.Normalizer`
  396. """
  397. pass
  398. def normalize_str(self, sequence):
  399. """
  400. Normalize the given string
  401. This method provides a way to visualize the effect of a
  402. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  403. information. If you need to get/convert offsets, you can use
  404. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  405. Args:
  406. sequence (:obj:`str`):
  407. A string to normalize
  408. Returns:
  409. :obj:`str`: A string after normalization
  410. """
  411. pass
  412. class Sequence(Normalizer):
  413. """
  414. Allows concatenating multiple other Normalizer as a Sequence.
  415. All the normalizers run in sequence in the given order
  416. Args:
  417. normalizers (:obj:`List[Normalizer]`):
  418. A list of Normalizer to be run as a sequence
  419. """
  420. def normalize(self, normalized):
  421. """
  422. Normalize a :class:`~tokenizers.NormalizedString` in-place
  423. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  424. keep track of the alignment information. If you just want to see the result
  425. of the normalization on a raw string, you can use
  426. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  427. Args:
  428. normalized (:class:`~tokenizers.NormalizedString`):
  429. The normalized string on which to apply this
  430. :class:`~tokenizers.normalizers.Normalizer`
  431. """
  432. pass
  433. def normalize_str(self, sequence):
  434. """
  435. Normalize the given string
  436. This method provides a way to visualize the effect of a
  437. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  438. information. If you need to get/convert offsets, you can use
  439. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  440. Args:
  441. sequence (:obj:`str`):
  442. A string to normalize
  443. Returns:
  444. :obj:`str`: A string after normalization
  445. """
  446. pass
  447. class Strip(Normalizer):
  448. """
  449. Strip normalizer
  450. """
  451. def __init__(self, left=True, right=True):
  452. pass
  453. def normalize(self, normalized):
  454. """
  455. Normalize a :class:`~tokenizers.NormalizedString` in-place
  456. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  457. keep track of the alignment information. If you just want to see the result
  458. of the normalization on a raw string, you can use
  459. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  460. Args:
  461. normalized (:class:`~tokenizers.NormalizedString`):
  462. The normalized string on which to apply this
  463. :class:`~tokenizers.normalizers.Normalizer`
  464. """
  465. pass
  466. def normalize_str(self, sequence):
  467. """
  468. Normalize the given string
  469. This method provides a way to visualize the effect of a
  470. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  471. information. If you need to get/convert offsets, you can use
  472. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  473. Args:
  474. sequence (:obj:`str`):
  475. A string to normalize
  476. Returns:
  477. :obj:`str`: A string after normalization
  478. """
  479. pass
  480. class StripAccents(Normalizer):
  481. """
  482. StripAccents normalizer
  483. """
  484. def __init__(self):
  485. pass
  486. def normalize(self, normalized):
  487. """
  488. Normalize a :class:`~tokenizers.NormalizedString` in-place
  489. This method allows to modify a :class:`~tokenizers.NormalizedString` to
  490. keep track of the alignment information. If you just want to see the result
  491. of the normalization on a raw string, you can use
  492. :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
  493. Args:
  494. normalized (:class:`~tokenizers.NormalizedString`):
  495. The normalized string on which to apply this
  496. :class:`~tokenizers.normalizers.Normalizer`
  497. """
  498. pass
  499. def normalize_str(self, sequence):
  500. """
  501. Normalize the given string
  502. This method provides a way to visualize the effect of a
  503. :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
  504. information. If you need to get/convert offsets, you can use
  505. :meth:`~tokenizers.normalizers.Normalizer.normalize`
  506. Args:
  507. sequence (:obj:`str`):
  508. A string to normalize
  509. Returns:
  510. :obj:`str`: A string after normalization
  511. """
  512. pass