sentencepiece_model_pb2.py 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511
  1. # Generated by the protocol buffer compiler. DO NOT EDIT!
  2. # source: sentencepiece_model.proto
  3. # Copyright 2022 The HuggingFace Team. All rights reserved.
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. from google.protobuf import descriptor as _descriptor
  17. from google.protobuf import message as _message
  18. from google.protobuf import reflection as _reflection
  19. from google.protobuf import symbol_database as _symbol_database
  20. # @@protoc_insertion_point(imports)
  21. _sym_db = _symbol_database.Default()
  # File-level descriptor for "sentencepiece_model.proto" (package
  # "sentencepiece", proto2 syntax). The other descriptors in this module pass
  # this object as their `file=` argument.
  #
  # This module is marked as generated ("DO NOT EDIT"): the byte string below
  # should only ever change by regenerating the file from the .proto source.
  22. DESCRIPTOR = _descriptor.FileDescriptor(
  23. name="sentencepiece_model.proto",
  24. package="sentencepiece",
  25. syntax="proto2",
  26. serialized_options=b"H\003",
  27. create_key=_descriptor._internal_create_key,
  # `serialized_pb` is one bytes value: the adjacent bytes literals inside the
  # parentheses are concatenated by Python. The message names TrainerSpec,
  # NormalizerSpec, SelfTestData and ModelProto are visible in the payload.
  # NOTE(review): the serialized_start/serialized_end values on the descriptors
  # defined later in this module are presumably byte offsets into this string,
  # so it must not be altered by hand -- confirm against the protobuf docs.
  28. serialized_pb=(
  29. b'\n\x19sentencepiece_model.proto\x12\rsentencepiece"\xa1\n\n\x0bTrainerSpec\x12\r\n\x05input\x18\x01'
  30. b" \x03(\t\x12\x14\n\x0cinput_format\x18\x07 \x01(\t\x12\x14\n\x0cmodel_prefix\x18\x02"
  31. b" \x01(\t\x12\x41\n\nmodel_type\x18\x03"
  32. b" \x01(\x0e\x32$.sentencepiece.TrainerSpec.ModelType:\x07UNIGRAM\x12\x18\n\nvocab_size\x18\x04"
  33. b" \x01(\x05:\x04\x38\x30\x30\x30\x12\x17\n\x0f\x61\x63\x63\x65pt_language\x18\x05 \x03(\t\x12"
  34. b' \n\x15self_test_sample_size\x18\x06 \x01(\x05:\x01\x30\x12"\n\x12\x63haracter_coverage\x18\n'
  35. b" \x01(\x02:\x06\x30.9995\x12\x1e\n\x13input_sentence_size\x18\x0b"
  36. b" \x01(\x04:\x01\x30\x12$\n\x16shuffle_input_sentence\x18\x13 \x01(\x08:\x04true\x12"
  37. b' \n\x14mining_sentence_size\x18\x0c \x01(\x05\x42\x02\x18\x01\x12"\n\x16training_sentence_size\x18\r'
  38. b" \x01(\x05\x42\x02\x18\x01\x12(\n\x17seed_sentencepiece_size\x18\x0e"
  39. b" \x01(\x05:\x07\x31\x30\x30\x30\x30\x30\x30\x12\x1e\n\x10shrinking_factor\x18\x0f"
  40. b" \x01(\x02:\x04\x30.75\x12!\n\x13max_sentence_length\x18\x12"
  41. b" \x01(\x05:\x04\x34\x31\x39\x32\x12\x17\n\x0bnum_threads\x18\x10"
  42. b" \x01(\x05:\x02\x31\x36\x12\x1d\n\x12num_sub_iterations\x18\x11"
  43. b" \x01(\x05:\x01\x32\x12$\n\x18max_sentencepiece_length\x18\x14"
  44. b" \x01(\x05:\x02\x31\x36\x12%\n\x17split_by_unicode_script\x18\x15"
  45. b" \x01(\x08:\x04true\x12\x1d\n\x0fsplit_by_number\x18\x17"
  46. b" \x01(\x08:\x04true\x12!\n\x13split_by_whitespace\x18\x16"
  47. b" \x01(\x08:\x04true\x12)\n\x1atreat_whitespace_as_suffix\x18\x18"
  48. b" \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x0csplit_digits\x18\x19"
  49. b" \x01(\x08:\x05\x66\x61lse\x12\x17\n\x0f\x63ontrol_symbols\x18\x1e"
  50. b" \x03(\t\x12\x1c\n\x14user_defined_symbols\x18\x1f \x03(\t\x12\x16\n\x0erequired_chars\x18$"
  51. b" \x01(\t\x12\x1c\n\rbyte_fallback\x18# \x01(\x08:\x05\x66\x61lse\x12+\n\x1dvocabulary_output_piece_score\x18"
  52. b' \x01(\x08:\x04true\x12\x1e\n\x10hard_vocab_limit\x18! \x01(\x08:\x04true\x12\x1c\n\ruse_all_vocab\x18"'
  53. b" \x01(\x08:\x05\x66\x61lse\x12\x11\n\x06unk_id\x18( \x01(\x05:\x01\x30\x12\x11\n\x06\x62os_id\x18)"
  54. b" \x01(\x05:\x01\x31\x12\x11\n\x06\x65os_id\x18* \x01(\x05:\x01\x32\x12\x12\n\x06pad_id\x18+"
  55. b" \x01(\x05:\x02-1\x12\x18\n\tunk_piece\x18- \x01(\t:\x05<unk>\x12\x16\n\tbos_piece\x18."
  56. b" \x01(\t:\x03<s>\x12\x17\n\teos_piece\x18/ \x01(\t:\x04</s>\x12\x18\n\tpad_piece\x18\x30"
  57. b" \x01(\t:\x05<pad>\x12\x1a\n\x0bunk_surface\x18, \x01(\t:\x05 \xe2\x81\x87"
  58. b" \x12+\n\x1ctrain_extremely_large_corpus\x18\x31"
  59. b' \x01(\x08:\x05\x66\x61lse"5\n\tModelType\x12\x0b\n\x07UNIGRAM\x10\x01\x12\x07\n\x03\x42PE\x10\x02\x12\x08\n\x04WORD\x10\x03\x12\x08\n\x04\x43HAR\x10\x04*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"\xd1\x01\n\x0eNormalizerSpec\x12\x0c\n\x04name\x18\x01'
  60. b" \x01(\t\x12\x1c\n\x14precompiled_charsmap\x18\x02 \x01(\x0c\x12\x1e\n\x10\x61\x64\x64_dummy_prefix\x18\x03"
  61. b" \x01(\x08:\x04true\x12&\n\x18remove_extra_whitespaces\x18\x04 \x01(\x08:\x04true\x12"
  62. b" \n\x12\x65scape_whitespaces\x18\x05 \x01(\x08:\x04true\x12\x1e\n\x16normalization_rule_tsv\x18\x06"
  63. b' \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"y\n\x0cSelfTestData\x12\x33\n\x07samples\x18\x01'
  64. b' \x03(\x0b\x32".sentencepiece.SelfTestData.Sample\x1a)\n\x06Sample\x12\r\n\x05input\x18\x01'
  65. b" \x01(\t\x12\x10\n\x08\x65xpected\x18\x02"
  66. b' \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"\xfe\x03\n\nModelProto\x12\x37\n\x06pieces\x18\x01'
  67. b" \x03(\x0b\x32'.sentencepiece.ModelProto.SentencePiece\x12\x30\n\x0ctrainer_spec\x18\x02"
  68. b" \x01(\x0b\x32\x1a.sentencepiece.TrainerSpec\x12\x36\n\x0fnormalizer_spec\x18\x03"
  69. b" \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x12\x33\n\x0eself_test_data\x18\x04"
  70. b" \x01(\x0b\x32\x1b.sentencepiece.SelfTestData\x12\x38\n\x11\x64\x65normalizer_spec\x18\x05"
  71. b" \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x1a\xd2\x01\n\rSentencePiece\x12\r\n\x05piece\x18\x01"
  72. b" \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\x42\n\x04type\x18\x03"
  73. b' \x01(\x0e\x32,.sentencepiece.ModelProto.SentencePiece.Type:\x06NORMAL"T\n\x04Type\x12\n\n\x06NORMAL\x10\x01\x12\x0b\n\x07UNKNOWN\x10\x02\x12\x0b\n\x07\x43ONTROL\x10\x03\x12\x10\n\x0cUSER_DEFINED\x10\x04\x12\x08\n\x04\x42YTE\x10\x06\x12\n\n\x06UNUSED\x10\x05*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\x42\x02H\x03'
  74. ),
  75. )
  76. _TRAINERSPEC_MODELTYPE = _descriptor.EnumDescriptor(
  77. name="ModelType",
  78. full_name="sentencepiece.TrainerSpec.ModelType",
  79. filename=None,
  80. file=DESCRIPTOR,
  81. create_key=_descriptor._internal_create_key,
  82. values=[
  83. _descriptor.EnumValueDescriptor(
  84. name="UNIGRAM",
  85. index=0,
  86. number=1,
  87. serialized_options=None,
  88. type=None,
  89. create_key=_descriptor._internal_create_key,
  90. ),
  91. _descriptor.EnumValueDescriptor(
  92. name="BPE",
  93. index=1,
  94. number=2,
  95. serialized_options=None,
  96. type=None,
  97. create_key=_descriptor._internal_create_key,
  98. ),
  99. _descriptor.EnumValueDescriptor(
  100. name="WORD",
  101. index=2,
  102. number=3,
  103. serialized_options=None,
  104. type=None,
  105. create_key=_descriptor._internal_create_key,
  106. ),
  107. _descriptor.EnumValueDescriptor(
  108. name="CHAR",
  109. index=3,
  110. number=4,
  111. serialized_options=None,
  112. type=None,
  113. create_key=_descriptor._internal_create_key,
  114. ),
  115. ],
  116. containing_type=None,
  117. serialized_options=None,
  118. serialized_start=1294,
  119. serialized_end=1347,
  120. )
  121. _sym_db.RegisterEnumDescriptor(_TRAINERSPEC_MODELTYPE)
  122. _MODELPROTO_SENTENCEPIECE_TYPE = _descriptor.EnumDescriptor(
  123. name="Type",
  124. full_name="sentencepiece.ModelProto.SentencePiece.Type",
  125. filename=None,
  126. file=DESCRIPTOR,
  127. create_key=_descriptor._internal_create_key,
  128. values=[
  129. _descriptor.EnumValueDescriptor(
  130. name="NORMAL",
  131. index=0,
  132. number=1,
  133. serialized_options=None,
  134. type=None,
  135. create_key=_descriptor._internal_create_key,
  136. ),
  137. _descriptor.EnumValueDescriptor(
  138. name="UNKNOWN",
  139. index=1,
  140. number=2,
  141. serialized_options=None,
  142. type=None,
  143. create_key=_descriptor._internal_create_key,
  144. ),
  145. _descriptor.EnumValueDescriptor(
  146. name="CONTROL",
  147. index=2,
  148. number=3,
  149. serialized_options=None,
  150. type=None,
  151. create_key=_descriptor._internal_create_key,
  152. ),
  153. _descriptor.EnumValueDescriptor(
  154. name="USER_DEFINED",
  155. index=3,
  156. number=4,
  157. serialized_options=None,
  158. type=None,
  159. create_key=_descriptor._internal_create_key,
  160. ),
  161. _descriptor.EnumValueDescriptor(
  162. name="BYTE",
  163. index=4,
  164. number=6,
  165. serialized_options=None,
  166. type=None,
  167. create_key=_descriptor._internal_create_key,
  168. ),
  169. _descriptor.EnumValueDescriptor(
  170. name="UNUSED",
  171. index=5,
  172. number=5,
  173. serialized_options=None,
  174. type=None,
  175. create_key=_descriptor._internal_create_key,
  176. ),
  177. ],
  178. containing_type=None,
  179. serialized_options=None,
  180. serialized_start=2100,
  181. serialized_end=2184,
  182. )
  183. _sym_db.RegisterEnumDescriptor(_MODELPROTO_SENTENCEPIECE_TYPE)
  184. _TRAINERSPEC = _descriptor.Descriptor(
  185. name="TrainerSpec",
  186. full_name="sentencepiece.TrainerSpec",
  187. filename=None,
  188. file=DESCRIPTOR,
  189. containing_type=None,
  190. create_key=_descriptor._internal_create_key,
  191. fields=[
  192. _descriptor.FieldDescriptor(
  193. name="input",
  194. full_name="sentencepiece.TrainerSpec.input",
  195. index=0,
  196. number=1,
  197. type=9,
  198. cpp_type=9,
  199. label=3,
  200. has_default_value=False,
  201. default_value=[],
  202. message_type=None,
  203. enum_type=None,
  204. containing_type=None,
  205. is_extension=False,
  206. extension_scope=None,
  207. serialized_options=None,
  208. file=DESCRIPTOR,
  209. create_key=_descriptor._internal_create_key,
  210. ),
  211. _descriptor.FieldDescriptor(
  212. name="input_format",
  213. full_name="sentencepiece.TrainerSpec.input_format",
  214. index=1,
  215. number=7,
  216. type=9,
  217. cpp_type=9,
  218. label=1,
  219. has_default_value=False,
  220. default_value=b"".decode("utf-8"),
  221. message_type=None,
  222. enum_type=None,
  223. containing_type=None,
  224. is_extension=False,
  225. extension_scope=None,
  226. serialized_options=None,
  227. file=DESCRIPTOR,
  228. create_key=_descriptor._internal_create_key,
  229. ),
  230. _descriptor.FieldDescriptor(
  231. name="model_prefix",
  232. full_name="sentencepiece.TrainerSpec.model_prefix",
  233. index=2,
  234. number=2,
  235. type=9,
  236. cpp_type=9,
  237. label=1,
  238. has_default_value=False,
  239. default_value=b"".decode("utf-8"),
  240. message_type=None,
  241. enum_type=None,
  242. containing_type=None,
  243. is_extension=False,
  244. extension_scope=None,
  245. serialized_options=None,
  246. file=DESCRIPTOR,
  247. create_key=_descriptor._internal_create_key,
  248. ),
  249. _descriptor.FieldDescriptor(
  250. name="model_type",
  251. full_name="sentencepiece.TrainerSpec.model_type",
  252. index=3,
  253. number=3,
  254. type=14,
  255. cpp_type=8,
  256. label=1,
  257. has_default_value=True,
  258. default_value=1,
  259. message_type=None,
  260. enum_type=None,
  261. containing_type=None,
  262. is_extension=False,
  263. extension_scope=None,
  264. serialized_options=None,
  265. file=DESCRIPTOR,
  266. create_key=_descriptor._internal_create_key,
  267. ),
  268. _descriptor.FieldDescriptor(
  269. name="vocab_size",
  270. full_name="sentencepiece.TrainerSpec.vocab_size",
  271. index=4,
  272. number=4,
  273. type=5,
  274. cpp_type=1,
  275. label=1,
  276. has_default_value=True,
  277. default_value=8000,
  278. message_type=None,
  279. enum_type=None,
  280. containing_type=None,
  281. is_extension=False,
  282. extension_scope=None,
  283. serialized_options=None,
  284. file=DESCRIPTOR,
  285. create_key=_descriptor._internal_create_key,
  286. ),
  287. _descriptor.FieldDescriptor(
  288. name="accept_language",
  289. full_name="sentencepiece.TrainerSpec.accept_language",
  290. index=5,
  291. number=5,
  292. type=9,
  293. cpp_type=9,
  294. label=3,
  295. has_default_value=False,
  296. default_value=[],
  297. message_type=None,
  298. enum_type=None,
  299. containing_type=None,
  300. is_extension=False,
  301. extension_scope=None,
  302. serialized_options=None,
  303. file=DESCRIPTOR,
  304. create_key=_descriptor._internal_create_key,
  305. ),
  306. _descriptor.FieldDescriptor(
  307. name="self_test_sample_size",
  308. full_name="sentencepiece.TrainerSpec.self_test_sample_size",
  309. index=6,
  310. number=6,
  311. type=5,
  312. cpp_type=1,
  313. label=1,
  314. has_default_value=True,
  315. default_value=0,
  316. message_type=None,
  317. enum_type=None,
  318. containing_type=None,
  319. is_extension=False,
  320. extension_scope=None,
  321. serialized_options=None,
  322. file=DESCRIPTOR,
  323. create_key=_descriptor._internal_create_key,
  324. ),
  325. _descriptor.FieldDescriptor(
  326. name="character_coverage",
  327. full_name="sentencepiece.TrainerSpec.character_coverage",
  328. index=7,
  329. number=10,
  330. type=2,
  331. cpp_type=6,
  332. label=1,
  333. has_default_value=True,
  334. default_value=float(0.9995),
  335. message_type=None,
  336. enum_type=None,
  337. containing_type=None,
  338. is_extension=False,
  339. extension_scope=None,
  340. serialized_options=None,
  341. file=DESCRIPTOR,
  342. create_key=_descriptor._internal_create_key,
  343. ),
  344. _descriptor.FieldDescriptor(
  345. name="input_sentence_size",
  346. full_name="sentencepiece.TrainerSpec.input_sentence_size",
  347. index=8,
  348. number=11,
  349. type=4,
  350. cpp_type=4,
  351. label=1,
  352. has_default_value=True,
  353. default_value=0,
  354. message_type=None,
  355. enum_type=None,
  356. containing_type=None,
  357. is_extension=False,
  358. extension_scope=None,
  359. serialized_options=None,
  360. file=DESCRIPTOR,
  361. create_key=_descriptor._internal_create_key,
  362. ),
  363. _descriptor.FieldDescriptor(
  364. name="shuffle_input_sentence",
  365. full_name="sentencepiece.TrainerSpec.shuffle_input_sentence",
  366. index=9,
  367. number=19,
  368. type=8,
  369. cpp_type=7,
  370. label=1,
  371. has_default_value=True,
  372. default_value=True,
  373. message_type=None,
  374. enum_type=None,
  375. containing_type=None,
  376. is_extension=False,
  377. extension_scope=None,
  378. serialized_options=None,
  379. file=DESCRIPTOR,
  380. create_key=_descriptor._internal_create_key,
  381. ),
  382. _descriptor.FieldDescriptor(
  383. name="mining_sentence_size",
  384. full_name="sentencepiece.TrainerSpec.mining_sentence_size",
  385. index=10,
  386. number=12,
  387. type=5,
  388. cpp_type=1,
  389. label=1,
  390. has_default_value=False,
  391. default_value=0,
  392. message_type=None,
  393. enum_type=None,
  394. containing_type=None,
  395. is_extension=False,
  396. extension_scope=None,
  397. serialized_options=b"\030\001",
  398. file=DESCRIPTOR,
  399. create_key=_descriptor._internal_create_key,
  400. ),
  401. _descriptor.FieldDescriptor(
  402. name="training_sentence_size",
  403. full_name="sentencepiece.TrainerSpec.training_sentence_size",
  404. index=11,
  405. number=13,
  406. type=5,
  407. cpp_type=1,
  408. label=1,
  409. has_default_value=False,
  410. default_value=0,
  411. message_type=None,
  412. enum_type=None,
  413. containing_type=None,
  414. is_extension=False,
  415. extension_scope=None,
  416. serialized_options=b"\030\001",
  417. file=DESCRIPTOR,
  418. create_key=_descriptor._internal_create_key,
  419. ),
  420. _descriptor.FieldDescriptor(
  421. name="seed_sentencepiece_size",
  422. full_name="sentencepiece.TrainerSpec.seed_sentencepiece_size",
  423. index=12,
  424. number=14,
  425. type=5,
  426. cpp_type=1,
  427. label=1,
  428. has_default_value=True,
  429. default_value=1000000,
  430. message_type=None,
  431. enum_type=None,
  432. containing_type=None,
  433. is_extension=False,
  434. extension_scope=None,
  435. serialized_options=None,
  436. file=DESCRIPTOR,
  437. create_key=_descriptor._internal_create_key,
  438. ),
  439. _descriptor.FieldDescriptor(
  440. name="shrinking_factor",
  441. full_name="sentencepiece.TrainerSpec.shrinking_factor",
  442. index=13,
  443. number=15,
  444. type=2,
  445. cpp_type=6,
  446. label=1,
  447. has_default_value=True,
  448. default_value=float(0.75),
  449. message_type=None,
  450. enum_type=None,
  451. containing_type=None,
  452. is_extension=False,
  453. extension_scope=None,
  454. serialized_options=None,
  455. file=DESCRIPTOR,
  456. create_key=_descriptor._internal_create_key,
  457. ),
  458. _descriptor.FieldDescriptor(
  459. name="max_sentence_length",
  460. full_name="sentencepiece.TrainerSpec.max_sentence_length",
  461. index=14,
  462. number=18,
  463. type=5,
  464. cpp_type=1,
  465. label=1,
  466. has_default_value=True,
  467. default_value=4192,
  468. message_type=None,
  469. enum_type=None,
  470. containing_type=None,
  471. is_extension=False,
  472. extension_scope=None,
  473. serialized_options=None,
  474. file=DESCRIPTOR,
  475. create_key=_descriptor._internal_create_key,
  476. ),
  477. _descriptor.FieldDescriptor(
  478. name="num_threads",
  479. full_name="sentencepiece.TrainerSpec.num_threads",
  480. index=15,
  481. number=16,
  482. type=5,
  483. cpp_type=1,
  484. label=1,
  485. has_default_value=True,
  486. default_value=16,
  487. message_type=None,
  488. enum_type=None,
  489. containing_type=None,
  490. is_extension=False,
  491. extension_scope=None,
  492. serialized_options=None,
  493. file=DESCRIPTOR,
  494. create_key=_descriptor._internal_create_key,
  495. ),
  496. _descriptor.FieldDescriptor(
  497. name="num_sub_iterations",
  498. full_name="sentencepiece.TrainerSpec.num_sub_iterations",
  499. index=16,
  500. number=17,
  501. type=5,
  502. cpp_type=1,
  503. label=1,
  504. has_default_value=True,
  505. default_value=2,
  506. message_type=None,
  507. enum_type=None,
  508. containing_type=None,
  509. is_extension=False,
  510. extension_scope=None,
  511. serialized_options=None,
  512. file=DESCRIPTOR,
  513. create_key=_descriptor._internal_create_key,
  514. ),
  515. _descriptor.FieldDescriptor(
  516. name="max_sentencepiece_length",
  517. full_name="sentencepiece.TrainerSpec.max_sentencepiece_length",
  518. index=17,
  519. number=20,
  520. type=5,
  521. cpp_type=1,
  522. label=1,
  523. has_default_value=True,
  524. default_value=16,
  525. message_type=None,
  526. enum_type=None,
  527. containing_type=None,
  528. is_extension=False,
  529. extension_scope=None,
  530. serialized_options=None,
  531. file=DESCRIPTOR,
  532. create_key=_descriptor._internal_create_key,
  533. ),
  534. _descriptor.FieldDescriptor(
  535. name="split_by_unicode_script",
  536. full_name="sentencepiece.TrainerSpec.split_by_unicode_script",
  537. index=18,
  538. number=21,
  539. type=8,
  540. cpp_type=7,
  541. label=1,
  542. has_default_value=True,
  543. default_value=True,
  544. message_type=None,
  545. enum_type=None,
  546. containing_type=None,
  547. is_extension=False,
  548. extension_scope=None,
  549. serialized_options=None,
  550. file=DESCRIPTOR,
  551. create_key=_descriptor._internal_create_key,
  552. ),
  553. _descriptor.FieldDescriptor(
  554. name="split_by_number",
  555. full_name="sentencepiece.TrainerSpec.split_by_number",
  556. index=19,
  557. number=23,
  558. type=8,
  559. cpp_type=7,
  560. label=1,
  561. has_default_value=True,
  562. default_value=True,
  563. message_type=None,
  564. enum_type=None,
  565. containing_type=None,
  566. is_extension=False,
  567. extension_scope=None,
  568. serialized_options=None,
  569. file=DESCRIPTOR,
  570. create_key=_descriptor._internal_create_key,
  571. ),
  572. _descriptor.FieldDescriptor(
  573. name="split_by_whitespace",
  574. full_name="sentencepiece.TrainerSpec.split_by_whitespace",
  575. index=20,
  576. number=22,
  577. type=8,
  578. cpp_type=7,
  579. label=1,
  580. has_default_value=True,
  581. default_value=True,
  582. message_type=None,
  583. enum_type=None,
  584. containing_type=None,
  585. is_extension=False,
  586. extension_scope=None,
  587. serialized_options=None,
  588. file=DESCRIPTOR,
  589. create_key=_descriptor._internal_create_key,
  590. ),
  591. _descriptor.FieldDescriptor(
  592. name="treat_whitespace_as_suffix",
  593. full_name="sentencepiece.TrainerSpec.treat_whitespace_as_suffix",
  594. index=21,
  595. number=24,
  596. type=8,
  597. cpp_type=7,
  598. label=1,
  599. has_default_value=True,
  600. default_value=False,
  601. message_type=None,
  602. enum_type=None,
  603. containing_type=None,
  604. is_extension=False,
  605. extension_scope=None,
  606. serialized_options=None,
  607. file=DESCRIPTOR,
  608. create_key=_descriptor._internal_create_key,
  609. ),
  610. _descriptor.FieldDescriptor(
  611. name="split_digits",
  612. full_name="sentencepiece.TrainerSpec.split_digits",
  613. index=22,
  614. number=25,
  615. type=8,
  616. cpp_type=7,
  617. label=1,
  618. has_default_value=True,
  619. default_value=False,
  620. message_type=None,
  621. enum_type=None,
  622. containing_type=None,
  623. is_extension=False,
  624. extension_scope=None,
  625. serialized_options=None,
  626. file=DESCRIPTOR,
  627. create_key=_descriptor._internal_create_key,
  628. ),
  629. _descriptor.FieldDescriptor(
  630. name="control_symbols",
  631. full_name="sentencepiece.TrainerSpec.control_symbols",
  632. index=23,
  633. number=30,
  634. type=9,
  635. cpp_type=9,
  636. label=3,
  637. has_default_value=False,
  638. default_value=[],
  639. message_type=None,
  640. enum_type=None,
  641. containing_type=None,
  642. is_extension=False,
  643. extension_scope=None,
  644. serialized_options=None,
  645. file=DESCRIPTOR,
  646. create_key=_descriptor._internal_create_key,
  647. ),
  648. _descriptor.FieldDescriptor(
  649. name="user_defined_symbols",
  650. full_name="sentencepiece.TrainerSpec.user_defined_symbols",
  651. index=24,
  652. number=31,
  653. type=9,
  654. cpp_type=9,
  655. label=3,
  656. has_default_value=False,
  657. default_value=[],
  658. message_type=None,
  659. enum_type=None,
  660. containing_type=None,
  661. is_extension=False,
  662. extension_scope=None,
  663. serialized_options=None,
  664. file=DESCRIPTOR,
  665. create_key=_descriptor._internal_create_key,
  666. ),
  667. _descriptor.FieldDescriptor(
  668. name="required_chars",
  669. full_name="sentencepiece.TrainerSpec.required_chars",
  670. index=25,
  671. number=36,
  672. type=9,
  673. cpp_type=9,
  674. label=1,
  675. has_default_value=False,
  676. default_value=b"".decode("utf-8"),
  677. message_type=None,
  678. enum_type=None,
  679. containing_type=None,
  680. is_extension=False,
  681. extension_scope=None,
  682. serialized_options=None,
  683. file=DESCRIPTOR,
  684. create_key=_descriptor._internal_create_key,
  685. ),
  686. _descriptor.FieldDescriptor(
  687. name="byte_fallback",
  688. full_name="sentencepiece.TrainerSpec.byte_fallback",
  689. index=26,
  690. number=35,
  691. type=8,
  692. cpp_type=7,
  693. label=1,
  694. has_default_value=True,
  695. default_value=False,
  696. message_type=None,
  697. enum_type=None,
  698. containing_type=None,
  699. is_extension=False,
  700. extension_scope=None,
  701. serialized_options=None,
  702. file=DESCRIPTOR,
  703. create_key=_descriptor._internal_create_key,
  704. ),
  705. _descriptor.FieldDescriptor(
  706. name="vocabulary_output_piece_score",
  707. full_name="sentencepiece.TrainerSpec.vocabulary_output_piece_score",
  708. index=27,
  709. number=32,
  710. type=8,
  711. cpp_type=7,
  712. label=1,
  713. has_default_value=True,
  714. default_value=True,
  715. message_type=None,
  716. enum_type=None,
  717. containing_type=None,
  718. is_extension=False,
  719. extension_scope=None,
  720. serialized_options=None,
  721. file=DESCRIPTOR,
  722. create_key=_descriptor._internal_create_key,
  723. ),
  724. _descriptor.FieldDescriptor(
  725. name="hard_vocab_limit",
  726. full_name="sentencepiece.TrainerSpec.hard_vocab_limit",
  727. index=28,
  728. number=33,
  729. type=8,
  730. cpp_type=7,
  731. label=1,
  732. has_default_value=True,
  733. default_value=True,
  734. message_type=None,
  735. enum_type=None,
  736. containing_type=None,
  737. is_extension=False,
  738. extension_scope=None,
  739. serialized_options=None,
  740. file=DESCRIPTOR,
  741. create_key=_descriptor._internal_create_key,
  742. ),
  743. _descriptor.FieldDescriptor(
  744. name="use_all_vocab",
  745. full_name="sentencepiece.TrainerSpec.use_all_vocab",
  746. index=29,
  747. number=34,
  748. type=8,
  749. cpp_type=7,
  750. label=1,
  751. has_default_value=True,
  752. default_value=False,
  753. message_type=None,
  754. enum_type=None,
  755. containing_type=None,
  756. is_extension=False,
  757. extension_scope=None,
  758. serialized_options=None,
  759. file=DESCRIPTOR,
  760. create_key=_descriptor._internal_create_key,
  761. ),
  762. _descriptor.FieldDescriptor(
  763. name="unk_id",
  764. full_name="sentencepiece.TrainerSpec.unk_id",
  765. index=30,
  766. number=40,
  767. type=5,
  768. cpp_type=1,
  769. label=1,
  770. has_default_value=True,
  771. default_value=0,
  772. message_type=None,
  773. enum_type=None,
  774. containing_type=None,
  775. is_extension=False,
  776. extension_scope=None,
  777. serialized_options=None,
  778. file=DESCRIPTOR,
  779. create_key=_descriptor._internal_create_key,
  780. ),
  781. _descriptor.FieldDescriptor(
  782. name="bos_id",
  783. full_name="sentencepiece.TrainerSpec.bos_id",
  784. index=31,
  785. number=41,
  786. type=5,
  787. cpp_type=1,
  788. label=1,
  789. has_default_value=True,
  790. default_value=1,
  791. message_type=None,
  792. enum_type=None,
  793. containing_type=None,
  794. is_extension=False,
  795. extension_scope=None,
  796. serialized_options=None,
  797. file=DESCRIPTOR,
  798. create_key=_descriptor._internal_create_key,
  799. ),
  800. _descriptor.FieldDescriptor(
  801. name="eos_id",
  802. full_name="sentencepiece.TrainerSpec.eos_id",
  803. index=32,
  804. number=42,
  805. type=5,
  806. cpp_type=1,
  807. label=1,
  808. has_default_value=True,
  809. default_value=2,
  810. message_type=None,
  811. enum_type=None,
  812. containing_type=None,
  813. is_extension=False,
  814. extension_scope=None,
  815. serialized_options=None,
  816. file=DESCRIPTOR,
  817. create_key=_descriptor._internal_create_key,
  818. ),
  819. _descriptor.FieldDescriptor(
  820. name="pad_id",
  821. full_name="sentencepiece.TrainerSpec.pad_id",
  822. index=33,
  823. number=43,
  824. type=5,
  825. cpp_type=1,
  826. label=1,
  827. has_default_value=True,
  828. default_value=-1,
  829. message_type=None,
  830. enum_type=None,
  831. containing_type=None,
  832. is_extension=False,
  833. extension_scope=None,
  834. serialized_options=None,
  835. file=DESCRIPTOR,
  836. create_key=_descriptor._internal_create_key,
  837. ),
  838. _descriptor.FieldDescriptor(
  839. name="unk_piece",
  840. full_name="sentencepiece.TrainerSpec.unk_piece",
  841. index=34,
  842. number=45,
  843. type=9,
  844. cpp_type=9,
  845. label=1,
  846. has_default_value=True,
  847. default_value=b"<unk>".decode("utf-8"),
  848. message_type=None,
  849. enum_type=None,
  850. containing_type=None,
  851. is_extension=False,
  852. extension_scope=None,
  853. serialized_options=None,
  854. file=DESCRIPTOR,
  855. create_key=_descriptor._internal_create_key,
  856. ),
  857. _descriptor.FieldDescriptor(
  858. name="bos_piece",
  859. full_name="sentencepiece.TrainerSpec.bos_piece",
  860. index=35,
  861. number=46,
  862. type=9,
  863. cpp_type=9,
  864. label=1,
  865. has_default_value=True,
  866. default_value=b"<s>".decode("utf-8"),
  867. message_type=None,
  868. enum_type=None,
  869. containing_type=None,
  870. is_extension=False,
  871. extension_scope=None,
  872. serialized_options=None,
  873. file=DESCRIPTOR,
  874. create_key=_descriptor._internal_create_key,
  875. ),
  876. _descriptor.FieldDescriptor(
  877. name="eos_piece",
  878. full_name="sentencepiece.TrainerSpec.eos_piece",
  879. index=36,
  880. number=47,
  881. type=9,
  882. cpp_type=9,
  883. label=1,
  884. has_default_value=True,
  885. default_value=b"</s>".decode("utf-8"),
  886. message_type=None,
  887. enum_type=None,
  888. containing_type=None,
  889. is_extension=False,
  890. extension_scope=None,
  891. serialized_options=None,
  892. file=DESCRIPTOR,
  893. create_key=_descriptor._internal_create_key,
  894. ),
  895. _descriptor.FieldDescriptor(
  896. name="pad_piece",
  897. full_name="sentencepiece.TrainerSpec.pad_piece",
  898. index=37,
  899. number=48,
  900. type=9,
  901. cpp_type=9,
  902. label=1,
  903. has_default_value=True,
  904. default_value=b"<pad>".decode("utf-8"),
  905. message_type=None,
  906. enum_type=None,
  907. containing_type=None,
  908. is_extension=False,
  909. extension_scope=None,
  910. serialized_options=None,
  911. file=DESCRIPTOR,
  912. create_key=_descriptor._internal_create_key,
  913. ),
  914. _descriptor.FieldDescriptor(
  915. name="unk_surface",
  916. full_name="sentencepiece.TrainerSpec.unk_surface",
  917. index=38,
  918. number=44,
  919. type=9,
  920. cpp_type=9,
  921. label=1,
  922. has_default_value=True,
  923. default_value=b" \342\201\207 ".decode("utf-8"),
  924. message_type=None,
  925. enum_type=None,
  926. containing_type=None,
  927. is_extension=False,
  928. extension_scope=None,
  929. serialized_options=None,
  930. file=DESCRIPTOR,
  931. create_key=_descriptor._internal_create_key,
  932. ),
  933. _descriptor.FieldDescriptor(
  934. name="train_extremely_large_corpus",
  935. full_name="sentencepiece.TrainerSpec.train_extremely_large_corpus",
  936. index=39,
  937. number=49,
  938. type=8,
  939. cpp_type=7,
  940. label=1,
  941. has_default_value=True,
  942. default_value=False,
  943. message_type=None,
  944. enum_type=None,
  945. containing_type=None,
  946. is_extension=False,
  947. extension_scope=None,
  948. serialized_options=None,
  949. file=DESCRIPTOR,
  950. create_key=_descriptor._internal_create_key,
  951. ),
  952. ],
  953. extensions=[],
  954. nested_types=[],
  955. enum_types=[
  956. _TRAINERSPEC_MODELTYPE,
  957. ],
  958. serialized_options=None,
  959. is_extendable=True,
  960. syntax="proto2",
  961. extension_ranges=[
  962. (200, 536870912),
  963. ],
  964. oneofs=[],
  965. serialized_start=45,
  966. serialized_end=1358,
  967. )
  968. _NORMALIZERSPEC = _descriptor.Descriptor(
  969. name="NormalizerSpec",
  970. full_name="sentencepiece.NormalizerSpec",
  971. filename=None,
  972. file=DESCRIPTOR,
  973. containing_type=None,
  974. create_key=_descriptor._internal_create_key,
  975. fields=[
  976. _descriptor.FieldDescriptor(
  977. name="name",
  978. full_name="sentencepiece.NormalizerSpec.name",
  979. index=0,
  980. number=1,
  981. type=9,
  982. cpp_type=9,
  983. label=1,
  984. has_default_value=False,
  985. default_value=b"".decode("utf-8"),
  986. message_type=None,
  987. enum_type=None,
  988. containing_type=None,
  989. is_extension=False,
  990. extension_scope=None,
  991. serialized_options=None,
  992. file=DESCRIPTOR,
  993. create_key=_descriptor._internal_create_key,
  994. ),
  995. _descriptor.FieldDescriptor(
  996. name="precompiled_charsmap",
  997. full_name="sentencepiece.NormalizerSpec.precompiled_charsmap",
  998. index=1,
  999. number=2,
  1000. type=12,
  1001. cpp_type=9,
  1002. label=1,
  1003. has_default_value=False,
  1004. default_value=b"",
  1005. message_type=None,
  1006. enum_type=None,
  1007. containing_type=None,
  1008. is_extension=False,
  1009. extension_scope=None,
  1010. serialized_options=None,
  1011. file=DESCRIPTOR,
  1012. create_key=_descriptor._internal_create_key,
  1013. ),
  1014. _descriptor.FieldDescriptor(
  1015. name="add_dummy_prefix",
  1016. full_name="sentencepiece.NormalizerSpec.add_dummy_prefix",
  1017. index=2,
  1018. number=3,
  1019. type=8,
  1020. cpp_type=7,
  1021. label=1,
  1022. has_default_value=True,
  1023. default_value=True,
  1024. message_type=None,
  1025. enum_type=None,
  1026. containing_type=None,
  1027. is_extension=False,
  1028. extension_scope=None,
  1029. serialized_options=None,
  1030. file=DESCRIPTOR,
  1031. create_key=_descriptor._internal_create_key,
  1032. ),
  1033. _descriptor.FieldDescriptor(
  1034. name="remove_extra_whitespaces",
  1035. full_name="sentencepiece.NormalizerSpec.remove_extra_whitespaces",
  1036. index=3,
  1037. number=4,
  1038. type=8,
  1039. cpp_type=7,
  1040. label=1,
  1041. has_default_value=True,
  1042. default_value=True,
  1043. message_type=None,
  1044. enum_type=None,
  1045. containing_type=None,
  1046. is_extension=False,
  1047. extension_scope=None,
  1048. serialized_options=None,
  1049. file=DESCRIPTOR,
  1050. create_key=_descriptor._internal_create_key,
  1051. ),
  1052. _descriptor.FieldDescriptor(
  1053. name="escape_whitespaces",
  1054. full_name="sentencepiece.NormalizerSpec.escape_whitespaces",
  1055. index=4,
  1056. number=5,
  1057. type=8,
  1058. cpp_type=7,
  1059. label=1,
  1060. has_default_value=True,
  1061. default_value=True,
  1062. message_type=None,
  1063. enum_type=None,
  1064. containing_type=None,
  1065. is_extension=False,
  1066. extension_scope=None,
  1067. serialized_options=None,
  1068. file=DESCRIPTOR,
  1069. create_key=_descriptor._internal_create_key,
  1070. ),
  1071. _descriptor.FieldDescriptor(
  1072. name="normalization_rule_tsv",
  1073. full_name="sentencepiece.NormalizerSpec.normalization_rule_tsv",
  1074. index=5,
  1075. number=6,
  1076. type=9,
  1077. cpp_type=9,
  1078. label=1,
  1079. has_default_value=False,
  1080. default_value=b"".decode("utf-8"),
  1081. message_type=None,
  1082. enum_type=None,
  1083. containing_type=None,
  1084. is_extension=False,
  1085. extension_scope=None,
  1086. serialized_options=None,
  1087. file=DESCRIPTOR,
  1088. create_key=_descriptor._internal_create_key,
  1089. ),
  1090. ],
  1091. extensions=[],
  1092. nested_types=[],
  1093. enum_types=[],
  1094. serialized_options=None,
  1095. is_extendable=True,
  1096. syntax="proto2",
  1097. extension_ranges=[
  1098. (200, 536870912),
  1099. ],
  1100. oneofs=[],
  1101. serialized_start=1361,
  1102. serialized_end=1570,
  1103. )
  1104. _SELFTESTDATA_SAMPLE = _descriptor.Descriptor(
  1105. name="Sample",
  1106. full_name="sentencepiece.SelfTestData.Sample",
  1107. filename=None,
  1108. file=DESCRIPTOR,
  1109. containing_type=None,
  1110. create_key=_descriptor._internal_create_key,
  1111. fields=[
  1112. _descriptor.FieldDescriptor(
  1113. name="input",
  1114. full_name="sentencepiece.SelfTestData.Sample.input",
  1115. index=0,
  1116. number=1,
  1117. type=9,
  1118. cpp_type=9,
  1119. label=1,
  1120. has_default_value=False,
  1121. default_value=b"".decode("utf-8"),
  1122. message_type=None,
  1123. enum_type=None,
  1124. containing_type=None,
  1125. is_extension=False,
  1126. extension_scope=None,
  1127. serialized_options=None,
  1128. file=DESCRIPTOR,
  1129. create_key=_descriptor._internal_create_key,
  1130. ),
  1131. _descriptor.FieldDescriptor(
  1132. name="expected",
  1133. full_name="sentencepiece.SelfTestData.Sample.expected",
  1134. index=1,
  1135. number=2,
  1136. type=9,
  1137. cpp_type=9,
  1138. label=1,
  1139. has_default_value=False,
  1140. default_value=b"".decode("utf-8"),
  1141. message_type=None,
  1142. enum_type=None,
  1143. containing_type=None,
  1144. is_extension=False,
  1145. extension_scope=None,
  1146. serialized_options=None,
  1147. file=DESCRIPTOR,
  1148. create_key=_descriptor._internal_create_key,
  1149. ),
  1150. ],
  1151. extensions=[],
  1152. nested_types=[],
  1153. enum_types=[],
  1154. serialized_options=None,
  1155. is_extendable=False,
  1156. syntax="proto2",
  1157. extension_ranges=[],
  1158. oneofs=[],
  1159. serialized_start=1641,
  1160. serialized_end=1682,
  1161. )
  1162. _SELFTESTDATA = _descriptor.Descriptor(
  1163. name="SelfTestData",
  1164. full_name="sentencepiece.SelfTestData",
  1165. filename=None,
  1166. file=DESCRIPTOR,
  1167. containing_type=None,
  1168. create_key=_descriptor._internal_create_key,
  1169. fields=[
  1170. _descriptor.FieldDescriptor(
  1171. name="samples",
  1172. full_name="sentencepiece.SelfTestData.samples",
  1173. index=0,
  1174. number=1,
  1175. type=11,
  1176. cpp_type=10,
  1177. label=3,
  1178. has_default_value=False,
  1179. default_value=[],
  1180. message_type=None,
  1181. enum_type=None,
  1182. containing_type=None,
  1183. is_extension=False,
  1184. extension_scope=None,
  1185. serialized_options=None,
  1186. file=DESCRIPTOR,
  1187. create_key=_descriptor._internal_create_key,
  1188. ),
  1189. ],
  1190. extensions=[],
  1191. nested_types=[
  1192. _SELFTESTDATA_SAMPLE,
  1193. ],
  1194. enum_types=[],
  1195. serialized_options=None,
  1196. is_extendable=True,
  1197. syntax="proto2",
  1198. extension_ranges=[
  1199. (200, 536870912),
  1200. ],
  1201. oneofs=[],
  1202. serialized_start=1572,
  1203. serialized_end=1693,
  1204. )
  1205. _MODELPROTO_SENTENCEPIECE = _descriptor.Descriptor(
  1206. name="SentencePiece",
  1207. full_name="sentencepiece.ModelProto.SentencePiece",
  1208. filename=None,
  1209. file=DESCRIPTOR,
  1210. containing_type=None,
  1211. create_key=_descriptor._internal_create_key,
  1212. fields=[
  1213. _descriptor.FieldDescriptor(
  1214. name="piece",
  1215. full_name="sentencepiece.ModelProto.SentencePiece.piece",
  1216. index=0,
  1217. number=1,
  1218. type=9,
  1219. cpp_type=9,
  1220. label=1,
  1221. has_default_value=False,
  1222. default_value=b"".decode("utf-8"),
  1223. message_type=None,
  1224. enum_type=None,
  1225. containing_type=None,
  1226. is_extension=False,
  1227. extension_scope=None,
  1228. serialized_options=None,
  1229. file=DESCRIPTOR,
  1230. create_key=_descriptor._internal_create_key,
  1231. ),
  1232. _descriptor.FieldDescriptor(
  1233. name="score",
  1234. full_name="sentencepiece.ModelProto.SentencePiece.score",
  1235. index=1,
  1236. number=2,
  1237. type=2,
  1238. cpp_type=6,
  1239. label=1,
  1240. has_default_value=False,
  1241. default_value=float(0),
  1242. message_type=None,
  1243. enum_type=None,
  1244. containing_type=None,
  1245. is_extension=False,
  1246. extension_scope=None,
  1247. serialized_options=None,
  1248. file=DESCRIPTOR,
  1249. create_key=_descriptor._internal_create_key,
  1250. ),
  1251. _descriptor.FieldDescriptor(
  1252. name="type",
  1253. full_name="sentencepiece.ModelProto.SentencePiece.type",
  1254. index=2,
  1255. number=3,
  1256. type=14,
  1257. cpp_type=8,
  1258. label=1,
  1259. has_default_value=True,
  1260. default_value=1,
  1261. message_type=None,
  1262. enum_type=None,
  1263. containing_type=None,
  1264. is_extension=False,
  1265. extension_scope=None,
  1266. serialized_options=None,
  1267. file=DESCRIPTOR,
  1268. create_key=_descriptor._internal_create_key,
  1269. ),
  1270. ],
  1271. extensions=[],
  1272. nested_types=[],
  1273. enum_types=[
  1274. _MODELPROTO_SENTENCEPIECE_TYPE,
  1275. ],
  1276. serialized_options=None,
  1277. is_extendable=True,
  1278. syntax="proto2",
  1279. extension_ranges=[
  1280. (200, 536870912),
  1281. ],
  1282. oneofs=[],
  1283. serialized_start=1985,
  1284. serialized_end=2195,
  1285. )
  1286. _MODELPROTO = _descriptor.Descriptor(
  1287. name="ModelProto",
  1288. full_name="sentencepiece.ModelProto",
  1289. filename=None,
  1290. file=DESCRIPTOR,
  1291. containing_type=None,
  1292. create_key=_descriptor._internal_create_key,
  1293. fields=[
  1294. _descriptor.FieldDescriptor(
  1295. name="pieces",
  1296. full_name="sentencepiece.ModelProto.pieces",
  1297. index=0,
  1298. number=1,
  1299. type=11,
  1300. cpp_type=10,
  1301. label=3,
  1302. has_default_value=False,
  1303. default_value=[],
  1304. message_type=None,
  1305. enum_type=None,
  1306. containing_type=None,
  1307. is_extension=False,
  1308. extension_scope=None,
  1309. serialized_options=None,
  1310. file=DESCRIPTOR,
  1311. create_key=_descriptor._internal_create_key,
  1312. ),
  1313. _descriptor.FieldDescriptor(
  1314. name="trainer_spec",
  1315. full_name="sentencepiece.ModelProto.trainer_spec",
  1316. index=1,
  1317. number=2,
  1318. type=11,
  1319. cpp_type=10,
  1320. label=1,
  1321. has_default_value=False,
  1322. default_value=None,
  1323. message_type=None,
  1324. enum_type=None,
  1325. containing_type=None,
  1326. is_extension=False,
  1327. extension_scope=None,
  1328. serialized_options=None,
  1329. file=DESCRIPTOR,
  1330. create_key=_descriptor._internal_create_key,
  1331. ),
  1332. _descriptor.FieldDescriptor(
  1333. name="normalizer_spec",
  1334. full_name="sentencepiece.ModelProto.normalizer_spec",
  1335. index=2,
  1336. number=3,
  1337. type=11,
  1338. cpp_type=10,
  1339. label=1,
  1340. has_default_value=False,
  1341. default_value=None,
  1342. message_type=None,
  1343. enum_type=None,
  1344. containing_type=None,
  1345. is_extension=False,
  1346. extension_scope=None,
  1347. serialized_options=None,
  1348. file=DESCRIPTOR,
  1349. create_key=_descriptor._internal_create_key,
  1350. ),
  1351. _descriptor.FieldDescriptor(
  1352. name="self_test_data",
  1353. full_name="sentencepiece.ModelProto.self_test_data",
  1354. index=3,
  1355. number=4,
  1356. type=11,
  1357. cpp_type=10,
  1358. label=1,
  1359. has_default_value=False,
  1360. default_value=None,
  1361. message_type=None,
  1362. enum_type=None,
  1363. containing_type=None,
  1364. is_extension=False,
  1365. extension_scope=None,
  1366. serialized_options=None,
  1367. file=DESCRIPTOR,
  1368. create_key=_descriptor._internal_create_key,
  1369. ),
  1370. _descriptor.FieldDescriptor(
  1371. name="denormalizer_spec",
  1372. full_name="sentencepiece.ModelProto.denormalizer_spec",
  1373. index=4,
  1374. number=5,
  1375. type=11,
  1376. cpp_type=10,
  1377. label=1,
  1378. has_default_value=False,
  1379. default_value=None,
  1380. message_type=None,
  1381. enum_type=None,
  1382. containing_type=None,
  1383. is_extension=False,
  1384. extension_scope=None,
  1385. serialized_options=None,
  1386. file=DESCRIPTOR,
  1387. create_key=_descriptor._internal_create_key,
  1388. ),
  1389. ],
  1390. extensions=[],
  1391. nested_types=[
  1392. _MODELPROTO_SENTENCEPIECE,
  1393. ],
  1394. enum_types=[],
  1395. serialized_options=None,
  1396. is_extendable=True,
  1397. syntax="proto2",
  1398. extension_ranges=[
  1399. (200, 536870912),
  1400. ],
  1401. oneofs=[],
  1402. serialized_start=1696,
  1403. serialized_end=2206,
  1404. )
  1405. _TRAINERSPEC.fields_by_name["model_type"].enum_type = _TRAINERSPEC_MODELTYPE
  1406. _TRAINERSPEC_MODELTYPE.containing_type = _TRAINERSPEC
  1407. _SELFTESTDATA_SAMPLE.containing_type = _SELFTESTDATA
  1408. _SELFTESTDATA.fields_by_name["samples"].message_type = _SELFTESTDATA_SAMPLE
  1409. _MODELPROTO_SENTENCEPIECE.fields_by_name["type"].enum_type = _MODELPROTO_SENTENCEPIECE_TYPE
  1410. _MODELPROTO_SENTENCEPIECE.containing_type = _MODELPROTO
  1411. _MODELPROTO_SENTENCEPIECE_TYPE.containing_type = _MODELPROTO_SENTENCEPIECE
  1412. _MODELPROTO.fields_by_name["pieces"].message_type = _MODELPROTO_SENTENCEPIECE
  1413. _MODELPROTO.fields_by_name["trainer_spec"].message_type = _TRAINERSPEC
  1414. _MODELPROTO.fields_by_name["normalizer_spec"].message_type = _NORMALIZERSPEC
  1415. _MODELPROTO.fields_by_name["self_test_data"].message_type = _SELFTESTDATA
  1416. _MODELPROTO.fields_by_name["denormalizer_spec"].message_type = _NORMALIZERSPEC
  1417. DESCRIPTOR.message_types_by_name["TrainerSpec"] = _TRAINERSPEC
  1418. DESCRIPTOR.message_types_by_name["NormalizerSpec"] = _NORMALIZERSPEC
  1419. DESCRIPTOR.message_types_by_name["SelfTestData"] = _SELFTESTDATA
  1420. DESCRIPTOR.message_types_by_name["ModelProto"] = _MODELPROTO
  1421. _sym_db.RegisterFileDescriptor(DESCRIPTOR)
  1422. TrainerSpec = _reflection.GeneratedProtocolMessageType(
  1423. "TrainerSpec",
  1424. (_message.Message,),
  1425. {
  1426. "DESCRIPTOR": _TRAINERSPEC,
  1427. "__module__": "sentencepiece_model_pb2",
  1428. # @@protoc_insertion_point(class_scope:sentencepiece.TrainerSpec)
  1429. },
  1430. )
  1431. _sym_db.RegisterMessage(TrainerSpec)
  1432. NormalizerSpec = _reflection.GeneratedProtocolMessageType(
  1433. "NormalizerSpec",
  1434. (_message.Message,),
  1435. {
  1436. "DESCRIPTOR": _NORMALIZERSPEC,
  1437. "__module__": "sentencepiece_model_pb2",
  1438. # @@protoc_insertion_point(class_scope:sentencepiece.NormalizerSpec)
  1439. },
  1440. )
  1441. _sym_db.RegisterMessage(NormalizerSpec)
  1442. SelfTestData = _reflection.GeneratedProtocolMessageType(
  1443. "SelfTestData",
  1444. (_message.Message,),
  1445. {
  1446. "Sample": _reflection.GeneratedProtocolMessageType(
  1447. "Sample",
  1448. (_message.Message,),
  1449. {
  1450. "DESCRIPTOR": _SELFTESTDATA_SAMPLE,
  1451. "__module__": "sentencepiece_model_pb2",
  1452. # @@protoc_insertion_point(class_scope:sentencepiece.SelfTestData.Sample)
  1453. },
  1454. ),
  1455. "DESCRIPTOR": _SELFTESTDATA,
  1456. "__module__": "sentencepiece_model_pb2",
  1457. # @@protoc_insertion_point(class_scope:sentencepiece.SelfTestData)
  1458. },
  1459. )
  1460. _sym_db.RegisterMessage(SelfTestData)
  1461. _sym_db.RegisterMessage(SelfTestData.Sample)
  1462. ModelProto = _reflection.GeneratedProtocolMessageType(
  1463. "ModelProto",
  1464. (_message.Message,),
  1465. {
  1466. "SentencePiece": _reflection.GeneratedProtocolMessageType(
  1467. "SentencePiece",
  1468. (_message.Message,),
  1469. {
  1470. "DESCRIPTOR": _MODELPROTO_SENTENCEPIECE,
  1471. "__module__": "sentencepiece_model_pb2",
  1472. # @@protoc_insertion_point(class_scope:sentencepiece.ModelProto.SentencePiece)
  1473. },
  1474. ),
  1475. "DESCRIPTOR": _MODELPROTO,
  1476. "__module__": "sentencepiece_model_pb2",
  1477. # @@protoc_insertion_point(class_scope:sentencepiece.ModelProto)
  1478. },
  1479. )
  1480. _sym_db.RegisterMessage(ModelProto)
  1481. _sym_db.RegisterMessage(ModelProto.SentencePiece)
  1482. DESCRIPTOR._options = None
  1483. _TRAINERSPEC.fields_by_name["mining_sentence_size"]._options = None
  1484. _TRAINERSPEC.fields_by_name["training_sentence_size"]._options = None
  1485. # @@protoc_insertion_point(module_scope)