| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511 |
- # Generated by the protocol buffer compiler. DO NOT EDIT!
- # source: sentencepiece_model.proto
- # Copyright 2022 The HuggingFace Team. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from google.protobuf import descriptor as _descriptor
- from google.protobuf import message as _message
- from google.protobuf import reflection as _reflection
- from google.protobuf import symbol_database as _symbol_database
- # @@protoc_insertion_point(imports)
# Default symbol database; the enum descriptors below are registered with it.
_sym_db = _symbol_database.Default()

# File-level descriptor for sentencepiece_model.proto.
#
# `serialized_pb` is the serialized FileDescriptorProto, split across adjacent
# bytes literals that Python concatenates.  Most literals start with a space:
# that space is the 0x20 tag byte that precedes a field's label value, so the
# exact whitespace inside every literal is significant.
#
# NOTE: the field number of `vocabulary_output_piece_score` is 32, which is
# also the byte 0x20 (an ASCII space).  It sits directly before the 0x20 label
# tag, so the two bytes look like a double space and are easily collapsed by
# whitespace-normalising tools, leaving the entry one byte short of its
# declared length (`+` == 43).  The number byte is therefore spelled
# explicitly as `\x20` below.
DESCRIPTOR = _descriptor.FileDescriptor(
    name="sentencepiece_model.proto",
    package="sentencepiece",
    syntax="proto2",
    serialized_options=b"H\003",
    create_key=_descriptor._internal_create_key,
    serialized_pb=(
        b'\n\x19sentencepiece_model.proto\x12\rsentencepiece"\xa1\n\n\x0bTrainerSpec\x12\r\n\x05input\x18\x01'
        b" \x03(\t\x12\x14\n\x0cinput_format\x18\x07 \x01(\t\x12\x14\n\x0cmodel_prefix\x18\x02"
        b" \x01(\t\x12\x41\n\nmodel_type\x18\x03"
        b" \x01(\x0e\x32$.sentencepiece.TrainerSpec.ModelType:\x07UNIGRAM\x12\x18\n\nvocab_size\x18\x04"
        b" \x01(\x05:\x04\x38\x30\x30\x30\x12\x17\n\x0f\x61\x63\x63\x65pt_language\x18\x05 \x03(\t\x12"
        b' \n\x15self_test_sample_size\x18\x06 \x01(\x05:\x01\x30\x12"\n\x12\x63haracter_coverage\x18\n'
        b" \x01(\x02:\x06\x30.9995\x12\x1e\n\x13input_sentence_size\x18\x0b"
        b" \x01(\x04:\x01\x30\x12$\n\x16shuffle_input_sentence\x18\x13 \x01(\x08:\x04true\x12"
        b' \n\x14mining_sentence_size\x18\x0c \x01(\x05\x42\x02\x18\x01\x12"\n\x16training_sentence_size\x18\r'
        b" \x01(\x05\x42\x02\x18\x01\x12(\n\x17seed_sentencepiece_size\x18\x0e"
        b" \x01(\x05:\x07\x31\x30\x30\x30\x30\x30\x30\x12\x1e\n\x10shrinking_factor\x18\x0f"
        b" \x01(\x02:\x04\x30.75\x12!\n\x13max_sentence_length\x18\x12"
        b" \x01(\x05:\x04\x34\x31\x39\x32\x12\x17\n\x0bnum_threads\x18\x10"
        b" \x01(\x05:\x02\x31\x36\x12\x1d\n\x12num_sub_iterations\x18\x11"
        b" \x01(\x05:\x01\x32\x12$\n\x18max_sentencepiece_length\x18\x14"
        b" \x01(\x05:\x02\x31\x36\x12%\n\x17split_by_unicode_script\x18\x15"
        b" \x01(\x08:\x04true\x12\x1d\n\x0fsplit_by_number\x18\x17"
        b" \x01(\x08:\x04true\x12!\n\x13split_by_whitespace\x18\x16"
        b" \x01(\x08:\x04true\x12)\n\x1atreat_whitespace_as_suffix\x18\x18"
        b" \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x0csplit_digits\x18\x19"
        b" \x01(\x08:\x05\x66\x61lse\x12\x17\n\x0f\x63ontrol_symbols\x18\x1e"
        b" \x03(\t\x12\x1c\n\x14user_defined_symbols\x18\x1f \x03(\t\x12\x16\n\x0erequired_chars\x18$"
        # Field number 32 is written as \x20 so it cannot be mistaken for,
        # or merged with, the label tag (a space) that opens the next literal.
        b" \x01(\t\x12\x1c\n\rbyte_fallback\x18# \x01(\x08:\x05\x66\x61lse\x12+\n\x1dvocabulary_output_piece_score\x18\x20"
        b' \x01(\x08:\x04true\x12\x1e\n\x10hard_vocab_limit\x18! \x01(\x08:\x04true\x12\x1c\n\ruse_all_vocab\x18"'
        b" \x01(\x08:\x05\x66\x61lse\x12\x11\n\x06unk_id\x18( \x01(\x05:\x01\x30\x12\x11\n\x06\x62os_id\x18)"
        b" \x01(\x05:\x01\x31\x12\x11\n\x06\x65os_id\x18* \x01(\x05:\x01\x32\x12\x12\n\x06pad_id\x18+"
        b" \x01(\x05:\x02-1\x12\x18\n\tunk_piece\x18- \x01(\t:\x05<unk>\x12\x16\n\tbos_piece\x18."
        b" \x01(\t:\x03<s>\x12\x17\n\teos_piece\x18/ \x01(\t:\x04</s>\x12\x18\n\tpad_piece\x18\x30"
        b" \x01(\t:\x05<pad>\x12\x1a\n\x0bunk_surface\x18, \x01(\t:\x05 \xe2\x81\x87"
        b" \x12+\n\x1ctrain_extremely_large_corpus\x18\x31"
        b' \x01(\x08:\x05\x66\x61lse"5\n\tModelType\x12\x0b\n\x07UNIGRAM\x10\x01\x12\x07\n\x03\x42PE\x10\x02\x12\x08\n\x04WORD\x10\x03\x12\x08\n\x04\x43HAR\x10\x04*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"\xd1\x01\n\x0eNormalizerSpec\x12\x0c\n\x04name\x18\x01'
        b" \x01(\t\x12\x1c\n\x14precompiled_charsmap\x18\x02 \x01(\x0c\x12\x1e\n\x10\x61\x64\x64_dummy_prefix\x18\x03"
        b" \x01(\x08:\x04true\x12&\n\x18remove_extra_whitespaces\x18\x04 \x01(\x08:\x04true\x12"
        b" \n\x12\x65scape_whitespaces\x18\x05 \x01(\x08:\x04true\x12\x1e\n\x16normalization_rule_tsv\x18\x06"
        b' \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"y\n\x0cSelfTestData\x12\x33\n\x07samples\x18\x01'
        b' \x03(\x0b\x32".sentencepiece.SelfTestData.Sample\x1a)\n\x06Sample\x12\r\n\x05input\x18\x01'
        b" \x01(\t\x12\x10\n\x08\x65xpected\x18\x02"
        b' \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"\xfe\x03\n\nModelProto\x12\x37\n\x06pieces\x18\x01'
        b" \x03(\x0b\x32'.sentencepiece.ModelProto.SentencePiece\x12\x30\n\x0ctrainer_spec\x18\x02"
        b" \x01(\x0b\x32\x1a.sentencepiece.TrainerSpec\x12\x36\n\x0fnormalizer_spec\x18\x03"
        b" \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x12\x33\n\x0eself_test_data\x18\x04"
        b" \x01(\x0b\x32\x1b.sentencepiece.SelfTestData\x12\x38\n\x11\x64\x65normalizer_spec\x18\x05"
        b" \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x1a\xd2\x01\n\rSentencePiece\x12\r\n\x05piece\x18\x01"
        b" \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\x42\n\x04type\x18\x03"
        b' \x01(\x0e\x32,.sentencepiece.ModelProto.SentencePiece.Type:\x06NORMAL"T\n\x04Type\x12\n\n\x06NORMAL\x10\x01\x12\x0b\n\x07UNKNOWN\x10\x02\x12\x0b\n\x07\x43ONTROL\x10\x03\x12\x10\n\x0cUSER_DEFINED\x10\x04\x12\x08\n\x04\x42YTE\x10\x06\x12\n\n\x06UNUSED\x10\x05*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\x42\x02H\x03'
    ),
)
# Descriptor for the nested enum sentencepiece.TrainerSpec.ModelType.
# The enum values are built from (name, number) pairs; `index` is the pair's
# position in the list, so the order of the pairs must not change.
_TRAINERSPEC_MODELTYPE = _descriptor.EnumDescriptor(
    name="ModelType",
    full_name="sentencepiece.TrainerSpec.ModelType",
    filename=None,
    file=DESCRIPTOR,
    create_key=_descriptor._internal_create_key,
    values=[
        _descriptor.EnumValueDescriptor(
            name=value_name,
            index=position,
            number=value_number,
            serialized_options=None,
            type=None,
            create_key=_descriptor._internal_create_key,
        )
        for position, (value_name, value_number) in enumerate(
            [
                ("UNIGRAM", 1),
                ("BPE", 2),
                ("WORD", 3),
                ("CHAR", 4),
            ]
        )
    ],
    containing_type=None,
    serialized_options=None,
    serialized_start=1294,
    serialized_end=1347,
)
# Make the enum descriptor known to the default symbol database.
_sym_db.RegisterEnumDescriptor(_TRAINERSPEC_MODELTYPE)
# Descriptor for the nested enum sentencepiece.ModelProto.SentencePiece.Type.
# The enum values are built from (name, number) pairs; `index` is the pair's
# position in the list.  Note that declaration order and numbering differ for
# the last two entries: BYTE (number 6) is listed before UNUSED (number 5).
_MODELPROTO_SENTENCEPIECE_TYPE = _descriptor.EnumDescriptor(
    name="Type",
    full_name="sentencepiece.ModelProto.SentencePiece.Type",
    filename=None,
    file=DESCRIPTOR,
    create_key=_descriptor._internal_create_key,
    values=[
        _descriptor.EnumValueDescriptor(
            name=value_name,
            index=position,
            number=value_number,
            serialized_options=None,
            type=None,
            create_key=_descriptor._internal_create_key,
        )
        for position, (value_name, value_number) in enumerate(
            [
                ("NORMAL", 1),
                ("UNKNOWN", 2),
                ("CONTROL", 3),
                ("USER_DEFINED", 4),
                ("BYTE", 6),
                ("UNUSED", 5),
            ]
        )
    ],
    containing_type=None,
    serialized_options=None,
    serialized_start=2100,
    serialized_end=2184,
)
# Make the enum descriptor known to the default symbol database.
_sym_db.RegisterEnumDescriptor(_MODELPROTO_SENTENCEPIECE_TYPE)
# Message descriptor for sentencepiece.TrainerSpec.
#
# The field descriptors are generated from a row table instead of being
# spelled out one call at a time.  Each row is
#     (name, number, type, cpp_type, label, has_default_value,
#      default_value, serialized_options)
# and is forwarded to the FieldDescriptor keyword of the same name.  `index`
# is the row's position in the table and `full_name` is the message's full
# name plus the field name, so the row order must not change.  Every
# remaining keyword is the same for all fields.
_TRAINERSPEC = _descriptor.Descriptor(
    name="TrainerSpec",
    full_name="sentencepiece.TrainerSpec",
    filename=None,
    file=DESCRIPTOR,
    containing_type=None,
    create_key=_descriptor._internal_create_key,
    fields=[
        _descriptor.FieldDescriptor(
            name=field_name,
            full_name="sentencepiece.TrainerSpec." + field_name,
            index=position,
            number=field_number,
            type=field_type,
            cpp_type=field_cpp_type,
            label=field_label,
            has_default_value=has_default,
            default_value=default,
            message_type=None,
            enum_type=None,
            containing_type=None,
            is_extension=False,
            extension_scope=None,
            serialized_options=field_options,
            file=DESCRIPTOR,
            create_key=_descriptor._internal_create_key,
        )
        for position, (
            field_name,
            field_number,
            field_type,
            field_cpp_type,
            field_label,
            has_default,
            default,
            field_options,
        ) in enumerate(
            [
                ("input", 1, 9, 9, 3, False, [], None),
                ("input_format", 7, 9, 9, 1, False, "", None),
                ("model_prefix", 2, 9, 9, 1, False, "", None),
                ("model_type", 3, 14, 8, 1, True, 1, None),
                ("vocab_size", 4, 5, 1, 1, True, 8000, None),
                ("accept_language", 5, 9, 9, 3, False, [], None),
                ("self_test_sample_size", 6, 5, 1, 1, True, 0, None),
                ("character_coverage", 10, 2, 6, 1, True, 0.9995, None),
                ("input_sentence_size", 11, 4, 4, 1, True, 0, None),
                ("shuffle_input_sentence", 19, 8, 7, 1, True, True, None),
                # The next two fields are the only ones carrying serialized options.
                ("mining_sentence_size", 12, 5, 1, 1, False, 0, b"\030\001"),
                ("training_sentence_size", 13, 5, 1, 1, False, 0, b"\030\001"),
                ("seed_sentencepiece_size", 14, 5, 1, 1, True, 1000000, None),
                ("shrinking_factor", 15, 2, 6, 1, True, 0.75, None),
                ("max_sentence_length", 18, 5, 1, 1, True, 4192, None),
                ("num_threads", 16, 5, 1, 1, True, 16, None),
                ("num_sub_iterations", 17, 5, 1, 1, True, 2, None),
                ("max_sentencepiece_length", 20, 5, 1, 1, True, 16, None),
                ("split_by_unicode_script", 21, 8, 7, 1, True, True, None),
                ("split_by_number", 23, 8, 7, 1, True, True, None),
                ("split_by_whitespace", 22, 8, 7, 1, True, True, None),
                ("treat_whitespace_as_suffix", 24, 8, 7, 1, True, False, None),
                ("split_digits", 25, 8, 7, 1, True, False, None),
                ("control_symbols", 30, 9, 9, 3, False, [], None),
                ("user_defined_symbols", 31, 9, 9, 3, False, [], None),
                ("required_chars", 36, 9, 9, 1, False, "", None),
                ("byte_fallback", 35, 8, 7, 1, True, False, None),
                ("vocabulary_output_piece_score", 32, 8, 7, 1, True, True, None),
                ("hard_vocab_limit", 33, 8, 7, 1, True, True, None),
                ("use_all_vocab", 34, 8, 7, 1, True, False, None),
                ("unk_id", 40, 5, 1, 1, True, 0, None),
                ("bos_id", 41, 5, 1, 1, True, 1, None),
                ("eos_id", 42, 5, 1, 1, True, 2, None),
                ("pad_id", 43, 5, 1, 1, True, -1, None),
                ("unk_piece", 45, 9, 9, 1, True, "<unk>", None),
                ("bos_piece", 46, 9, 9, 1, True, "<s>", None),
                ("eos_piece", 47, 9, 9, 1, True, "</s>", None),
                ("pad_piece", 48, 9, 9, 1, True, "<pad>", None),
                # Non-ASCII default kept as escaped UTF-8 bytes decoded at import time.
                ("unk_surface", 44, 9, 9, 1, True, b" \342\201\207 ".decode("utf-8"), None),
                ("train_extremely_large_corpus", 49, 8, 7, 1, True, False, None),
            ]
        )
    ],
    extensions=[],
    nested_types=[],
    enum_types=[
        _TRAINERSPEC_MODELTYPE,
    ],
    serialized_options=None,
    is_extendable=True,
    syntax="proto2",
    extension_ranges=[
        (200, 536870912),
    ],
    oneofs=[],
    serialized_start=45,
    serialized_end=1358,
)
# Message descriptor for sentencepiece.NormalizerSpec.
#
# The field descriptors are generated from a row table.  Each row is
#     (name, number, type, cpp_type, label, has_default_value, default_value)
# and is forwarded to the FieldDescriptor keyword of the same name.  `index`
# is the row's position in the table and `full_name` is the message's full
# name plus the field name, so the row order must not change.  Every
# remaining keyword is the same for all fields.
_NORMALIZERSPEC = _descriptor.Descriptor(
    name="NormalizerSpec",
    full_name="sentencepiece.NormalizerSpec",
    filename=None,
    file=DESCRIPTOR,
    containing_type=None,
    create_key=_descriptor._internal_create_key,
    fields=[
        _descriptor.FieldDescriptor(
            name=field_name,
            full_name="sentencepiece.NormalizerSpec." + field_name,
            index=position,
            number=field_number,
            type=field_type,
            cpp_type=field_cpp_type,
            label=field_label,
            has_default_value=has_default,
            default_value=default,
            message_type=None,
            enum_type=None,
            containing_type=None,
            is_extension=False,
            extension_scope=None,
            serialized_options=None,
            file=DESCRIPTOR,
            create_key=_descriptor._internal_create_key,
        )
        for position, (
            field_name,
            field_number,
            field_type,
            field_cpp_type,
            field_label,
            has_default,
            default,
        ) in enumerate(
            [
                ("name", 1, 9, 9, 1, False, ""),
                # Only field here whose default is a bytes object, not a str.
                ("precompiled_charsmap", 2, 12, 9, 1, False, b""),
                ("add_dummy_prefix", 3, 8, 7, 1, True, True),
                ("remove_extra_whitespaces", 4, 8, 7, 1, True, True),
                ("escape_whitespaces", 5, 8, 7, 1, True, True),
                ("normalization_rule_tsv", 6, 9, 9, 1, False, ""),
            ]
        )
    ],
    extensions=[],
    nested_types=[],
    enum_types=[],
    serialized_options=None,
    is_extendable=True,
    syntax="proto2",
    extension_ranges=[
        (200, 536870912),
    ],
    oneofs=[],
    serialized_start=1361,
    serialized_end=1570,
)
- _SELFTESTDATA_SAMPLE = _descriptor.Descriptor(
- name="Sample",
- full_name="sentencepiece.SelfTestData.Sample",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="input",
- full_name="sentencepiece.SelfTestData.Sample.input",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="expected",
- full_name="sentencepiece.SelfTestData.Sample.expected",
- index=1,
- number=2,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto2",
- extension_ranges=[],
- oneofs=[],
- serialized_start=1641,
- serialized_end=1682,
- )
- _SELFTESTDATA = _descriptor.Descriptor(
- name="SelfTestData",
- full_name="sentencepiece.SelfTestData",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="samples",
- full_name="sentencepiece.SelfTestData.samples",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[
- _SELFTESTDATA_SAMPLE,
- ],
- enum_types=[],
- serialized_options=None,
- is_extendable=True,
- syntax="proto2",
- extension_ranges=[
- (200, 536870912),
- ],
- oneofs=[],
- serialized_start=1572,
- serialized_end=1693,
- )
- _MODELPROTO_SENTENCEPIECE = _descriptor.Descriptor(
- name="SentencePiece",
- full_name="sentencepiece.ModelProto.SentencePiece",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="piece",
- full_name="sentencepiece.ModelProto.SentencePiece.piece",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="score",
- full_name="sentencepiece.ModelProto.SentencePiece.score",
- index=1,
- number=2,
- type=2,
- cpp_type=6,
- label=1,
- has_default_value=False,
- default_value=float(0),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="type",
- full_name="sentencepiece.ModelProto.SentencePiece.type",
- index=2,
- number=3,
- type=14,
- cpp_type=8,
- label=1,
- has_default_value=True,
- default_value=1,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[
- _MODELPROTO_SENTENCEPIECE_TYPE,
- ],
- serialized_options=None,
- is_extendable=True,
- syntax="proto2",
- extension_ranges=[
- (200, 536870912),
- ],
- oneofs=[],
- serialized_start=1985,
- serialized_end=2195,
- )
- _MODELPROTO = _descriptor.Descriptor(
- name="ModelProto",
- full_name="sentencepiece.ModelProto",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="pieces",
- full_name="sentencepiece.ModelProto.pieces",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="trainer_spec",
- full_name="sentencepiece.ModelProto.trainer_spec",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="normalizer_spec",
- full_name="sentencepiece.ModelProto.normalizer_spec",
- index=2,
- number=3,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="self_test_data",
- full_name="sentencepiece.ModelProto.self_test_data",
- index=3,
- number=4,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="denormalizer_spec",
- full_name="sentencepiece.ModelProto.denormalizer_spec",
- index=4,
- number=5,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[
- _MODELPROTO_SENTENCEPIECE,
- ],
- enum_types=[],
- serialized_options=None,
- is_extendable=True,
- syntax="proto2",
- extension_ranges=[
- (200, 536870912),
- ],
- oneofs=[],
- serialized_start=1696,
- serialized_end=2206,
- )
- _TRAINERSPEC.fields_by_name["model_type"].enum_type = _TRAINERSPEC_MODELTYPE
- _TRAINERSPEC_MODELTYPE.containing_type = _TRAINERSPEC
- _SELFTESTDATA_SAMPLE.containing_type = _SELFTESTDATA
- _SELFTESTDATA.fields_by_name["samples"].message_type = _SELFTESTDATA_SAMPLE
- _MODELPROTO_SENTENCEPIECE.fields_by_name["type"].enum_type = _MODELPROTO_SENTENCEPIECE_TYPE
- _MODELPROTO_SENTENCEPIECE.containing_type = _MODELPROTO
- _MODELPROTO_SENTENCEPIECE_TYPE.containing_type = _MODELPROTO_SENTENCEPIECE
- _MODELPROTO.fields_by_name["pieces"].message_type = _MODELPROTO_SENTENCEPIECE
- _MODELPROTO.fields_by_name["trainer_spec"].message_type = _TRAINERSPEC
- _MODELPROTO.fields_by_name["normalizer_spec"].message_type = _NORMALIZERSPEC
- _MODELPROTO.fields_by_name["self_test_data"].message_type = _SELFTESTDATA
- _MODELPROTO.fields_by_name["denormalizer_spec"].message_type = _NORMALIZERSPEC
- DESCRIPTOR.message_types_by_name["TrainerSpec"] = _TRAINERSPEC
- DESCRIPTOR.message_types_by_name["NormalizerSpec"] = _NORMALIZERSPEC
- DESCRIPTOR.message_types_by_name["SelfTestData"] = _SELFTESTDATA
- DESCRIPTOR.message_types_by_name["ModelProto"] = _MODELPROTO
- _sym_db.RegisterFileDescriptor(DESCRIPTOR)
- TrainerSpec = _reflection.GeneratedProtocolMessageType(
- "TrainerSpec",
- (_message.Message,),
- {
- "DESCRIPTOR": _TRAINERSPEC,
- "__module__": "sentencepiece_model_pb2",
- # @@protoc_insertion_point(class_scope:sentencepiece.TrainerSpec)
- },
- )
- _sym_db.RegisterMessage(TrainerSpec)
- NormalizerSpec = _reflection.GeneratedProtocolMessageType(
- "NormalizerSpec",
- (_message.Message,),
- {
- "DESCRIPTOR": _NORMALIZERSPEC,
- "__module__": "sentencepiece_model_pb2",
- # @@protoc_insertion_point(class_scope:sentencepiece.NormalizerSpec)
- },
- )
- _sym_db.RegisterMessage(NormalizerSpec)
- SelfTestData = _reflection.GeneratedProtocolMessageType(
- "SelfTestData",
- (_message.Message,),
- {
- "Sample": _reflection.GeneratedProtocolMessageType(
- "Sample",
- (_message.Message,),
- {
- "DESCRIPTOR": _SELFTESTDATA_SAMPLE,
- "__module__": "sentencepiece_model_pb2",
- # @@protoc_insertion_point(class_scope:sentencepiece.SelfTestData.Sample)
- },
- ),
- "DESCRIPTOR": _SELFTESTDATA,
- "__module__": "sentencepiece_model_pb2",
- # @@protoc_insertion_point(class_scope:sentencepiece.SelfTestData)
- },
- )
- _sym_db.RegisterMessage(SelfTestData)
- _sym_db.RegisterMessage(SelfTestData.Sample)
- ModelProto = _reflection.GeneratedProtocolMessageType(
- "ModelProto",
- (_message.Message,),
- {
- "SentencePiece": _reflection.GeneratedProtocolMessageType(
- "SentencePiece",
- (_message.Message,),
- {
- "DESCRIPTOR": _MODELPROTO_SENTENCEPIECE,
- "__module__": "sentencepiece_model_pb2",
- # @@protoc_insertion_point(class_scope:sentencepiece.ModelProto.SentencePiece)
- },
- ),
- "DESCRIPTOR": _MODELPROTO,
- "__module__": "sentencepiece_model_pb2",
- # @@protoc_insertion_point(class_scope:sentencepiece.ModelProto)
- },
- )
- _sym_db.RegisterMessage(ModelProto)
- _sym_db.RegisterMessage(ModelProto.SentencePiece)
- DESCRIPTOR._options = None
- _TRAINERSPEC.fields_by_name["mining_sentence_size"]._options = None
- _TRAINERSPEC.fields_by_name["training_sentence_size"]._options = None
- # @@protoc_insertion_point(module_scope)
|