Class SentencepieceModel.TrainerSpec.Builder

java.lang.Object
com.google.protobuf.AbstractMessageLite.Builder
com.google.protobuf.AbstractMessage.Builder<BuilderT>
com.google.protobuf.GeneratedMessageV3.Builder<BuilderT>
com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
com.google.genai.proto.SentencepieceModel.TrainerSpec.Builder
All Implemented Interfaces:
SentencepieceModel.TrainerSpecOrBuilder, com.google.protobuf.GeneratedMessageV3.ExtendableMessageOrBuilder<SentencepieceModel.TrainerSpec>, com.google.protobuf.Message.Builder, com.google.protobuf.MessageLite.Builder, com.google.protobuf.MessageLiteOrBuilder, com.google.protobuf.MessageOrBuilder, Cloneable
Enclosing class:
SentencepieceModel.TrainerSpec

public static final class SentencepieceModel.TrainerSpec.Builder extends com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder> implements SentencepieceModel.TrainerSpecOrBuilder
 TrainerSpec encodes various parameters for SentencePiece training.
 Next id: 55
 
Protobuf type com.google.genai.proto.TrainerSpec
  • Method Details

    • getDescriptor

      public static final com.google.protobuf.Descriptors.Descriptor getDescriptor()
    • clear

      public SentencepieceModel.TrainerSpec.Builder clear()
      Specified by:
      clear in interface com.google.protobuf.Message.Builder
      Specified by:
      clear in interface com.google.protobuf.MessageLite.Builder
      Overrides:
      clear in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
    • getDescriptorForType

      public com.google.protobuf.Descriptors.Descriptor getDescriptorForType()
      Specified by:
      getDescriptorForType in interface com.google.protobuf.Message.Builder
      Specified by:
      getDescriptorForType in interface com.google.protobuf.MessageOrBuilder
      Overrides:
      getDescriptorForType in class com.google.protobuf.GeneratedMessageV3.Builder<SentencepieceModel.TrainerSpec.Builder>
    • getDefaultInstanceForType

      public SentencepieceModel.TrainerSpec getDefaultInstanceForType()
      Specified by:
      getDefaultInstanceForType in interface com.google.protobuf.GeneratedMessageV3.ExtendableMessageOrBuilder<SentencepieceModel.TrainerSpec>
      Specified by:
      getDefaultInstanceForType in interface com.google.protobuf.MessageLiteOrBuilder
      Specified by:
      getDefaultInstanceForType in interface com.google.protobuf.MessageOrBuilder
    • build

      public SentencepieceModel.TrainerSpec build()
      Specified by:
      build in interface com.google.protobuf.Message.Builder
      Specified by:
      build in interface com.google.protobuf.MessageLite.Builder
    • buildPartial

      public SentencepieceModel.TrainerSpec buildPartial()
      Specified by:
      buildPartial in interface com.google.protobuf.Message.Builder
      Specified by:
      buildPartial in interface com.google.protobuf.MessageLite.Builder
    • clone

      public SentencepieceModel.TrainerSpec.Builder clone()
      Specified by:
      clone in interface com.google.protobuf.Message.Builder
      Specified by:
      clone in interface com.google.protobuf.MessageLite.Builder
      Overrides:
      clone in class com.google.protobuf.GeneratedMessageV3.Builder<SentencepieceModel.TrainerSpec.Builder>
    • setField

      public SentencepieceModel.TrainerSpec.Builder setField(com.google.protobuf.Descriptors.FieldDescriptor field, Object value)
      Specified by:
      setField in interface com.google.protobuf.Message.Builder
      Overrides:
      setField in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
    • clearField

      public SentencepieceModel.TrainerSpec.Builder clearField(com.google.protobuf.Descriptors.FieldDescriptor field)
      Specified by:
      clearField in interface com.google.protobuf.Message.Builder
      Overrides:
      clearField in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
    • clearOneof

      public SentencepieceModel.TrainerSpec.Builder clearOneof(com.google.protobuf.Descriptors.OneofDescriptor oneof)
      Specified by:
      clearOneof in interface com.google.protobuf.Message.Builder
      Overrides:
      clearOneof in class com.google.protobuf.GeneratedMessageV3.Builder<SentencepieceModel.TrainerSpec.Builder>
    • setRepeatedField

      public SentencepieceModel.TrainerSpec.Builder setRepeatedField(com.google.protobuf.Descriptors.FieldDescriptor field, int index, Object value)
      Specified by:
      setRepeatedField in interface com.google.protobuf.Message.Builder
      Overrides:
      setRepeatedField in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
    • addRepeatedField

      public SentencepieceModel.TrainerSpec.Builder addRepeatedField(com.google.protobuf.Descriptors.FieldDescriptor field, Object value)
      Specified by:
      addRepeatedField in interface com.google.protobuf.Message.Builder
      Overrides:
      addRepeatedField in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
    • setExtension

      public <Type> SentencepieceModel.TrainerSpec.Builder setExtension(com.google.protobuf.GeneratedMessage.GeneratedExtension<SentencepieceModel.TrainerSpec,Type> extension, Type value)
      Overrides:
      setExtension in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
    • setExtension

      public <Type> SentencepieceModel.TrainerSpec.Builder setExtension(com.google.protobuf.GeneratedMessage.GeneratedExtension<SentencepieceModel.TrainerSpec,List<Type>> extension, int index, Type value)
      Overrides:
      setExtension in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
    • addExtension

      public <Type> SentencepieceModel.TrainerSpec.Builder addExtension(com.google.protobuf.GeneratedMessage.GeneratedExtension<SentencepieceModel.TrainerSpec,List<Type>> extension, Type value)
      Overrides:
      addExtension in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
    • clearExtension

      public <T> SentencepieceModel.TrainerSpec.Builder clearExtension(com.google.protobuf.GeneratedMessage.GeneratedExtension<SentencepieceModel.TrainerSpec,T> extension)
      Overrides:
      clearExtension in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
    • mergeFrom

      public SentencepieceModel.TrainerSpec.Builder mergeFrom(com.google.protobuf.Message other)
      Specified by:
      mergeFrom in interface com.google.protobuf.Message.Builder
      Overrides:
      mergeFrom in class com.google.protobuf.AbstractMessage.Builder<SentencepieceModel.TrainerSpec.Builder>
    • mergeFrom

      public SentencepieceModel.TrainerSpec.Builder mergeFrom(SentencepieceModel.TrainerSpec other)
    • isInitialized

      public final boolean isInitialized()
      Specified by:
      isInitialized in interface com.google.protobuf.MessageLiteOrBuilder
      Overrides:
      isInitialized in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
    • mergeFrom

      public SentencepieceModel.TrainerSpec.Builder mergeFrom(com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws IOException
      Specified by:
      mergeFrom in interface com.google.protobuf.Message.Builder
      Specified by:
      mergeFrom in interface com.google.protobuf.MessageLite.Builder
      Overrides:
      mergeFrom in class com.google.protobuf.AbstractMessage.Builder<SentencepieceModel.TrainerSpec.Builder>
      Throws:
      IOException
    • getInputList

      public com.google.protobuf.ProtocolStringList getInputList()
      /////////////////////////////////////////////////////////////////
       General parameters
      
       Input corpus files.
        Trainer accepts the following two formats:
        A) Monolingual: plain text, one sentence per line.
        B) Bilingual:   TSV, source sentence <tab> target sentence
        When bilingual data is passed, a shared vocabulary model is built.
        Note that the input file must be a raw corpus, not a preprocessed corpus.
        Trainer only loads the first `input_sentence_size` sentences specified
        with this parameter.
       
      repeated string input = 1;
      Specified by:
      getInputList in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      A list containing the input.
    • getInputCount

      public int getInputCount()
      /////////////////////////////////////////////////////////////////
       General parameters
      
       Input corpus files.
        Trainer accepts the following two formats:
        A) Monolingual: plain text, one sentence per line.
        B) Bilingual:   TSV, source sentence <tab> target sentence
        When bilingual data is passed, a shared vocabulary model is built.
        Note that the input file must be a raw corpus, not a preprocessed corpus.
        Trainer only loads the first `input_sentence_size` sentences specified
        with this parameter.
       
      repeated string input = 1;
      Specified by:
      getInputCount in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The count of input.
    • getInput

      public String getInput(int index)
      /////////////////////////////////////////////////////////////////
       General parameters
      
       Input corpus files.
        Trainer accepts the following two formats:
        A) Monolingual: plain text, one sentence per line.
        B) Bilingual:   TSV, source sentence <tab> target sentence
        When bilingual data is passed, a shared vocabulary model is built.
        Note that the input file must be a raw corpus, not a preprocessed corpus.
        Trainer only loads the first `input_sentence_size` sentences specified
        with this parameter.
       
      repeated string input = 1;
      Specified by:
      getInput in interface SentencepieceModel.TrainerSpecOrBuilder
      Parameters:
      index - The index of the element to return.
      Returns:
      The input at the given index.
    • getInputBytes

      public com.google.protobuf.ByteString getInputBytes(int index)
      /////////////////////////////////////////////////////////////////
       General parameters
      
       Input corpus files.
        Trainer accepts the following two formats:
        A) Monolingual: plain text, one sentence per line.
        B) Bilingual:   TSV, source sentence <tab> target sentence
        When bilingual data is passed, a shared vocabulary model is built.
        Note that the input file must be a raw corpus, not a preprocessed corpus.
        Trainer only loads the first `input_sentence_size` sentences specified
        with this parameter.
       
      repeated string input = 1;
      Specified by:
      getInputBytes in interface SentencepieceModel.TrainerSpecOrBuilder
      Parameters:
      index - The index of the value to return.
      Returns:
      The bytes of the input at the given index.
    • setInput

      public SentencepieceModel.TrainerSpec.Builder setInput(int index, String value)
      /////////////////////////////////////////////////////////////////
       General parameters
      
       Input corpus files.
        Trainer accepts the following two formats:
        A) Monolingual: plain text, one sentence per line.
        B) Bilingual:   TSV, source sentence <tab> target sentence
        When bilingual data is passed, a shared vocabulary model is built.
        Note that the input file must be a raw corpus, not a preprocessed corpus.
        Trainer only loads the first `input_sentence_size` sentences specified
        with this parameter.
       
      repeated string input = 1;
      Parameters:
      index - The index to set the value at.
      value - The input to set.
      Returns:
      This builder for chaining.
    • addInput

      public SentencepieceModel.TrainerSpec.Builder addInput(String value)
      /////////////////////////////////////////////////////////////////
       General parameters
      
       Input corpus files.
        Trainer accepts the following two formats:
        A) Monolingual: plain text, one sentence per line.
        B) Bilingual:   TSV, source sentence <tab> target sentence
        When bilingual data is passed, a shared vocabulary model is built.
        Note that the input file must be a raw corpus, not a preprocessed corpus.
        Trainer only loads the first `input_sentence_size` sentences specified
        with this parameter.
       
      repeated string input = 1;
      Parameters:
      value - The input to add.
      Returns:
      This builder for chaining.
    • addAllInput

      public SentencepieceModel.TrainerSpec.Builder addAllInput(Iterable<String> values)
      /////////////////////////////////////////////////////////////////
       General parameters
      
       Input corpus files.
        Trainer accepts the following two formats:
        A) Monolingual: plain text, one sentence per line.
        B) Bilingual:   TSV, source sentence <tab> target sentence
        When bilingual data is passed, a shared vocabulary model is built.
        Note that the input file must be a raw corpus, not a preprocessed corpus.
        Trainer only loads the first `input_sentence_size` sentences specified
        with this parameter.
       
      repeated string input = 1;
      Parameters:
      values - The input to add.
      Returns:
      This builder for chaining.
    • clearInput

      public SentencepieceModel.TrainerSpec.Builder clearInput()
      /////////////////////////////////////////////////////////////////
       General parameters
      
       Input corpus files.
        Trainer accepts the following two formats:
        A) Monolingual: plain text, one sentence per line.
        B) Bilingual:   TSV, source sentence <tab> target sentence
        When bilingual data is passed, a shared vocabulary model is built.
        Note that the input file must be a raw corpus, not a preprocessed corpus.
        Trainer only loads the first `input_sentence_size` sentences specified
        with this parameter.
       
      repeated string input = 1;
      Returns:
      This builder for chaining.
    • addInputBytes

      public SentencepieceModel.TrainerSpec.Builder addInputBytes(com.google.protobuf.ByteString value)
      /////////////////////////////////////////////////////////////////
       General parameters
      
       Input corpus files.
        Trainer accepts the following two formats:
        A) Monolingual: plain text, one sentence per line.
        B) Bilingual:   TSV, source sentence <tab> target sentence
        When bilingual data is passed, a shared vocabulary model is built.
        Note that the input file must be a raw corpus, not a preprocessed corpus.
        Trainer only loads the first `input_sentence_size` sentences specified
        with this parameter.
       
      repeated string input = 1;
      Parameters:
      value - The bytes of the input to add.
      Returns:
      This builder for chaining.
    • hasInputFormat

      public boolean hasInputFormat()
       Input corpus format:
       "text": one-sentence-per-line text format (default)
       "tsv":  sentence <tab> freq
       
      optional string input_format = 7;
      Specified by:
      hasInputFormat in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the inputFormat field is set.
    • getInputFormat

      public String getInputFormat()
       Input corpus format:
       "text": one-sentence-per-line text format (default)
       "tsv":  sentence <tab> freq
       
      optional string input_format = 7;
      Specified by:
      getInputFormat in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The inputFormat.
    • getInputFormatBytes

      public com.google.protobuf.ByteString getInputFormatBytes()
       Input corpus format:
       "text": one-sentence-per-line text format (default)
       "tsv":  sentence <tab> freq
       
      optional string input_format = 7;
      Specified by:
      getInputFormatBytes in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The bytes for inputFormat.
    • setInputFormat

      public SentencepieceModel.TrainerSpec.Builder setInputFormat(String value)
       Input corpus format:
       "text": one-sentence-per-line text format (default)
       "tsv":  sentence <tab> freq
       
      optional string input_format = 7;
      Parameters:
      value - The inputFormat to set.
      Returns:
      This builder for chaining.
    • clearInputFormat

      public SentencepieceModel.TrainerSpec.Builder clearInputFormat()
       Input corpus format:
       "text": one-sentence-per-line text format (default)
       "tsv":  sentence <tab> freq
       
      optional string input_format = 7;
      Returns:
      This builder for chaining.
    • setInputFormatBytes

      public SentencepieceModel.TrainerSpec.Builder setInputFormatBytes(com.google.protobuf.ByteString value)
       Input corpus format:
       "text": one-sentence-per-line text format (default)
       "tsv":  sentence <tab> freq
       
      optional string input_format = 7;
      Parameters:
      value - The bytes for inputFormat to set.
      Returns:
      This builder for chaining.
    • hasModelPrefix

      public boolean hasModelPrefix()
       Output model file prefix.
       <model_prefix>.model and <model_prefix>.vocab are generated.
       
      optional string model_prefix = 2;
      Specified by:
      hasModelPrefix in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the modelPrefix field is set.
    • getModelPrefix

      public String getModelPrefix()
       Output model file prefix.
       <model_prefix>.model and <model_prefix>.vocab are generated.
       
      optional string model_prefix = 2;
      Specified by:
      getModelPrefix in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The modelPrefix.
    • getModelPrefixBytes

      public com.google.protobuf.ByteString getModelPrefixBytes()
       Output model file prefix.
       <model_prefix>.model and <model_prefix>.vocab are generated.
       
      optional string model_prefix = 2;
      Specified by:
      getModelPrefixBytes in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The bytes for modelPrefix.
    • setModelPrefix

      public SentencepieceModel.TrainerSpec.Builder setModelPrefix(String value)
       Output model file prefix.
       <model_prefix>.model and <model_prefix>.vocab are generated.
       
      optional string model_prefix = 2;
      Parameters:
      value - The modelPrefix to set.
      Returns:
      This builder for chaining.
    • clearModelPrefix

      public SentencepieceModel.TrainerSpec.Builder clearModelPrefix()
       Output model file prefix.
       <model_prefix>.model and <model_prefix>.vocab are generated.
       
      optional string model_prefix = 2;
      Returns:
      This builder for chaining.
    • setModelPrefixBytes

      public SentencepieceModel.TrainerSpec.Builder setModelPrefixBytes(com.google.protobuf.ByteString value)
       Output model file prefix.
       <model_prefix>.model and <model_prefix>.vocab are generated.
       
      optional string model_prefix = 2;
      Parameters:
      value - The bytes for modelPrefix to set.
      Returns:
      This builder for chaining.
    • hasModelType

      public boolean hasModelType()
      optional .com.google.genai.proto.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];
      Specified by:
      hasModelType in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the modelType field is set.
    • getModelType

      public SentencepieceModel.TrainerSpec.ModelType getModelType()
      optional .com.google.genai.proto.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];
      Specified by:
      getModelType in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The modelType.
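    • setModelType

      public SentencepieceModel.TrainerSpec.Builder setModelType(SentencepieceModel.TrainerSpec.ModelType value)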
      optional .com.google.genai.proto.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];
      Parameters:
      value - The modelType to set.
      Returns:
      This builder for chaining.
    • clearModelType

      public SentencepieceModel.TrainerSpec.Builder clearModelType()
      optional .com.google.genai.proto.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];
      Returns:
      This builder for chaining.
    • hasVocabSize

      public boolean hasVocabSize()
       Vocabulary size. 8k is the default size.
       
      optional int32 vocab_size = 4 [default = 8000];
      Specified by:
      hasVocabSize in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the vocabSize field is set.
    • getVocabSize

      public int getVocabSize()
       Vocabulary size. 8k is the default size.
       
      optional int32 vocab_size = 4 [default = 8000];
      Specified by:
      getVocabSize in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The vocabSize.
    • setVocabSize

      public SentencepieceModel.TrainerSpec.Builder setVocabSize(int value)
       Vocabulary size. 8k is the default size.
       
      optional int32 vocab_size = 4 [default = 8000];
      Parameters:
      value - The vocabSize to set.
      Returns:
      This builder for chaining.
    • clearVocabSize

      public SentencepieceModel.TrainerSpec.Builder clearVocabSize()
       Vocabulary size. 8k is the default size.
       
      optional int32 vocab_size = 4 [default = 8000];
      Returns:
      This builder for chaining.
    • getAcceptLanguageList

      public com.google.protobuf.ProtocolStringList getAcceptLanguageList()
       List of the languages this model can accept.
       Since the model is language-agnostic, this field is used as a reference.
       
      repeated string accept_language = 5;
      Specified by:
      getAcceptLanguageList in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      A list containing the acceptLanguage.
    • getAcceptLanguageCount

      public int getAcceptLanguageCount()
       List of the languages this model can accept.
       Since the model is language-agnostic, this field is used as a reference.
       
      repeated string accept_language = 5;
      Specified by:
      getAcceptLanguageCount in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The count of acceptLanguage.
    • getAcceptLanguage

      public String getAcceptLanguage(int index)
       List of the languages this model can accept.
       Since the model is language-agnostic, this field is used as a reference.
       
      repeated string accept_language = 5;
      Specified by:
      getAcceptLanguage in interface SentencepieceModel.TrainerSpecOrBuilder
      Parameters:
      index - The index of the element to return.
      Returns:
      The acceptLanguage at the given index.
    • getAcceptLanguageBytes

      public com.google.protobuf.ByteString getAcceptLanguageBytes(int index)
       List of the languages this model can accept.
       Since the model is language-agnostic, this field is used as a reference.
       
      repeated string accept_language = 5;
      Specified by:
      getAcceptLanguageBytes in interface SentencepieceModel.TrainerSpecOrBuilder
      Parameters:
      index - The index of the value to return.
      Returns:
      The bytes of the acceptLanguage at the given index.
    • setAcceptLanguage

      public SentencepieceModel.TrainerSpec.Builder setAcceptLanguage(int index, String value)
       List of the languages this model can accept.
       Since the model is language-agnostic, this field is used as a reference.
       
      repeated string accept_language = 5;
      Parameters:
      index - The index to set the value at.
      value - The acceptLanguage to set.
      Returns:
      This builder for chaining.
    • addAcceptLanguage

      public SentencepieceModel.TrainerSpec.Builder addAcceptLanguage(String value)
       List of the languages this model can accept.
       Since the model is language-agnostic, this field is used as a reference.
       
      repeated string accept_language = 5;
      Parameters:
      value - The acceptLanguage to add.
      Returns:
      This builder for chaining.
    • addAllAcceptLanguage

      public SentencepieceModel.TrainerSpec.Builder addAllAcceptLanguage(Iterable<String> values)
       List of the languages this model can accept.
       Since the model is language-agnostic, this field is used as a reference.
       
      repeated string accept_language = 5;
      Parameters:
      values - The acceptLanguage to add.
      Returns:
      This builder for chaining.
    • clearAcceptLanguage

      public SentencepieceModel.TrainerSpec.Builder clearAcceptLanguage()
       List of the languages this model can accept.
       Since the model is language-agnostic, this field is used as a reference.
       
      repeated string accept_language = 5;
      Returns:
      This builder for chaining.
    • addAcceptLanguageBytes

      public SentencepieceModel.TrainerSpec.Builder addAcceptLanguageBytes(com.google.protobuf.ByteString value)
       List of the languages this model can accept.
       Since the model is language-agnostic, this field is used as a reference.
       
      repeated string accept_language = 5;
      Parameters:
      value - The bytes of the acceptLanguage to add.
      Returns:
      This builder for chaining.
    • hasSelfTestSampleSize

      public boolean hasSelfTestSampleSize()
       Size of self-test samples, which are encoded in the model file.
       
      optional int32 self_test_sample_size = 6 [default = 0];
      Specified by:
      hasSelfTestSampleSize in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the selfTestSampleSize field is set.
    • getSelfTestSampleSize

      public int getSelfTestSampleSize()
       Size of self-test samples, which are encoded in the model file.
       
      optional int32 self_test_sample_size = 6 [default = 0];
      Specified by:
      getSelfTestSampleSize in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The selfTestSampleSize.
    • setSelfTestSampleSize

      public SentencepieceModel.TrainerSpec.Builder setSelfTestSampleSize(int value)
       Size of self-test samples, which are encoded in the model file.
       
      optional int32 self_test_sample_size = 6 [default = 0];
      Parameters:
      value - The selfTestSampleSize to set.
      Returns:
      This builder for chaining.
    • clearSelfTestSampleSize

      public SentencepieceModel.TrainerSpec.Builder clearSelfTestSampleSize()
       Size of self-test samples, which are encoded in the model file.
       
      optional int32 self_test_sample_size = 6 [default = 0];
      Returns:
      This builder for chaining.
    • hasEnableDifferentialPrivacy

      public boolean hasEnableDifferentialPrivacy()
       Whether to use the differentially private (DP) version of SentencePiece. Use it with the TSV input format
       (requires precomputed word tab counts to work).
       
      optional bool enable_differential_privacy = 50 [default = false];
      Specified by:
      hasEnableDifferentialPrivacy in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the enableDifferentialPrivacy field is set.
    • getEnableDifferentialPrivacy

      public boolean getEnableDifferentialPrivacy()
       Whether to use the differentially private (DP) version of SentencePiece. Use it with the TSV input format
       (requires precomputed word tab counts to work).
       
      optional bool enable_differential_privacy = 50 [default = false];
      Specified by:
      getEnableDifferentialPrivacy in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The enableDifferentialPrivacy.
    • setEnableDifferentialPrivacy

      public SentencepieceModel.TrainerSpec.Builder setEnableDifferentialPrivacy(boolean value)
       Whether to use the differentially private (DP) version of SentencePiece. Use it with the TSV input format
       (requires precomputed word tab counts to work).
       
      optional bool enable_differential_privacy = 50 [default = false];
      Parameters:
      value - The enableDifferentialPrivacy to set.
      Returns:
      This builder for chaining.
    • clearEnableDifferentialPrivacy

      public SentencepieceModel.TrainerSpec.Builder clearEnableDifferentialPrivacy()
       Whether to use the differentially private (DP) version of SentencePiece. Use it with the TSV input format
       (requires precomputed word tab counts to work).
       
      optional bool enable_differential_privacy = 50 [default = false];
      Returns:
      This builder for chaining.
    • hasDifferentialPrivacyNoiseLevel

      public boolean hasDifferentialPrivacyNoiseLevel()
       Set these parameters if you need the differentially private (DP) version of SentencePiece.
       Standard deviation of the noise to add.
       
      optional float differential_privacy_noise_level = 51 [default = 0];
      Specified by:
      hasDifferentialPrivacyNoiseLevel in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the differentialPrivacyNoiseLevel field is set.
    • getDifferentialPrivacyNoiseLevel

      public float getDifferentialPrivacyNoiseLevel()
       Set these parameters if you need the differentially private (DP) version of SentencePiece.
       Standard deviation of the noise to add.
       
      optional float differential_privacy_noise_level = 51 [default = 0];
      Specified by:
      getDifferentialPrivacyNoiseLevel in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The differentialPrivacyNoiseLevel.
    • setDifferentialPrivacyNoiseLevel

      public SentencepieceModel.TrainerSpec.Builder setDifferentialPrivacyNoiseLevel(float value)
       Set these parameters if you need the differentially private (DP) version of SentencePiece.
       Standard deviation of the noise to add.
       
      optional float differential_privacy_noise_level = 51 [default = 0];
      Parameters:
      value - The differentialPrivacyNoiseLevel to set.
      Returns:
      This builder for chaining.
    • clearDifferentialPrivacyNoiseLevel

      public SentencepieceModel.TrainerSpec.Builder clearDifferentialPrivacyNoiseLevel()
       Set these parameters if you need the differentially private (DP) version of SentencePiece.
       Standard deviation of the noise to add.
       
      optional float differential_privacy_noise_level = 51 [default = 0];
      Returns:
      This builder for chaining.
    • hasDifferentialPrivacyClippingThreshold

      public boolean hasDifferentialPrivacyClippingThreshold()
       Clipping threshold to apply after adding noise. All the words with
       frequency less than this value are dropped.
       
      optional uint64 differential_privacy_clipping_threshold = 52 [default = 0];
      Specified by:
      hasDifferentialPrivacyClippingThreshold in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the differentialPrivacyClippingThreshold field is set.
    • getDifferentialPrivacyClippingThreshold

      public long getDifferentialPrivacyClippingThreshold()
       Clipping threshold to apply after adding noise. All the words with
       frequency less than this value are dropped.
       
      optional uint64 differential_privacy_clipping_threshold = 52 [default = 0];
      Specified by:
      getDifferentialPrivacyClippingThreshold in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The differentialPrivacyClippingThreshold.
    • setDifferentialPrivacyClippingThreshold

      public SentencepieceModel.TrainerSpec.Builder setDifferentialPrivacyClippingThreshold(long value)
       Clipping threshold to apply after adding noise. All the words with
       frequency less than this value are dropped.
       
      optional uint64 differential_privacy_clipping_threshold = 52 [default = 0];
      Parameters:
      value - The differentialPrivacyClippingThreshold to set.
      Returns:
      This builder for chaining.
    • clearDifferentialPrivacyClippingThreshold

      public SentencepieceModel.TrainerSpec.Builder clearDifferentialPrivacyClippingThreshold()
       Clipping threshold to apply after adding noise. All the words with
       frequency less than this value are dropped.
       
      optional uint64 differential_privacy_clipping_threshold = 52 [default = 0];
      Returns:
      This builder for chaining.
    • hasCharacterCoverage

      public boolean hasCharacterCoverage()
      /////////////////////////////////////////////////////////////////
       Training parameters.
      
       Uses characters which cover the corpus with the ratio of `character_coverage`.
       This parameter determines the basic alphabet of the sentence pieces.
       The remaining 1.0 - `character_coverage` fraction of characters is treated as UNK.
       See also required_chars field.
       
      optional float character_coverage = 10 [default = 0.9995];
      Specified by:
      hasCharacterCoverage in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the characterCoverage field is set.
    • getCharacterCoverage

      public float getCharacterCoverage()
      /////////////////////////////////////////////////////////////////
       Training parameters.
      
       Uses characters which cover the corpus with the ratio of `character_coverage`.
       This parameter determines the basic alphabet of the sentence pieces.
       The remaining 1.0 - `character_coverage` fraction of characters is treated as UNK.
       See also required_chars field.
       
      optional float character_coverage = 10 [default = 0.9995];
      Specified by:
      getCharacterCoverage in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The characterCoverage.
    • setCharacterCoverage

      public SentencepieceModel.TrainerSpec.Builder setCharacterCoverage(float value)
      /////////////////////////////////////////////////////////////////
       Training parameters.
      
       Uses characters which cover the corpus with the ratio of `character_coverage`.
       This parameter determines the basic alphabet of the sentence pieces.
       The remaining 1.0 - `character_coverage` fraction of characters is treated as UNK.
       See also required_chars field.
       
      optional float character_coverage = 10 [default = 0.9995];
      Parameters:
      value - The characterCoverage to set.
      Returns:
      This builder for chaining.
    • clearCharacterCoverage

      public SentencepieceModel.TrainerSpec.Builder clearCharacterCoverage()
      /////////////////////////////////////////////////////////////////
       Training parameters.
      
       Uses characters which cover the corpus with the ratio of `character_coverage`.
       This parameter determines the basic alphabet of the sentence pieces.
       The remaining 1.0 - `character_coverage` fraction of characters is treated as UNK.
       See also required_chars field.
       
      optional float character_coverage = 10 [default = 0.9995];
      Returns:
      This builder for chaining.
    • hasInputSentenceSize

      public boolean hasInputSentenceSize()
       Maximum size of sentences the trainer loads from `input` parameter.
       Trainer simply loads the `input` files in sequence.
       It is better to shuffle the input corpus randomly.
       
      optional uint64 input_sentence_size = 11 [default = 0];
      Specified by:
      hasInputSentenceSize in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the inputSentenceSize field is set.
    • getInputSentenceSize

      public long getInputSentenceSize()
       Maximum size of sentences the trainer loads from `input` parameter.
       Trainer simply loads the `input` files in sequence.
       It is better to shuffle the input corpus randomly.
       
      optional uint64 input_sentence_size = 11 [default = 0];
      Specified by:
      getInputSentenceSize in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The inputSentenceSize.
    • setInputSentenceSize

      public SentencepieceModel.TrainerSpec.Builder setInputSentenceSize(long value)
       Maximum size of sentences the trainer loads from `input` parameter.
       Trainer simply loads the `input` files in sequence.
       It is better to shuffle the input corpus randomly.
       
      optional uint64 input_sentence_size = 11 [default = 0];
      Parameters:
      value - The inputSentenceSize to set.
      Returns:
      This builder for chaining.
    • clearInputSentenceSize

      public SentencepieceModel.TrainerSpec.Builder clearInputSentenceSize()
       Maximum size of sentences the trainer loads from `input` parameter.
       Trainer simply loads the `input` files in sequence.
       It is better to shuffle the input corpus randomly.
       
      optional uint64 input_sentence_size = 11 [default = 0];
      Returns:
      This builder for chaining.
    • hasShuffleInputSentence

      public boolean hasShuffleInputSentence()
      optional bool shuffle_input_sentence = 19 [default = true];
      Specified by:
      hasShuffleInputSentence in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the shuffleInputSentence field is set.
    • getShuffleInputSentence

      public boolean getShuffleInputSentence()
      optional bool shuffle_input_sentence = 19 [default = true];
      Specified by:
      getShuffleInputSentence in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The shuffleInputSentence.
    • setShuffleInputSentence

      public SentencepieceModel.TrainerSpec.Builder setShuffleInputSentence(boolean value)
      optional bool shuffle_input_sentence = 19 [default = true];
      Parameters:
      value - The shuffleInputSentence to set.
      Returns:
      This builder for chaining.
    • clearShuffleInputSentence

      public SentencepieceModel.TrainerSpec.Builder clearShuffleInputSentence()
      optional bool shuffle_input_sentence = 19 [default = true];
      Returns:
      This builder for chaining.
    • hasMiningSentenceSize

      @Deprecated public boolean hasMiningSentenceSize()
      Deprecated.
      com.google.genai.proto.TrainerSpec.mining_sentence_size is deprecated. See sentencepiece_model.proto;l=96
       Maximum size of sentences to make seed sentence pieces.
       Extended suffix array is constructed to extract frequent
       sub-strings from the corpus. This uses 20N working space,
       where N is the size of corpus.
       
      optional int32 mining_sentence_size = 12 [deprecated = true];
      Specified by:
      hasMiningSentenceSize in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the miningSentenceSize field is set.
    • getMiningSentenceSize

      @Deprecated public int getMiningSentenceSize()
      Deprecated.
      com.google.genai.proto.TrainerSpec.mining_sentence_size is deprecated. See sentencepiece_model.proto;l=96
       Maximum size of sentences to make seed sentence pieces.
       Extended suffix array is constructed to extract frequent
       sub-strings from the corpus. This uses 20N working space,
       where N is the size of corpus.
       
      optional int32 mining_sentence_size = 12 [deprecated = true];
      Specified by:
      getMiningSentenceSize in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The miningSentenceSize.
    • setMiningSentenceSize

      @Deprecated public SentencepieceModel.TrainerSpec.Builder setMiningSentenceSize(int value)
      Deprecated.
       Maximum size of sentences to make seed sentence pieces.
       Extended suffix array is constructed to extract frequent
       sub-strings from the corpus. This uses 20N working space,
       where N is the size of corpus.
       
      optional int32 mining_sentence_size = 12 [deprecated = true];
      Parameters:
      value - The miningSentenceSize to set.
      Returns:
      This builder for chaining.
    • clearMiningSentenceSize

      @Deprecated public SentencepieceModel.TrainerSpec.Builder clearMiningSentenceSize()
      Deprecated.
       Maximum size of sentences to make seed sentence pieces.
       Extended suffix array is constructed to extract frequent
       sub-strings from the corpus. This uses 20N working space,
       where N is the size of corpus.
       
      optional int32 mining_sentence_size = 12 [deprecated = true];
      Returns:
      This builder for chaining.
    • hasTrainingSentenceSize

      @Deprecated public boolean hasTrainingSentenceSize()
      Deprecated.
      com.google.genai.proto.TrainerSpec.training_sentence_size is deprecated. See sentencepiece_model.proto;l=99
       Maximum size of sentences to train sentence pieces.
       
      optional int32 training_sentence_size = 13 [deprecated = true];
      Specified by:
      hasTrainingSentenceSize in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the trainingSentenceSize field is set.
    • getTrainingSentenceSize

      @Deprecated public int getTrainingSentenceSize()
      Deprecated.
      com.google.genai.proto.TrainerSpec.training_sentence_size is deprecated. See sentencepiece_model.proto;l=99
       Maximum size of sentences to train sentence pieces.
       
      optional int32 training_sentence_size = 13 [deprecated = true];
      Specified by:
      getTrainingSentenceSize in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The trainingSentenceSize.
    • setTrainingSentenceSize

      @Deprecated public SentencepieceModel.TrainerSpec.Builder setTrainingSentenceSize(int value)
      Deprecated.
       Maximum size of sentences to train sentence pieces.
       
      optional int32 training_sentence_size = 13 [deprecated = true];
      Parameters:
      value - The trainingSentenceSize to set.
      Returns:
      This builder for chaining.
    • clearTrainingSentenceSize

      @Deprecated public SentencepieceModel.TrainerSpec.Builder clearTrainingSentenceSize()
      Deprecated.
       Maximum size of sentences to train sentence pieces.
       
      optional int32 training_sentence_size = 13 [deprecated = true];
      Returns:
      This builder for chaining.
    • hasSeedSentencepieceSize

      public boolean hasSeedSentencepieceSize()
       The size of seed sentencepieces.
       `seed_sentencepiece_size` must be larger than `vocab_size`.
       
      optional int32 seed_sentencepiece_size = 14 [default = 1000000];
      Specified by:
      hasSeedSentencepieceSize in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the seedSentencepieceSize field is set.
    • getSeedSentencepieceSize

      public int getSeedSentencepieceSize()
       The size of seed sentencepieces.
       `seed_sentencepiece_size` must be larger than `vocab_size`.
       
      optional int32 seed_sentencepiece_size = 14 [default = 1000000];
      Specified by:
      getSeedSentencepieceSize in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The seedSentencepieceSize.
    • setSeedSentencepieceSize

      public SentencepieceModel.TrainerSpec.Builder setSeedSentencepieceSize(int value)
       The size of seed sentencepieces.
       `seed_sentencepiece_size` must be larger than `vocab_size`.
       
      optional int32 seed_sentencepiece_size = 14 [default = 1000000];
      Parameters:
      value - The seedSentencepieceSize to set.
      Returns:
      This builder for chaining.
    • clearSeedSentencepieceSize

      public SentencepieceModel.TrainerSpec.Builder clearSeedSentencepieceSize()
       The size of seed sentencepieces.
       `seed_sentencepiece_size` must be larger than `vocab_size`.
       
      optional int32 seed_sentencepiece_size = 14 [default = 1000000];
      Returns:
      This builder for chaining.
    • hasShrinkingFactor

      public boolean hasShrinkingFactor()
       In every EM sub-iteration, keeps the top
       `shrinking_factor` * `current sentencepieces size` with respect to
       the loss of the sentence piece. This value should be smaller than 1.0.
       
      optional float shrinking_factor = 15 [default = 0.75];
      Specified by:
      hasShrinkingFactor in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the shrinkingFactor field is set.
    • getShrinkingFactor

      public float getShrinkingFactor()
       In every EM sub-iteration, keeps the top
       `shrinking_factor` * `current sentencepieces size` with respect to
       the loss of the sentence piece. This value should be smaller than 1.0.
       
      optional float shrinking_factor = 15 [default = 0.75];
      Specified by:
      getShrinkingFactor in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The shrinkingFactor.
    • setShrinkingFactor

      public SentencepieceModel.TrainerSpec.Builder setShrinkingFactor(float value)
       In every EM sub-iteration, keeps the top
       `shrinking_factor` * `current sentencepieces size` with respect to
       the loss of the sentence piece. This value should be smaller than 1.0.
       
      optional float shrinking_factor = 15 [default = 0.75];
      Parameters:
      value - The shrinkingFactor to set.
      Returns:
      This builder for chaining.
    • clearShrinkingFactor

      public SentencepieceModel.TrainerSpec.Builder clearShrinkingFactor()
       In every EM sub-iteration, keeps the top
       `shrinking_factor` * `current sentencepieces size` with respect to
       the loss of the sentence piece. This value should be smaller than 1.0.
       
      optional float shrinking_factor = 15 [default = 0.75];
      Returns:
      This builder for chaining.
    • hasMaxSentenceLength

      public boolean hasMaxSentenceLength()
       The maximum sentence length in bytes. Sentences with a length
       larger than `max_sentence_length` are simply ignored.
       Longer input tends to bring the following risks:
        * Overflow during EM training (unigram language model only)
        * Performance drop because of O(n log n) cost in BPE.
       
      optional int32 max_sentence_length = 18 [default = 4192];
      Specified by:
      hasMaxSentenceLength in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the maxSentenceLength field is set.
    • getMaxSentenceLength

      public int getMaxSentenceLength()
       The maximum sentence length in bytes. Sentences with a length
       larger than `max_sentence_length` are simply ignored.
       Longer input tends to bring the following risks:
        * Overflow during EM training (unigram language model only)
        * Performance drop because of O(n log n) cost in BPE.
       
      optional int32 max_sentence_length = 18 [default = 4192];
      Specified by:
      getMaxSentenceLength in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The maxSentenceLength.
    • setMaxSentenceLength

      public SentencepieceModel.TrainerSpec.Builder setMaxSentenceLength(int value)
       The maximum sentence length in bytes. Sentences with a length
       larger than `max_sentence_length` are simply ignored.
       Longer input tends to bring the following risks:
        * Overflow during EM training (unigram language model only)
        * Performance drop because of O(n log n) cost in BPE.
       
      optional int32 max_sentence_length = 18 [default = 4192];
      Parameters:
      value - The maxSentenceLength to set.
      Returns:
      This builder for chaining.
    • clearMaxSentenceLength

      public SentencepieceModel.TrainerSpec.Builder clearMaxSentenceLength()
       The maximum sentence length in bytes. Sentences with a length
       larger than `max_sentence_length` are simply ignored.
       Longer input tends to bring the following risks:
        * Overflow during EM training (unigram language model only)
        * Performance drop because of O(n log n) cost in BPE.
       
      optional int32 max_sentence_length = 18 [default = 4192];
      Returns:
      This builder for chaining.
    • hasNumThreads

      public boolean hasNumThreads()
       Number of threads in the training.
       
      optional int32 num_threads = 16 [default = 16];
      Specified by:
      hasNumThreads in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the numThreads field is set.
    • getNumThreads

      public int getNumThreads()
       Number of threads in the training.
       
      optional int32 num_threads = 16 [default = 16];
      Specified by:
      getNumThreads in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The numThreads.
    • setNumThreads

      public SentencepieceModel.TrainerSpec.Builder setNumThreads(int value)
       Number of threads in the training.
       
      optional int32 num_threads = 16 [default = 16];
      Parameters:
      value - The numThreads to set.
      Returns:
      This builder for chaining.
    • clearNumThreads

      public SentencepieceModel.TrainerSpec.Builder clearNumThreads()
       Number of threads in the training.
       
      optional int32 num_threads = 16 [default = 16];
      Returns:
      This builder for chaining.
    • hasNumSubIterations

      public boolean hasNumSubIterations()
       Number of EM sub iterations.
       
      optional int32 num_sub_iterations = 17 [default = 2];
      Specified by:
      hasNumSubIterations in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the numSubIterations field is set.
    • getNumSubIterations

      public int getNumSubIterations()
       Number of EM sub iterations.
       
      optional int32 num_sub_iterations = 17 [default = 2];
      Specified by:
      getNumSubIterations in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The numSubIterations.
    • setNumSubIterations

      public SentencepieceModel.TrainerSpec.Builder setNumSubIterations(int value)
       Number of EM sub iterations.
       
      optional int32 num_sub_iterations = 17 [default = 2];
      Parameters:
      value - The numSubIterations to set.
      Returns:
      This builder for chaining.
    • clearNumSubIterations

      public SentencepieceModel.TrainerSpec.Builder clearNumSubIterations()
       Number of EM sub iterations.
       
      optional int32 num_sub_iterations = 17 [default = 2];
      Returns:
      This builder for chaining.
    • hasMaxSentencepieceLength

      public boolean hasMaxSentencepieceLength()
      /////////////////////////////////////////////////////////////////
       SentencePiece parameters which control the shapes of sentence piece.
      
       Maximum length of sentencepiece.
       
      optional int32 max_sentencepiece_length = 20 [default = 16];
      Specified by:
      hasMaxSentencepieceLength in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the maxSentencepieceLength field is set.
    • getMaxSentencepieceLength

      public int getMaxSentencepieceLength()
      /////////////////////////////////////////////////////////////////
       SentencePiece parameters which control the shapes of sentence piece.
      
       Maximum length of sentencepiece.
       
      optional int32 max_sentencepiece_length = 20 [default = 16];
      Specified by:
      getMaxSentencepieceLength in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The maxSentencepieceLength.
    • setMaxSentencepieceLength

      public SentencepieceModel.TrainerSpec.Builder setMaxSentencepieceLength(int value)
      /////////////////////////////////////////////////////////////////
       SentencePiece parameters which control the shapes of sentence piece.
      
       Maximum length of sentencepiece.
       
      optional int32 max_sentencepiece_length = 20 [default = 16];
      Parameters:
      value - The maxSentencepieceLength to set.
      Returns:
      This builder for chaining.
    • clearMaxSentencepieceLength

      public SentencepieceModel.TrainerSpec.Builder clearMaxSentencepieceLength()
      /////////////////////////////////////////////////////////////////
       SentencePiece parameters which control the shapes of sentence piece.
      
       Maximum length of sentencepiece.
       
      optional int32 max_sentencepiece_length = 20 [default = 16];
      Returns:
      This builder for chaining.
    • hasSplitByUnicodeScript

      public boolean hasSplitByUnicodeScript()
       Uses Unicode script to split sentence pieces.
       When `split_by_unicode_script` is true, we do not allow a sentence piece to
       include multiple Unicode scripts, e.g. "F1" is not a valid piece.
       Exception: CJ characters (Hiragana/Katakana/Han) are all handled
       as one script type, since a Japanese word can consist of multiple scripts.
       This exception is always applied regardless of the accept-language
       parameter.
       
      optional bool split_by_unicode_script = 21 [default = true];
      Specified by:
      hasSplitByUnicodeScript in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the splitByUnicodeScript field is set.
    • getSplitByUnicodeScript

      public boolean getSplitByUnicodeScript()
       Uses Unicode script to split sentence pieces.
       When `split_by_unicode_script` is true, we do not allow a sentence piece to
       include multiple Unicode scripts, e.g. "F1" is not a valid piece.
       Exception: CJ characters (Hiragana/Katakana/Han) are all handled
       as one script type, since a Japanese word can consist of multiple scripts.
       This exception is always applied regardless of the accept-language
       parameter.
       
      optional bool split_by_unicode_script = 21 [default = true];
      Specified by:
      getSplitByUnicodeScript in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The splitByUnicodeScript.
    • setSplitByUnicodeScript

      public SentencepieceModel.TrainerSpec.Builder setSplitByUnicodeScript(boolean value)
       Uses Unicode script to split sentence pieces.
       When `split_by_unicode_script` is true, we do not allow a sentence piece to
       include multiple Unicode scripts, e.g. "F1" is not a valid piece.
       Exception: CJ characters (Hiragana/Katakana/Han) are all handled
       as one script type, since a Japanese word can consist of multiple scripts.
       This exception is always applied regardless of the accept-language
       parameter.
       
      optional bool split_by_unicode_script = 21 [default = true];
      Parameters:
      value - The splitByUnicodeScript to set.
      Returns:
      This builder for chaining.
    • clearSplitByUnicodeScript

      public SentencepieceModel.TrainerSpec.Builder clearSplitByUnicodeScript()
       Uses Unicode script to split sentence pieces.
       When `split_by_unicode_script` is true, we do not allow a sentence piece to
       include multiple Unicode scripts, e.g. "F1" is not a valid piece.
       Exception: CJ characters (Hiragana/Katakana/Han) are all handled
       as one script type, since a Japanese word can consist of multiple scripts.
       This exception is always applied regardless of the accept-language
       parameter.
       
      optional bool split_by_unicode_script = 21 [default = true];
      Returns:
      This builder for chaining.
    • hasSplitByNumber

      public boolean hasSplitByNumber()
       When `split_by_number` is true, put a boundary at each transition between
       number and non-number characters. To treat "F1" as one token, set this flag
       to false.
       
      optional bool split_by_number = 23 [default = true];
      Specified by:
      hasSplitByNumber in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the splitByNumber field is set.
    • getSplitByNumber

      public boolean getSplitByNumber()
       When `split_by_number` is true, put a boundary at each transition between
       number and non-number characters. To treat "F1" as one token, set this flag
       to false.
       
      optional bool split_by_number = 23 [default = true];
      Specified by:
      getSplitByNumber in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The splitByNumber.
    • setSplitByNumber

      public SentencepieceModel.TrainerSpec.Builder setSplitByNumber(boolean value)
       When `split_by_number` is true, put a boundary at each transition between
       number and non-number characters. To treat "F1" as one token, set this flag
       to false.
       
      optional bool split_by_number = 23 [default = true];
      Parameters:
      value - The splitByNumber to set.
      Returns:
      This builder for chaining.
    • clearSplitByNumber

      public SentencepieceModel.TrainerSpec.Builder clearSplitByNumber()
       When `split_by_number` is true, put a boundary at each transition between
       number and non-number characters. To treat "F1" as one token, set this flag
       to false.
       
      optional bool split_by_number = 23 [default = true];
      Returns:
      This builder for chaining.
    • hasSplitByWhitespace

      public boolean hasSplitByWhitespace()
       Use a white space to split sentence pieces.
       When `split_by_whitespace` is false, we may have the piece containing
       a white space in the middle. e.g., "in_the".
       
      optional bool split_by_whitespace = 22 [default = true];
      Specified by:
      hasSplitByWhitespace in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the splitByWhitespace field is set.
    • getSplitByWhitespace

      public boolean getSplitByWhitespace()
       Use a white space to split sentence pieces.
       When `split_by_whitespace` is false, we may have the piece containing
       a white space in the middle. e.g., "in_the".
       
      optional bool split_by_whitespace = 22 [default = true];
      Specified by:
      getSplitByWhitespace in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The splitByWhitespace.
    • setSplitByWhitespace

      public SentencepieceModel.TrainerSpec.Builder setSplitByWhitespace(boolean value)
       Use a white space to split sentence pieces.
       When `split_by_whitespace` is false, we may have the piece containing
       a white space in the middle. e.g., "in_the".
       
      optional bool split_by_whitespace = 22 [default = true];
      Parameters:
      value - The splitByWhitespace to set.
      Returns:
      This builder for chaining.
    • clearSplitByWhitespace

      public SentencepieceModel.TrainerSpec.Builder clearSplitByWhitespace()
       Use a white space to split sentence pieces.
       When `split_by_whitespace` is false, we may have the piece containing
       a white space in the middle. e.g., "in_the".
       
      optional bool split_by_whitespace = 22 [default = true];
      Returns:
      This builder for chaining.
    • hasTreatWhitespaceAsSuffix

      public boolean hasTreatWhitespaceAsSuffix()
       Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
       hello_. When `treat_whitespace_as_suffix` is true,
       NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
       of sentence.
       
      optional bool treat_whitespace_as_suffix = 24 [default = false];
      Specified by:
      hasTreatWhitespaceAsSuffix in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the treatWhitespaceAsSuffix field is set.
    • getTreatWhitespaceAsSuffix

      public boolean getTreatWhitespaceAsSuffix()
       Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
       hello_. When `treat_whitespace_as_suffix` is true,
       NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
       of sentence.
       
      optional bool treat_whitespace_as_suffix = 24 [default = false];
      Specified by:
      getTreatWhitespaceAsSuffix in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The treatWhitespaceAsSuffix.
    • setTreatWhitespaceAsSuffix

      public SentencepieceModel.TrainerSpec.Builder setTreatWhitespaceAsSuffix(boolean value)
       Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
       hello_. When `treat_whitespace_as_suffix` is true,
       NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
       of sentence.
       
      optional bool treat_whitespace_as_suffix = 24 [default = false];
      Parameters:
      value - The treatWhitespaceAsSuffix to set.
      Returns:
      This builder for chaining.
    • clearTreatWhitespaceAsSuffix

      public SentencepieceModel.TrainerSpec.Builder clearTreatWhitespaceAsSuffix()
       Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
       hello_. When `treat_whitespace_as_suffix` is true,
       NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
       of sentence.
       
      optional bool treat_whitespace_as_suffix = 24 [default = false];
      Returns:
      This builder for chaining.
    • hasAllowWhitespaceOnlyPieces

      public boolean hasAllowWhitespaceOnlyPieces()
       Allows pieces that only contain whitespaces instead of appearing only as
       prefix or suffix of other pieces.
       
      optional bool allow_whitespace_only_pieces = 26 [default = false];
      Specified by:
      hasAllowWhitespaceOnlyPieces in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the allowWhitespaceOnlyPieces field is set.
    • getAllowWhitespaceOnlyPieces

      public boolean getAllowWhitespaceOnlyPieces()
       Allows pieces that only contain whitespaces instead of appearing only as
       prefix or suffix of other pieces.
       
      optional bool allow_whitespace_only_pieces = 26 [default = false];
      Specified by:
      getAllowWhitespaceOnlyPieces in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The allowWhitespaceOnlyPieces.
    • setAllowWhitespaceOnlyPieces

      public SentencepieceModel.TrainerSpec.Builder setAllowWhitespaceOnlyPieces(boolean value)
       Allows pieces that only contain whitespaces instead of appearing only as
       prefix or suffix of other pieces.
       
      optional bool allow_whitespace_only_pieces = 26 [default = false];
      Parameters:
      value - The allowWhitespaceOnlyPieces to set.
      Returns:
      This builder for chaining.
    • clearAllowWhitespaceOnlyPieces

      public SentencepieceModel.TrainerSpec.Builder clearAllowWhitespaceOnlyPieces()
       Allows pieces that only contain whitespaces instead of appearing only as
       prefix or suffix of other pieces.
       
      optional bool allow_whitespace_only_pieces = 26 [default = false];
      Returns:
      This builder for chaining.
    • hasSplitDigits

      public boolean hasSplitDigits()
       Split all digits (0-9) into separate pieces.
       
      optional bool split_digits = 25 [default = false];
      Specified by:
      hasSplitDigits in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the splitDigits field is set.
    • getSplitDigits

      public boolean getSplitDigits()
       Split all digits (0-9) into separate pieces.
       
      optional bool split_digits = 25 [default = false];
      Specified by:
      getSplitDigits in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The splitDigits.
    • setSplitDigits

      public SentencepieceModel.TrainerSpec.Builder setSplitDigits(boolean value)
       Split all digits (0-9) into separate pieces.
       
      optional bool split_digits = 25 [default = false];
      Parameters:
      value - The splitDigits to set.
      Returns:
      This builder for chaining.
    • clearSplitDigits

      public SentencepieceModel.TrainerSpec.Builder clearSplitDigits()
       Split all digits (0-9) into separate pieces.
       
      optional bool split_digits = 25 [default = false];
      Returns:
      This builder for chaining.
    • hasPretokenizationDelimiter

      public boolean hasPretokenizationDelimiter()
       Defines the pre-tokenization delimiter.
       When specified, pieces crossing this delimiter are not included
       in the vocab. The delimiter string is then virtually ignored
       during the training. This field allows constraints on the vocabulary
       selection. Note that this field is available in unigram mode.
       
      optional string pretokenization_delimiter = 53 [default = ""];
      Specified by:
      hasPretokenizationDelimiter in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the pretokenizationDelimiter field is set.
    • getPretokenizationDelimiter

      public String getPretokenizationDelimiter()
       Defines the pre-tokenization delimiter.
       When specified, pieces crossing this delimiter are not included
       in the vocab. The delimiter string is then virtually ignored
       during the training. This field allows constraints on the vocabulary
       selection. Note that this field is available in unigram mode.
       
      optional string pretokenization_delimiter = 53 [default = ""];
      Specified by:
      getPretokenizationDelimiter in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The pretokenizationDelimiter.
    • getPretokenizationDelimiterBytes

      public com.google.protobuf.ByteString getPretokenizationDelimiterBytes()
       Defines the pre-tokenization delimiter.
       When specified, pieces crossing this delimiter are not included
       in the vocab. The delimiter string is then virtually ignored
       during the training. This field allows constraints on the vocabulary
       selection. Note that this field is available in unigram mode.
       
      optional string pretokenization_delimiter = 53 [default = ""];
      Specified by:
      getPretokenizationDelimiterBytes in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The bytes for pretokenizationDelimiter.
    • setPretokenizationDelimiter

      public SentencepieceModel.TrainerSpec.Builder setPretokenizationDelimiter(String value)
       Defines the pre-tokenization delimiter.
       When specified, pieces crossing this delimiter are not included
       in the vocab. The delimiter string is then virtually ignored
       during the training. This field allows constraints on the vocabulary
       selection. Note that this field is available in unigram mode.
       
      optional string pretokenization_delimiter = 53 [default = ""];
      Parameters:
      value - The pretokenizationDelimiter to set.
      Returns:
      This builder for chaining.
    • clearPretokenizationDelimiter

      public SentencepieceModel.TrainerSpec.Builder clearPretokenizationDelimiter()
       Defines the pre-tokenization delimiter.
       When specified, pieces crossing this delimiter are not included
       in the vocab. The delimiter string is then virtually ignored
       during the training. This field allows constraints on the vocabulary
       selection. Note that this field is available in unigram mode.
       
      optional string pretokenization_delimiter = 53 [default = ""];
      Returns:
      This builder for chaining.
    • setPretokenizationDelimiterBytes

      public SentencepieceModel.TrainerSpec.Builder setPretokenizationDelimiterBytes(com.google.protobuf.ByteString value)
       Defines the pre-tokenization delimiter.
       When specified, pieces crossing this delimiter are not included
       in the vocab. The delimiter string is then virtually ignored
       during the training. This field allows constraints on the vocabulary
       selection. Note that this field is available in unigram mode.
       
      optional string pretokenization_delimiter = 53 [default = ""];
      Parameters:
      value - The bytes for pretokenizationDelimiter to set.
      Returns:
      This builder for chaining.
    • getControlSymbolsList

      public com.google.protobuf.ProtocolStringList getControlSymbolsList()
      /////////////////////////////////////////////////////////////////
       Vocabulary management
      
       Defines control symbols used as an indicator to
       change the behavior of the decoder. <s> and </s> are pre-defined.
       We can use this field to encode various meta information,
       including language indicator in multilingual model.
       These symbols are not visible to users, but visible to
       the decoder. Note that when the input sentence contains control symbols,
       they are not treated as one token, but segmented into normal pieces.
       Control symbols must be inserted independently from the segmentation.
       
      repeated string control_symbols = 30;
      Specified by:
      getControlSymbolsList in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      A list containing the controlSymbols.
    • getControlSymbolsCount

      public int getControlSymbolsCount()
      /////////////////////////////////////////////////////////////////
       Vocabulary management
      
       Defines control symbols used as an indicator to
       change the behavior of the decoder. <s> and </s> are pre-defined.
       We can use this field to encode various meta information,
       including language indicator in multilingual model.
       These symbols are not visible to users, but visible to
       the decoder. Note that when the input sentence contains control symbols,
       they are not treated as one token, but segmented into normal pieces.
       Control symbols must be inserted independently from the segmentation.
       
      repeated string control_symbols = 30;
      Specified by:
      getControlSymbolsCount in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The count of controlSymbols.
    • getControlSymbols

      public String getControlSymbols(int index)
      /////////////////////////////////////////////////////////////////
       Vocabulary management
      
       Defines control symbols used as an indicator to
       change the behavior of the decoder. <s> and </s> are pre-defined.
       We can use this field to encode various meta information,
       including language indicator in multilingual model.
       These symbols are not visible to users, but visible to
       the decoder. Note that when the input sentence contains control symbols,
       they are not treated as one token, but segmented into normal pieces.
       Control symbols must be inserted independently from the segmentation.
       
      repeated string control_symbols = 30;
      Specified by:
      getControlSymbols in interface SentencepieceModel.TrainerSpecOrBuilder
      Parameters:
      index - The index of the element to return.
      Returns:
      The controlSymbols at the given index.
    • getControlSymbolsBytes

      public com.google.protobuf.ByteString getControlSymbolsBytes(int index)
      /////////////////////////////////////////////////////////////////
       Vocabulary management
      
       Defines control symbols used as an indicator to
       change the behavior of the decoder. <s> and </s> are pre-defined.
       We can use this field to encode various meta information,
       including language indicator in multilingual model.
       These symbols are not visible to users, but visible to
       the decoder. Note that when the input sentence contains control symbols,
       they are not treated as one token, but segmented into normal pieces.
       Control symbols must be inserted independently from the segmentation.
       
      repeated string control_symbols = 30;
      Specified by:
      getControlSymbolsBytes in interface SentencepieceModel.TrainerSpecOrBuilder
      Parameters:
      index - The index of the value to return.
      Returns:
      The bytes of the controlSymbols at the given index.
    • setControlSymbols

      public SentencepieceModel.TrainerSpec.Builder setControlSymbols(int index, String value)
      /////////////////////////////////////////////////////////////////
       Vocabulary management
      
       Defines control symbols used as an indicator to
       change the behavior of the decoder. <s> and </s> are pre-defined.
       We can use this field to encode various meta information,
       including language indicator in multilingual model.
       These symbols are not visible to users, but visible to
       the decoder. Note that when the input sentence contains control symbols,
       they are not treated as one token, but segmented into normal pieces.
       Control symbols must be inserted independently from the segmentation.
       
      repeated string control_symbols = 30;
      Parameters:
      index - The index to set the value at.
      value - The controlSymbols to set.
      Returns:
      This builder for chaining.
    • addControlSymbols

      public SentencepieceModel.TrainerSpec.Builder addControlSymbols(String value)
      /////////////////////////////////////////////////////////////////
       Vocabulary management
      
       Defines control symbols used as an indicator to
       change the behavior of the decoder. <s> and </s> are pre-defined.
       We can use this field to encode various meta information,
       including language indicator in multilingual model.
       These symbols are not visible to users, but visible to
       the decoder. Note that when the input sentence contains control symbols,
       they are not treated as one token, but segmented into normal pieces.
       Control symbols must be inserted independently from the segmentation.
       
      repeated string control_symbols = 30;
      Parameters:
      value - The controlSymbols to add.
      Returns:
      This builder for chaining.
    • addAllControlSymbols

      public SentencepieceModel.TrainerSpec.Builder addAllControlSymbols(Iterable<String> values)
      /////////////////////////////////////////////////////////////////
       Vocabulary management
      
       Defines control symbols used as an indicator to
       change the behavior of the decoder. <s> and </s> are pre-defined.
       We can use this field to encode various meta information,
       including language indicator in multilingual model.
       These symbols are not visible to users, but visible to
       the decoder. Note that when the input sentence contains control symbols,
       they are not treated as one token, but segmented into normal pieces.
       Control symbols must be inserted independently from the segmentation.
       
      repeated string control_symbols = 30;
      Parameters:
      values - The controlSymbols to add.
      Returns:
      This builder for chaining.
    • clearControlSymbols

      public SentencepieceModel.TrainerSpec.Builder clearControlSymbols()
      /////////////////////////////////////////////////////////////////
       Vocabulary management
      
       Defines control symbols used as an indicator to
       change the behavior of the decoder. <s> and </s> are pre-defined.
       We can use this field to encode various meta information,
       including language indicator in multilingual model.
       These symbols are not visible to users, but visible to
       the decoder. Note that when the input sentence contains control symbols,
       they are not treated as one token, but segmented into normal pieces.
       Control symbols must be inserted independently from the segmentation.
       
      repeated string control_symbols = 30;
      Returns:
      This builder for chaining.
    • addControlSymbolsBytes

      public SentencepieceModel.TrainerSpec.Builder addControlSymbolsBytes(com.google.protobuf.ByteString value)
      /////////////////////////////////////////////////////////////////
       Vocabulary management
      
       Defines control symbols used as an indicator to
       change the behavior of the decoder. <s> and </s> are pre-defined.
       We can use this field to encode various meta information,
       including language indicator in multilingual model.
       These symbols are not visible to users, but visible to
       the decoder. Note that when the input sentence contains control symbols,
       they are not treated as one token, but segmented into normal pieces.
       Control symbols must be inserted independently from the segmentation.
       
      repeated string control_symbols = 30;
      Parameters:
      value - The bytes of the controlSymbols to add.
      Returns:
      This builder for chaining.
    • getUserDefinedSymbolsList

      public com.google.protobuf.ProtocolStringList getUserDefinedSymbolsList()
       Defines user defined symbols.
       These symbols are added with extremely high score
       so they are always treated as one unique symbol in any context.
       Typical usage of user_defined_symbols is placeholder for named entities.
       
      repeated string user_defined_symbols = 31;
      Specified by:
      getUserDefinedSymbolsList in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      A list containing the userDefinedSymbols.
    • getUserDefinedSymbolsCount

      public int getUserDefinedSymbolsCount()
       Defines user defined symbols.
       These symbols are added with extremely high score
       so they are always treated as one unique symbol in any context.
       Typical usage of user_defined_symbols is placeholder for named entities.
       
      repeated string user_defined_symbols = 31;
      Specified by:
      getUserDefinedSymbolsCount in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The count of userDefinedSymbols.
    • getUserDefinedSymbols

      public String getUserDefinedSymbols(int index)
       Defines user defined symbols.
       These symbols are added with extremely high score
       so they are always treated as one unique symbol in any context.
       Typical usage of user_defined_symbols is placeholder for named entities.
       
      repeated string user_defined_symbols = 31;
      Specified by:
      getUserDefinedSymbols in interface SentencepieceModel.TrainerSpecOrBuilder
      Parameters:
      index - The index of the element to return.
      Returns:
      The userDefinedSymbols at the given index.
    • getUserDefinedSymbolsBytes

      public com.google.protobuf.ByteString getUserDefinedSymbolsBytes(int index)
       Defines user defined symbols.
       These symbols are added with extremely high score
       so they are always treated as one unique symbol in any context.
       Typical usage of user_defined_symbols is placeholder for named entities.
       
      repeated string user_defined_symbols = 31;
      Specified by:
      getUserDefinedSymbolsBytes in interface SentencepieceModel.TrainerSpecOrBuilder
      Parameters:
      index - The index of the value to return.
      Returns:
      The bytes of the userDefinedSymbols at the given index.
    • setUserDefinedSymbols

      public SentencepieceModel.TrainerSpec.Builder setUserDefinedSymbols(int index, String value)
       Defines user defined symbols.
       These symbols are added with extremely high score
       so they are always treated as one unique symbol in any context.
       Typical usage of user_defined_symbols is placeholder for named entities.
       
      repeated string user_defined_symbols = 31;
      Parameters:
      index - The index to set the value at.
      value - The userDefinedSymbols to set.
      Returns:
      This builder for chaining.
    • addUserDefinedSymbols

      public SentencepieceModel.TrainerSpec.Builder addUserDefinedSymbols(String value)
       Defines user defined symbols.
       These symbols are added with extremely high score
       so they are always treated as one unique symbol in any context.
       Typical usage of user_defined_symbols is placeholder for named entities.
       
      repeated string user_defined_symbols = 31;
      Parameters:
      value - The userDefinedSymbols to add.
      Returns:
      This builder for chaining.
    • addAllUserDefinedSymbols

      public SentencepieceModel.TrainerSpec.Builder addAllUserDefinedSymbols(Iterable<String> values)
       Defines user defined symbols.
       These symbols are added with extremely high score
       so they are always treated as one unique symbol in any context.
       Typical usage of user_defined_symbols is placeholder for named entities.
       
      repeated string user_defined_symbols = 31;
      Parameters:
      values - The userDefinedSymbols to add.
      Returns:
      This builder for chaining.
    • clearUserDefinedSymbols

      public SentencepieceModel.TrainerSpec.Builder clearUserDefinedSymbols()
       Defines user defined symbols.
       These symbols are added with extremely high score
       so they are always treated as one unique symbol in any context.
       Typical usage of user_defined_symbols is placeholder for named entities.
       
      repeated string user_defined_symbols = 31;
      Returns:
      This builder for chaining.
    • addUserDefinedSymbolsBytes

      public SentencepieceModel.TrainerSpec.Builder addUserDefinedSymbolsBytes(com.google.protobuf.ByteString value)
       Defines user defined symbols.
       These symbols are added with extremely high score
       so they are always treated as one unique symbol in any context.
       Typical usage of user_defined_symbols is placeholder for named entities.
       
      repeated string user_defined_symbols = 31;
      Parameters:
      value - The bytes of the userDefinedSymbols to add.
      Returns:
      This builder for chaining.
    • hasRequiredChars

      public boolean hasRequiredChars()
       Defines required characters. Each UTF8 character in this string is included
       in the character set regardless of character_coverage value. Unlike
       user_defined_symbols, these characters have scores based on the frequency
       on input sentences, and the model can form subwords using characters
       in this field.
       
      optional string required_chars = 36;
      Specified by:
      hasRequiredChars in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the requiredChars field is set.
    • getRequiredChars

      public String getRequiredChars()
       Defines required characters. Each UTF8 character in this string is included
       in the character set regardless of character_coverage value. Unlike
       user_defined_symbols, these characters have scores based on the frequency
       on input sentences, and the model can form subwords using characters
       in this field.
       
      optional string required_chars = 36;
      Specified by:
      getRequiredChars in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The requiredChars.
    • getRequiredCharsBytes

      public com.google.protobuf.ByteString getRequiredCharsBytes()
       Defines required characters. Each UTF8 character in this string is included
       in the character set regardless of character_coverage value. Unlike
       user_defined_symbols, these characters have scores based on the frequency
       on input sentences, and the model can form subwords using characters
       in this field.
       
      optional string required_chars = 36;
      Specified by:
      getRequiredCharsBytes in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The bytes for requiredChars.
    • setRequiredChars

      public SentencepieceModel.TrainerSpec.Builder setRequiredChars(String value)
       Defines required characters. Each UTF8 character in this string is included
       in the character set regardless of character_coverage value. Unlike
       user_defined_symbols, these characters have scores based on the frequency
       on input sentences, and the model can form subwords using characters
       in this field.
       
      optional string required_chars = 36;
      Parameters:
      value - The requiredChars to set.
      Returns:
      This builder for chaining.
    • clearRequiredChars

      public SentencepieceModel.TrainerSpec.Builder clearRequiredChars()
       Defines required characters. Each UTF8 character in this string is included
       in the character set regardless of character_coverage value. Unlike
       user_defined_symbols, these characters have scores based on the frequency
       on input sentences, and the model can form subwords using characters
       in this field.
       
      optional string required_chars = 36;
      Returns:
      This builder for chaining.
    • setRequiredCharsBytes

      public SentencepieceModel.TrainerSpec.Builder setRequiredCharsBytes(com.google.protobuf.ByteString value)
       Defines required characters. Each UTF8 character in this string is included
       in the character set regardless of character_coverage value. Unlike
       user_defined_symbols, these characters have scores based on the frequency
       on input sentences, and the model can form subwords using characters
       in this field.
       
      optional string required_chars = 36;
      Parameters:
      value - The bytes for requiredChars to set.
      Returns:
      This builder for chaining.
    • hasByteFallback

      public boolean hasByteFallback()
       Decomposes unknown pieces into UTF-8 bytes.
       
      optional bool byte_fallback = 35 [default = false];
      Specified by:
      hasByteFallback in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the byteFallback field is set.
    • getByteFallback

      public boolean getByteFallback()
       Decomposes unknown pieces into UTF-8 bytes.
       
      optional bool byte_fallback = 35 [default = false];
      Specified by:
      getByteFallback in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The byteFallback.
    • setByteFallback

      public SentencepieceModel.TrainerSpec.Builder setByteFallback(boolean value)
       Decomposes unknown pieces into UTF-8 bytes.
       
      optional bool byte_fallback = 35 [default = false];
      Parameters:
      value - The byteFallback to set.
      Returns:
      This builder for chaining.
    • clearByteFallback

      public SentencepieceModel.TrainerSpec.Builder clearByteFallback()
       Decomposes unknown pieces into UTF-8 bytes.
       
      optional bool byte_fallback = 35 [default = false];
      Returns:
      This builder for chaining.
    • hasVocabularyOutputPieceScore

      public boolean hasVocabularyOutputPieceScore()
       When creating the vocabulary file, defines whether or not to additionally
       output the score for each piece.
       
      optional bool vocabulary_output_piece_score = 32 [default = true];
      Specified by:
      hasVocabularyOutputPieceScore in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the vocabularyOutputPieceScore field is set.
    • getVocabularyOutputPieceScore

      public boolean getVocabularyOutputPieceScore()
       When creating the vocabulary file, defines whether or not to additionally
       output the score for each piece.
       
      optional bool vocabulary_output_piece_score = 32 [default = true];
      Specified by:
      getVocabularyOutputPieceScore in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The vocabularyOutputPieceScore.
    • setVocabularyOutputPieceScore

      public SentencepieceModel.TrainerSpec.Builder setVocabularyOutputPieceScore(boolean value)
       When creating the vocabulary file, defines whether or not to additionally
       output the score for each piece.
       
      optional bool vocabulary_output_piece_score = 32 [default = true];
      Parameters:
      value - The vocabularyOutputPieceScore to set.
      Returns:
      This builder for chaining.
    • clearVocabularyOutputPieceScore

      public SentencepieceModel.TrainerSpec.Builder clearVocabularyOutputPieceScore()
       When creating the vocabulary file, defines whether or not to additionally
       output the score for each piece.
       
      optional bool vocabulary_output_piece_score = 32 [default = true];
      Returns:
      This builder for chaining.
    • hasHardVocabLimit

      public boolean hasHardVocabLimit()
       `vocab_size` is treated as a hard limit. Crashes if
       the model cannot produce a vocab of size `vocab_size`.
       When `hard_vocab_limit` is false, vocab_size is treated
       as soft limit. Note that when model_type=char,
       always assumes hard_vocab_limit = false.
       
      optional bool hard_vocab_limit = 33 [default = true];
      Specified by:
      hasHardVocabLimit in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the hardVocabLimit field is set.
    • getHardVocabLimit

      public boolean getHardVocabLimit()
       `vocab_size` is treated as a hard limit. Crashes if
       the model cannot produce a vocab of size `vocab_size`.
       When `hard_vocab_limit` is false, vocab_size is treated
       as soft limit. Note that when model_type=char,
       always assumes hard_vocab_limit = false.
       
      optional bool hard_vocab_limit = 33 [default = true];
      Specified by:
      getHardVocabLimit in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The hardVocabLimit.
    • setHardVocabLimit

      public SentencepieceModel.TrainerSpec.Builder setHardVocabLimit(boolean value)
       `vocab_size` is treated as a hard limit. Crashes if
       the model cannot produce a vocab of size `vocab_size`.
       When `hard_vocab_limit` is false, vocab_size is treated
       as soft limit. Note that when model_type=char,
       always assumes hard_vocab_limit = false.
       
      optional bool hard_vocab_limit = 33 [default = true];
      Parameters:
      value - The hardVocabLimit to set.
      Returns:
      This builder for chaining.
    • clearHardVocabLimit

      public SentencepieceModel.TrainerSpec.Builder clearHardVocabLimit()
       `vocab_size` is treated as a hard limit. Crashes if
       the model cannot produce a vocab of size `vocab_size`.
       When `hard_vocab_limit` is false, vocab_size is treated
       as soft limit. Note that when model_type=char,
       always assumes hard_vocab_limit = false.
       
      optional bool hard_vocab_limit = 33 [default = true];
      Returns:
      This builder for chaining.
    • hasUseAllVocab

      public boolean hasUseAllVocab()
       Uses all symbols for vocab extraction. This flag is valid
       if the model type is either CHAR or WORD.
       
      optional bool use_all_vocab = 34 [default = false];
      Specified by:
      hasUseAllVocab in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the useAllVocab field is set.
    • getUseAllVocab

      public boolean getUseAllVocab()
       Uses all symbols for vocab extraction. This flag is valid
       if the model type is either CHAR or WORD.
       
      optional bool use_all_vocab = 34 [default = false];
      Specified by:
      getUseAllVocab in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The useAllVocab.
    • setUseAllVocab

      public SentencepieceModel.TrainerSpec.Builder setUseAllVocab(boolean value)
       Uses all symbols for vocab extraction. This flag is valid
       if the model type is either CHAR or WORD.
       
      optional bool use_all_vocab = 34 [default = false];
      Parameters:
      value - The useAllVocab to set.
      Returns:
      This builder for chaining.
    • clearUseAllVocab

      public SentencepieceModel.TrainerSpec.Builder clearUseAllVocab()
       Uses all symbols for vocab extraction. This flag is valid
       if the model type is either CHAR or WORD.
       
      optional bool use_all_vocab = 34 [default = false];
      Returns:
      This builder for chaining.
    • hasUnkId

      public boolean hasUnkId()
      /////////////////////////////////////////////////////////////////
       Reserved special meta tokens.
       * -1 is not used.
       * unk_id must not be -1.
       IDs must start with 0 and be contiguous.
       
      optional int32 unk_id = 40 [default = 0];
      Specified by:
      hasUnkId in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the unkId field is set.
    • getUnkId

      public int getUnkId()
      /////////////////////////////////////////////////////////////////
       Reserved special meta tokens.
       * -1 is not used.
       * unk_id must not be -1.
       IDs must start with 0 and be contiguous.
       
      optional int32 unk_id = 40 [default = 0];
      Specified by:
      getUnkId in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The unkId.
    • setUnkId

      public SentencepieceModel.TrainerSpec.Builder setUnkId(int value)
      /////////////////////////////////////////////////////////////////
       Reserved special meta tokens.
       * -1 is not used.
       * unk_id must not be -1.
       IDs must start with 0 and be contiguous.
       
      optional int32 unk_id = 40 [default = 0];
      Parameters:
      value - The unkId to set.
      Returns:
      This builder for chaining.
    • clearUnkId

      public SentencepieceModel.TrainerSpec.Builder clearUnkId()
      /////////////////////////////////////////////////////////////////
       Reserved special meta tokens.
       * -1 is not used.
       * unk_id must not be -1.
       IDs must start with 0 and be contiguous.
       
      optional int32 unk_id = 40 [default = 0];
      Returns:
      This builder for chaining.
    • hasBosId

      public boolean hasBosId()
       <s>
       
      optional int32 bos_id = 41 [default = 1];
      Specified by:
      hasBosId in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the bosId field is set.
    • getBosId

      public int getBosId()
       <s>
       
      optional int32 bos_id = 41 [default = 1];
      Specified by:
      getBosId in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The bosId.
    • setBosId

      public SentencepieceModel.TrainerSpec.Builder setBosId(int value)
       <s>
       
      optional int32 bos_id = 41 [default = 1];
      Parameters:
      value - The bosId to set.
      Returns:
      This builder for chaining.
    • clearBosId

      public SentencepieceModel.TrainerSpec.Builder clearBosId()
       <s>
       
      optional int32 bos_id = 41 [default = 1];
      Returns:
      This builder for chaining.
    • hasEosId

      public boolean hasEosId()
       </s>
       
      optional int32 eos_id = 42 [default = 2];
      Specified by:
      hasEosId in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the eosId field is set.
    • getEosId

      public int getEosId()
       </s>
       
      optional int32 eos_id = 42 [default = 2];
      Specified by:
      getEosId in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The eosId.
    • setEosId

      public SentencepieceModel.TrainerSpec.Builder setEosId(int value)
       </s>
       
      optional int32 eos_id = 42 [default = 2];
      Parameters:
      value - The eosId to set.
      Returns:
      This builder for chaining.
    • clearEosId

      public SentencepieceModel.TrainerSpec.Builder clearEosId()
       </s>
       
      optional int32 eos_id = 42 [default = 2];
      Returns:
      This builder for chaining.
    • hasPadId

      public boolean hasPadId()
       <pad> (padding)
       
      optional int32 pad_id = 43 [default = -1];
      Specified by:
      hasPadId in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the padId field is set.
    • getPadId

      public int getPadId()
       <pad> (padding)
       
      optional int32 pad_id = 43 [default = -1];
      Specified by:
      getPadId in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The padId.
    • setPadId

      public SentencepieceModel.TrainerSpec.Builder setPadId(int value)
       <pad> (padding)
       
      optional int32 pad_id = 43 [default = -1];
      Parameters:
      value - The padId to set.
      Returns:
      This builder for chaining.
    • clearPadId

      public SentencepieceModel.TrainerSpec.Builder clearPadId()
       <pad> (padding)
       
      optional int32 pad_id = 43 [default = -1];
      Returns:
      This builder for chaining.
    • hasUnkPiece

      public boolean hasUnkPiece()
      optional string unk_piece = 45 [default = "<unk>"];
      Specified by:
      hasUnkPiece in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the unkPiece field is set.
    • getUnkPiece

      public String getUnkPiece()
      optional string unk_piece = 45 [default = "<unk>"];
      Specified by:
      getUnkPiece in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The unkPiece.
    • getUnkPieceBytes

      public com.google.protobuf.ByteString getUnkPieceBytes()
      optional string unk_piece = 45 [default = "<unk>"];
      Specified by:
      getUnkPieceBytes in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The bytes for unkPiece.
    • setUnkPiece

      public SentencepieceModel.TrainerSpec.Builder setUnkPiece(String value)
      optional string unk_piece = 45 [default = "<unk>"];
      Parameters:
      value - The unkPiece to set.
      Returns:
      This builder for chaining.
    • clearUnkPiece

      public SentencepieceModel.TrainerSpec.Builder clearUnkPiece()
      optional string unk_piece = 45 [default = "<unk>"];
      Returns:
      This builder for chaining.
    • setUnkPieceBytes

      public SentencepieceModel.TrainerSpec.Builder setUnkPieceBytes(com.google.protobuf.ByteString value)
      optional string unk_piece = 45 [default = "<unk>"];
      Parameters:
      value - The bytes for unkPiece to set.
      Returns:
      This builder for chaining.
    • hasBosPiece

      public boolean hasBosPiece()
      optional string bos_piece = 46 [default = "<s>"];
      Specified by:
      hasBosPiece in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the bosPiece field is set.
    • getBosPiece

      public String getBosPiece()
      optional string bos_piece = 46 [default = "<s>"];
      Specified by:
      getBosPiece in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The bosPiece.
    • getBosPieceBytes

      public com.google.protobuf.ByteString getBosPieceBytes()
      optional string bos_piece = 46 [default = "<s>"];
      Specified by:
      getBosPieceBytes in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The bytes for bosPiece.
    • setBosPiece

      public SentencepieceModel.TrainerSpec.Builder setBosPiece(String value)
      optional string bos_piece = 46 [default = "<s>"];
      Parameters:
      value - The bosPiece to set.
      Returns:
      This builder for chaining.
    • clearBosPiece

      public SentencepieceModel.TrainerSpec.Builder clearBosPiece()
      optional string bos_piece = 46 [default = "<s>"];
      Returns:
      This builder for chaining.
    • setBosPieceBytes

      public SentencepieceModel.TrainerSpec.Builder setBosPieceBytes(com.google.protobuf.ByteString value)
      optional string bos_piece = 46 [default = "<s>"];
      Parameters:
      value - The bytes for bosPiece to set.
      Returns:
      This builder for chaining.
    • hasEosPiece

      public boolean hasEosPiece()
      optional string eos_piece = 47 [default = "</s>"];
      Specified by:
      hasEosPiece in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the eosPiece field is set.
    • getEosPiece

      public String getEosPiece()
      optional string eos_piece = 47 [default = "</s>"];
      Specified by:
      getEosPiece in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The eosPiece.
    • getEosPieceBytes

      public com.google.protobuf.ByteString getEosPieceBytes()
      optional string eos_piece = 47 [default = "</s>"];
      Specified by:
      getEosPieceBytes in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The bytes for eosPiece.
    • setEosPiece

      public SentencepieceModel.TrainerSpec.Builder setEosPiece(String value)
      optional string eos_piece = 47 [default = "</s>"];
      Parameters:
      value - The eosPiece to set.
      Returns:
      This builder for chaining.
    • clearEosPiece

      public SentencepieceModel.TrainerSpec.Builder clearEosPiece()
      optional string eos_piece = 47 [default = "</s>"];
      Returns:
      This builder for chaining.
    • setEosPieceBytes

      public SentencepieceModel.TrainerSpec.Builder setEosPieceBytes(com.google.protobuf.ByteString value)
      optional string eos_piece = 47 [default = "</s>"];
      Parameters:
      value - The bytes for eosPiece to set.
      Returns:
      This builder for chaining.
    • hasPadPiece

      public boolean hasPadPiece()
      optional string pad_piece = 48 [default = "<pad>"];
      Specified by:
      hasPadPiece in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the padPiece field is set.
    • getPadPiece

      public String getPadPiece()
      optional string pad_piece = 48 [default = "<pad>"];
      Specified by:
      getPadPiece in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The padPiece.
    • getPadPieceBytes

      public com.google.protobuf.ByteString getPadPieceBytes()
      optional string pad_piece = 48 [default = "<pad>"];
      Specified by:
      getPadPieceBytes in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The bytes for padPiece.
    • setPadPiece

      public SentencepieceModel.TrainerSpec.Builder setPadPiece(String value)
      optional string pad_piece = 48 [default = "<pad>"];
      Parameters:
      value - The padPiece to set.
      Returns:
      This builder for chaining.
    • clearPadPiece

      public SentencepieceModel.TrainerSpec.Builder clearPadPiece()
      optional string pad_piece = 48 [default = "<pad>"];
      Returns:
      This builder for chaining.
    • setPadPieceBytes

      public SentencepieceModel.TrainerSpec.Builder setPadPieceBytes(com.google.protobuf.ByteString value)
      optional string pad_piece = 48 [default = "<pad>"];
      Parameters:
      value - The bytes for padPiece to set.
      Returns:
      This builder for chaining.
    • hasUnkSurface

      public boolean hasUnkSurface()
       Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
       because this character is helpful to both users and
       developers: it makes it easy to see that <unk> was emitted.
       
      optional string unk_surface = 44 [default = " \342\201\207 "];
      Specified by:
      hasUnkSurface in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the unkSurface field is set.
    • getUnkSurface

      public String getUnkSurface()
       Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
       because this character is helpful to both users and
       developers: it makes it easy to see that <unk> was emitted.
       
      optional string unk_surface = 44 [default = " \342\201\207 "];
      Specified by:
      getUnkSurface in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The unkSurface.
    • getUnkSurfaceBytes

      public com.google.protobuf.ByteString getUnkSurfaceBytes()
       Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
       because this character is helpful to both users and
       developers: it makes it easy to see that <unk> was emitted.
       
      optional string unk_surface = 44 [default = " \342\201\207 "];
      Specified by:
      getUnkSurfaceBytes in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The bytes for unkSurface.
    • setUnkSurface

      public SentencepieceModel.TrainerSpec.Builder setUnkSurface(String value)
       Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
       because this character is helpful to both users and
       developers: it makes it easy to see that <unk> was emitted.
       
      optional string unk_surface = 44 [default = " \342\201\207 "];
      Parameters:
      value - The unkSurface to set.
      Returns:
      This builder for chaining.
    • clearUnkSurface

      public SentencepieceModel.TrainerSpec.Builder clearUnkSurface()
       Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
       because this character is helpful to both users and
       developers: it makes it easy to see that <unk> was emitted.
       
      optional string unk_surface = 44 [default = " \342\201\207 "];
      Returns:
      This builder for chaining.
    • setUnkSurfaceBytes

      public SentencepieceModel.TrainerSpec.Builder setUnkSurfaceBytes(com.google.protobuf.ByteString value)
       Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
       because this character is helpful to both users and
       developers: it makes it easy to see that <unk> was emitted.
       
      optional string unk_surface = 44 [default = " \342\201\207 "];
      Parameters:
      value - The bytes for unkSurface to set.
      Returns:
      This builder for chaining.
    • hasTrainExtremelyLargeCorpus

      public boolean hasTrainExtremelyLargeCorpus()
       Increase bit depth to allow unigram model training on large
       (>10M sentences) corpora. A side effect of enabling this flag
       is increased memory usage.
       
      optional bool train_extremely_large_corpus = 49 [default = false];
      Specified by:
      hasTrainExtremelyLargeCorpus in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the trainExtremelyLargeCorpus field is set.
    • getTrainExtremelyLargeCorpus

      public boolean getTrainExtremelyLargeCorpus()
       Increase bit depth to allow unigram model training on large
       (>10M sentences) corpora. A side effect of enabling this flag
       is increased memory usage.
       
      optional bool train_extremely_large_corpus = 49 [default = false];
      Specified by:
      getTrainExtremelyLargeCorpus in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The trainExtremelyLargeCorpus.
    • setTrainExtremelyLargeCorpus

      public SentencepieceModel.TrainerSpec.Builder setTrainExtremelyLargeCorpus(boolean value)
       Increase bit depth to allow unigram model training on large
       (>10M sentences) corpora. A side effect of enabling this flag
       is increased memory usage.
       
      optional bool train_extremely_large_corpus = 49 [default = false];
      Parameters:
      value - The trainExtremelyLargeCorpus to set.
      Returns:
      This builder for chaining.
    • clearTrainExtremelyLargeCorpus

      public SentencepieceModel.TrainerSpec.Builder clearTrainExtremelyLargeCorpus()
       Increase bit depth to allow unigram model training on large
       (>10M sentences) corpora. A side effect of enabling this flag
       is increased memory usage.
       
      optional bool train_extremely_large_corpus = 49 [default = false];
      Returns:
      This builder for chaining.
    • hasSeedSentencepiecesFile

      public boolean hasSeedSentencepiecesFile()
       Path to a seed sentencepieces file, with one tab-separated
       seed sentencepiece <tab> frequency per line.
       
      optional string seed_sentencepieces_file = 54 [default = ""];
      Specified by:
      hasSeedSentencepiecesFile in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      Whether the seedSentencepiecesFile field is set.
    • getSeedSentencepiecesFile

      public String getSeedSentencepiecesFile()
       Path to a seed sentencepieces file, with one tab-separated
       seed sentencepiece <tab> frequency per line.
       
      optional string seed_sentencepieces_file = 54 [default = ""];
      Specified by:
      getSeedSentencepiecesFile in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The seedSentencepiecesFile.
    • getSeedSentencepiecesFileBytes

      public com.google.protobuf.ByteString getSeedSentencepiecesFileBytes()
       Path to a seed sentencepieces file, with one tab-separated
       seed sentencepiece <tab> frequency per line.
       
      optional string seed_sentencepieces_file = 54 [default = ""];
      Specified by:
      getSeedSentencepiecesFileBytes in interface SentencepieceModel.TrainerSpecOrBuilder
      Returns:
      The bytes for seedSentencepiecesFile.
    • setSeedSentencepiecesFile

      public SentencepieceModel.TrainerSpec.Builder setSeedSentencepiecesFile(String value)
       Path to a seed sentencepieces file, with one tab-separated
       seed sentencepiece <tab> frequency per line.
       
      optional string seed_sentencepieces_file = 54 [default = ""];
      Parameters:
      value - The seedSentencepiecesFile to set.
      Returns:
      This builder for chaining.
    • clearSeedSentencepiecesFile

      public SentencepieceModel.TrainerSpec.Builder clearSeedSentencepiecesFile()
       Path to a seed sentencepieces file, with one tab-separated
       seed sentencepiece <tab> frequency per line.
       
      optional string seed_sentencepieces_file = 54 [default = ""];
      Returns:
      This builder for chaining.
    • setSeedSentencepiecesFileBytes

      public SentencepieceModel.TrainerSpec.Builder setSeedSentencepiecesFileBytes(com.google.protobuf.ByteString value)
       Path to a seed sentencepieces file, with one tab-separated
       seed sentencepiece <tab> frequency per line.
       
      optional string seed_sentencepieces_file = 54 [default = ""];
      Parameters:
      value - The bytes for seedSentencepiecesFile to set.
      Returns:
      This builder for chaining.
    • setUnknownFields

      public final SentencepieceModel.TrainerSpec.Builder setUnknownFields(com.google.protobuf.UnknownFieldSet unknownFields)
      Specified by:
      setUnknownFields in interface com.google.protobuf.Message.Builder
      Overrides:
      setUnknownFields in class com.google.protobuf.GeneratedMessageV3.Builder<SentencepieceModel.TrainerSpec.Builder>
    • mergeUnknownFields

      public final SentencepieceModel.TrainerSpec.Builder mergeUnknownFields(com.google.protobuf.UnknownFieldSet unknownFields)
      Specified by:
      mergeUnknownFields in interface com.google.protobuf.Message.Builder
      Overrides:
      mergeUnknownFields in class com.google.protobuf.GeneratedMessageV3.Builder<SentencepieceModel.TrainerSpec.Builder>