java.lang.Object

com.google.protobuf.AbstractMessageLite.Builder

com.google.protobuf.AbstractMessage.Builder<BuilderT>

com.google.protobuf.GeneratedMessageV3.Builder<BuilderT>

com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>

com.google.genai.proto.SentencepieceModel.TrainerSpec.Builder

All Implemented Interfaces: SentencepieceModel.TrainerSpecOrBuilder, com.google.protobuf.GeneratedMessageV3.ExtendableMessageOrBuilder<SentencepieceModel.TrainerSpec>, com.google.protobuf.Message.Builder, com.google.protobuf.MessageLite.Builder, com.google.protobuf.MessageLiteOrBuilder, com.google.protobuf.MessageOrBuilder, Cloneable

Enclosing class: SentencepieceModel.TrainerSpec

public static final class SentencepieceModel.TrainerSpec.Builder extends com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder> implements SentencepieceModel.TrainerSpecOrBuilder

 TrainerSpec encodes various parameters for SentencePiece training.
 Next id: 55

Protobuf type com.google.genai.proto.TrainerSpec

Method Summary

Modifier and Type

Method

Description

SentencepieceModel.TrainerSpec.Builder

addAcceptLanguage(String value)

List of the languages this model can accept.

SentencepieceModel.TrainerSpec.Builder

addAcceptLanguageBytes(com.google.protobuf.ByteString value)

List of the languages this model can accept.

SentencepieceModel.TrainerSpec.Builder

addAllAcceptLanguage(Iterable<String> values)

List of the languages this model can accept.

SentencepieceModel.TrainerSpec.Builder

addAllControlSymbols(Iterable<String> values)

Vocabulary management. Defines control symbols used as an indicator to change the behavior of the decoder.

SentencepieceModel.TrainerSpec.Builder

addAllInput(Iterable<String> values)

General parameters. Input corpus files.

SentencepieceModel.TrainerSpec.Builder

addAllUserDefinedSymbols(Iterable<String> values)

Defines user defined symbols.

SentencepieceModel.TrainerSpec.Builder

addControlSymbols(String value)

Vocabulary management. Defines control symbols used as an indicator to change the behavior of the decoder.

SentencepieceModel.TrainerSpec.Builder

addControlSymbolsBytes(com.google.protobuf.ByteString value)

Vocabulary management. Defines control symbols used as an indicator to change the behavior of the decoder.

<Type> SentencepieceModel.TrainerSpec.Builder

addExtension(com.google.protobuf.GeneratedMessage.GeneratedExtension<SentencepieceModel.TrainerSpec,List<Type>> extension, Type value)

SentencepieceModel.TrainerSpec.Builder

addInput(String value)

General parameters. Input corpus files.

SentencepieceModel.TrainerSpec.Builder

addInputBytes(com.google.protobuf.ByteString value)

General parameters. Input corpus files.

SentencepieceModel.TrainerSpec.Builder

addRepeatedField(com.google.protobuf.Descriptors.FieldDescriptor field, Object value)

SentencepieceModel.TrainerSpec.Builder

addUserDefinedSymbols(String value)

Defines user defined symbols.

SentencepieceModel.TrainerSpec.Builder

addUserDefinedSymbolsBytes(com.google.protobuf.ByteString value)

Defines user defined symbols.

SentencepieceModel.TrainerSpec

build()

SentencepieceModel.TrainerSpec

buildPartial()

SentencepieceModel.TrainerSpec.Builder

clear()

SentencepieceModel.TrainerSpec.Builder

clearAcceptLanguage()

List of the languages this model can accept.

SentencepieceModel.TrainerSpec.Builder

clearAllowWhitespaceOnlyPieces()

Allows pieces that only contain whitespaces instead of appearing only as prefix or suffix of other pieces.

SentencepieceModel.TrainerSpec.Builder

clearBosId()

<s>

SentencepieceModel.TrainerSpec.Builder

clearBosPiece()

optional string bos_piece = 46 [default = "<s>"];

SentencepieceModel.TrainerSpec.Builder

clearByteFallback()

Decomposes unknown pieces into UTF-8 bytes.

SentencepieceModel.TrainerSpec.Builder

clearCharacterCoverage()

Training parameters.

SentencepieceModel.TrainerSpec.Builder

clearControlSymbols()

Vocabulary management. Defines control symbols used as an indicator to change the behavior of the decoder.

SentencepieceModel.TrainerSpec.Builder

clearDifferentialPrivacyClippingThreshold()

Clipping threshold to apply after adding noise.

SentencepieceModel.TrainerSpec.Builder

clearDifferentialPrivacyNoiseLevel()

Set these parameters if you need DP version of sentencepiece.

SentencepieceModel.TrainerSpec.Builder

clearEnableDifferentialPrivacy()

Whether to use DP version of sentencepiece.

SentencepieceModel.TrainerSpec.Builder

clearEosId()

</s>

SentencepieceModel.TrainerSpec.Builder

clearEosPiece()

optional string eos_piece = 47 [default = "</s>"];

<T> SentencepieceModel.TrainerSpec.Builder

clearExtension(com.google.protobuf.GeneratedMessage.GeneratedExtension<SentencepieceModel.TrainerSpec,T> extension)

SentencepieceModel.TrainerSpec.Builder

clearField(com.google.protobuf.Descriptors.FieldDescriptor field)

SentencepieceModel.TrainerSpec.Builder

clearHardVocabLimit()

`vocab_size` is treated as hard limit.

SentencepieceModel.TrainerSpec.Builder

clearInput()

General parameters. Input corpus files.

SentencepieceModel.TrainerSpec.Builder

clearInputFormat()

Input corpus format: "text": one-sentence-per-line text format (default) "tsv": sentence <tab> freq

SentencepieceModel.TrainerSpec.Builder

clearInputSentenceSize()

Maximum size of sentences the trainer loads from `input` parameter.

SentencepieceModel.TrainerSpec.Builder

clearMaxSentenceLength()

The maximum sentence length in bytes.

SentencepieceModel.TrainerSpec.Builder

clearMaxSentencepieceLength()

SentencePiece parameters that control the shape of sentence pieces.

SentencepieceModel.TrainerSpec.Builder

clearMiningSentenceSize()

Deprecated.

SentencepieceModel.TrainerSpec.Builder

clearModelPrefix()

Output model file prefix.

SentencepieceModel.TrainerSpec.Builder

clearModelType()

optional .com.google.genai.proto.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];

SentencepieceModel.TrainerSpec.Builder

clearNumSubIterations()

Number of EM sub iterations.

SentencepieceModel.TrainerSpec.Builder

clearNumThreads()

Number of threads in the training.

SentencepieceModel.TrainerSpec.Builder

clearOneof(com.google.protobuf.Descriptors.OneofDescriptor oneof)

SentencepieceModel.TrainerSpec.Builder

clearPadId()

<pad> (padding)

SentencepieceModel.TrainerSpec.Builder

clearPadPiece()

optional string pad_piece = 48 [default = "<pad>"];

SentencepieceModel.TrainerSpec.Builder

clearPretokenizationDelimiter()

Defines the pre-tokenization delimiter.

SentencepieceModel.TrainerSpec.Builder

clearRequiredChars()

Defines required characters.

SentencepieceModel.TrainerSpec.Builder

clearSeedSentencepiecesFile()

Path to a seed sentencepieces file, with one tab-separated seed sentencepiece <tab> frequency per line.

SentencepieceModel.TrainerSpec.Builder

clearSeedSentencepieceSize()

The size of seed sentencepieces.

SentencepieceModel.TrainerSpec.Builder

clearSelfTestSampleSize()

Size of self-test samples, which are encoded in the model file.

SentencepieceModel.TrainerSpec.Builder

clearShrinkingFactor()

In every EM sub-iteration, keeps the top `shrinking_factor` * `current sentencepieces size` with respect to the loss of the sentence piece.

SentencepieceModel.TrainerSpec.Builder

clearShuffleInputSentence()

optional bool shuffle_input_sentence = 19 [default = true];

SentencepieceModel.TrainerSpec.Builder

clearSplitByNumber()

When `split_by_number` is true, put a boundary between number and non-number transition.

SentencepieceModel.TrainerSpec.Builder

clearSplitByUnicodeScript()

Uses Unicode script to split sentence pieces.

SentencepieceModel.TrainerSpec.Builder

clearSplitByWhitespace()

Use a white space to split sentence pieces.

SentencepieceModel.TrainerSpec.Builder

clearSplitDigits()

Split all digits (0-9) into separate pieces.

SentencepieceModel.TrainerSpec.Builder

clearTrainExtremelyLargeCorpus()

Increase bit depth to allow unigram model training on large (>10M sentences) corpora.

SentencepieceModel.TrainerSpec.Builder

clearTrainingSentenceSize()

Deprecated.

SentencepieceModel.TrainerSpec.Builder

clearTreatWhitespaceAsSuffix()

Adds whitespace symbol (_) as a suffix instead of prefix.

SentencepieceModel.TrainerSpec.Builder

clearUnkId()

Reserved special meta tokens.

SentencepieceModel.TrainerSpec.Builder

clearUnkPiece()

optional string unk_piece = 45 [default = "<unk>"];

SentencepieceModel.TrainerSpec.Builder

clearUnkSurface()

Encodes <unk> into U+2047 (DOUBLE QUESTION MARK), since this character can be useful both for user and developer.

SentencepieceModel.TrainerSpec.Builder

clearUseAllVocab()

Use all symbols for vocab extraction.

SentencepieceModel.TrainerSpec.Builder

clearUserDefinedSymbols()

Defines user defined symbols.

SentencepieceModel.TrainerSpec.Builder

clearVocabSize()

Vocabulary size.

SentencepieceModel.TrainerSpec.Builder

clearVocabularyOutputPieceScore()

When creating the vocabulary file, defines whether or not to additionally output the score for each piece.

SentencepieceModel.TrainerSpec.Builder

clone()

String

getAcceptLanguage(int index)

List of the languages this model can accept.

com.google.protobuf.ByteString

getAcceptLanguageBytes(int index)

List of the languages this model can accept.

int

getAcceptLanguageCount()

List of the languages this model can accept.

com.google.protobuf.ProtocolStringList

getAcceptLanguageList()

List of the languages this model can accept.

boolean

getAllowWhitespaceOnlyPieces()

Allows pieces that only contain whitespaces instead of appearing only as prefix or suffix of other pieces.

int

getBosId()

<s>

String

getBosPiece()

optional string bos_piece = 46 [default = "<s>"];

com.google.protobuf.ByteString

getBosPieceBytes()

optional string bos_piece = 46 [default = "<s>"];

boolean

getByteFallback()

Decomposes unknown pieces into UTF-8 bytes.

float

getCharacterCoverage()

Training parameters.

String

getControlSymbols(int index)

Vocabulary management. Defines control symbols used as an indicator to change the behavior of the decoder.

com.google.protobuf.ByteString

getControlSymbolsBytes(int index)

Vocabulary management. Defines control symbols used as an indicator to change the behavior of the decoder.

int

getControlSymbolsCount()

Vocabulary management. Defines control symbols used as an indicator to change the behavior of the decoder.

com.google.protobuf.ProtocolStringList

getControlSymbolsList()

Vocabulary management. Defines control symbols used as an indicator to change the behavior of the decoder.

SentencepieceModel.TrainerSpec

getDefaultInstanceForType()

static final com.google.protobuf.Descriptors.Descriptor

getDescriptor()

com.google.protobuf.Descriptors.Descriptor

getDescriptorForType()

long

getDifferentialPrivacyClippingThreshold()

Clipping threshold to apply after adding noise.

float

getDifferentialPrivacyNoiseLevel()

Set these parameters if you need DP version of sentencepiece.

boolean

getEnableDifferentialPrivacy()

Whether to use DP version of sentencepiece.

int

getEosId()

</s>

String

getEosPiece()

optional string eos_piece = 47 [default = "</s>"];

com.google.protobuf.ByteString

getEosPieceBytes()

optional string eos_piece = 47 [default = "</s>"];

boolean

getHardVocabLimit()

`vocab_size` is treated as hard limit.

String

getInput(int index)

General parameters. Input corpus files.

com.google.protobuf.ByteString

getInputBytes(int index)

General parameters. Input corpus files.

int

getInputCount()

General parameters. Input corpus files.

String

getInputFormat()

Input corpus format: "text": one-sentence-per-line text format (default) "tsv": sentence <tab> freq

com.google.protobuf.ByteString

getInputFormatBytes()

Input corpus format: "text": one-sentence-per-line text format (default) "tsv": sentence <tab> freq

com.google.protobuf.ProtocolStringList

getInputList()

General parameters. Input corpus files.

long

getInputSentenceSize()

Maximum size of sentences the trainer loads from `input` parameter.

int

getMaxSentenceLength()

The maximum sentence length in bytes.

int

getMaxSentencepieceLength()

SentencePiece parameters that control the shape of sentence pieces.

int

getMiningSentenceSize()

Deprecated.
com.google.genai.proto.TrainerSpec.mining_sentence_size is deprecated.

String

getModelPrefix()

Output model file prefix.

com.google.protobuf.ByteString

getModelPrefixBytes()

Output model file prefix.

SentencepieceModel.TrainerSpec.ModelType

getModelType()

optional .com.google.genai.proto.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];

int

getNumSubIterations()

Number of EM sub iterations.

int

getNumThreads()

Number of threads in the training.

int

getPadId()

<pad> (padding)

String

getPadPiece()

optional string pad_piece = 48 [default = "<pad>"];

com.google.protobuf.ByteString

getPadPieceBytes()

optional string pad_piece = 48 [default = "<pad>"];

String

getPretokenizationDelimiter()

Defines the pre-tokenization delimiter.

com.google.protobuf.ByteString

getPretokenizationDelimiterBytes()

Defines the pre-tokenization delimiter.

String

getRequiredChars()

Defines required characters.

com.google.protobuf.ByteString

getRequiredCharsBytes()

Defines required characters.

String

getSeedSentencepiecesFile()

Path to a seed sentencepieces file, with one tab-separated seed sentencepiece <tab> frequency per line.

com.google.protobuf.ByteString

getSeedSentencepiecesFileBytes()

Path to a seed sentencepieces file, with one tab-separated seed sentencepiece <tab> frequency per line.

int

getSeedSentencepieceSize()

The size of seed sentencepieces.

int

getSelfTestSampleSize()

Size of self-test samples, which are encoded in the model file.

float

getShrinkingFactor()

In every EM sub-iteration, keeps the top `shrinking_factor` * `current sentencepieces size` with respect to the loss of the sentence piece.

boolean

getShuffleInputSentence()

optional bool shuffle_input_sentence = 19 [default = true];

boolean

getSplitByNumber()

When `split_by_number` is true, put a boundary between number and non-number transition.

boolean

getSplitByUnicodeScript()

Uses Unicode script to split sentence pieces.

boolean

getSplitByWhitespace()

Use a white space to split sentence pieces.

boolean

getSplitDigits()

Split all digits (0-9) into separate pieces.

boolean

getTrainExtremelyLargeCorpus()

Increase bit depth to allow unigram model training on large (>10M sentences) corpora.

int

getTrainingSentenceSize()

Deprecated.
com.google.genai.proto.TrainerSpec.training_sentence_size is deprecated.

boolean

getTreatWhitespaceAsSuffix()

Adds whitespace symbol (_) as a suffix instead of prefix.

int

getUnkId()

Reserved special meta tokens.

String

getUnkPiece()

optional string unk_piece = 45 [default = "<unk>"];

com.google.protobuf.ByteString

getUnkPieceBytes()

optional string unk_piece = 45 [default = "<unk>"];

String

getUnkSurface()

Encodes <unk> into U+2047 (DOUBLE QUESTION MARK), since this character can be useful both for user and developer.

com.google.protobuf.ByteString

getUnkSurfaceBytes()

Encodes <unk> into U+2047 (DOUBLE QUESTION MARK), since this character can be useful both for user and developer.

boolean

getUseAllVocab()

Use all symbols for vocab extraction.

String

getUserDefinedSymbols(int index)

Defines user defined symbols.

com.google.protobuf.ByteString

getUserDefinedSymbolsBytes(int index)

Defines user defined symbols.

int

getUserDefinedSymbolsCount()

Defines user defined symbols.

com.google.protobuf.ProtocolStringList

getUserDefinedSymbolsList()

Defines user defined symbols.

int

getVocabSize()

Vocabulary size.

boolean

getVocabularyOutputPieceScore()

When creating the vocabulary file, defines whether or not to additionally output the score for each piece.

boolean

hasAllowWhitespaceOnlyPieces()

Allows pieces that only contain whitespaces instead of appearing only as prefix or suffix of other pieces.

boolean

hasBosId()

<s>

boolean

hasBosPiece()

optional string bos_piece = 46 [default = "<s>"];

boolean

hasByteFallback()

Decomposes unknown pieces into UTF-8 bytes.

boolean

hasCharacterCoverage()

Training parameters.

boolean

hasDifferentialPrivacyClippingThreshold()

Clipping threshold to apply after adding noise.

boolean

hasDifferentialPrivacyNoiseLevel()

Set these parameters if you need DP version of sentencepiece.

boolean

hasEnableDifferentialPrivacy()

Whether to use DP version of sentencepiece.

boolean

hasEosId()

</s>

boolean

hasEosPiece()

optional string eos_piece = 47 [default = "</s>"];

boolean

hasHardVocabLimit()

`vocab_size` is treated as hard limit.

boolean

hasInputFormat()

Input corpus format: "text": one-sentence-per-line text format (default) "tsv": sentence <tab> freq

boolean

hasInputSentenceSize()

Maximum size of sentences the trainer loads from `input` parameter.

boolean

hasMaxSentenceLength()

The maximum sentence length in bytes.

boolean

hasMaxSentencepieceLength()

SentencePiece parameters that control the shape of sentence pieces.

boolean

hasMiningSentenceSize()

Deprecated.
com.google.genai.proto.TrainerSpec.mining_sentence_size is deprecated.

boolean

hasModelPrefix()

Output model file prefix.

boolean

hasModelType()

optional .com.google.genai.proto.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];

boolean

hasNumSubIterations()

Number of EM sub iterations.

boolean

hasNumThreads()

Number of threads in the training.

boolean

hasPadId()

<pad> (padding)

boolean

hasPadPiece()

optional string pad_piece = 48 [default = "<pad>"];

boolean

hasPretokenizationDelimiter()

Defines the pre-tokenization delimiter.

boolean

hasRequiredChars()

Defines required characters.

boolean

hasSeedSentencepiecesFile()

Path to a seed sentencepieces file, with one tab-separated seed sentencepiece <tab> frequency per line.

boolean

hasSeedSentencepieceSize()

The size of seed sentencepieces.

boolean

hasSelfTestSampleSize()

Size of self-test samples, which are encoded in the model file.

boolean

hasShrinkingFactor()

In every EM sub-iteration, keeps the top `shrinking_factor` * `current sentencepieces size` with respect to the loss of the sentence piece.

boolean

hasShuffleInputSentence()

optional bool shuffle_input_sentence = 19 [default = true];

boolean

hasSplitByNumber()

When `split_by_number` is true, put a boundary between number and non-number transition.

boolean

hasSplitByUnicodeScript()

Uses Unicode script to split sentence pieces.

boolean

hasSplitByWhitespace()

Use a white space to split sentence pieces.

boolean

hasSplitDigits()

Split all digits (0-9) into separate pieces.

boolean

hasTrainExtremelyLargeCorpus()

Increase bit depth to allow unigram model training on large (>10M sentences) corpora.

boolean

hasTrainingSentenceSize()

Deprecated.
com.google.genai.proto.TrainerSpec.training_sentence_size is deprecated.

boolean

hasTreatWhitespaceAsSuffix()

Adds whitespace symbol (_) as a suffix instead of prefix.

boolean

hasUnkId()

Reserved special meta tokens.

boolean

hasUnkPiece()

optional string unk_piece = 45 [default = "<unk>"];

boolean

hasUnkSurface()

Encodes <unk> into U+2047 (DOUBLE QUESTION MARK), since this character can be useful both for user and developer.

boolean

hasUseAllVocab()

Use all symbols for vocab extraction.

boolean

hasVocabSize()

Vocabulary size.

boolean

hasVocabularyOutputPieceScore()

When creating the vocabulary file, defines whether or not to additionally output the score for each piece.

final boolean

isInitialized()

SentencepieceModel.TrainerSpec.Builder

mergeFrom(SentencepieceModel.TrainerSpec other)

SentencepieceModel.TrainerSpec.Builder

mergeFrom(com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)

SentencepieceModel.TrainerSpec.Builder

mergeFrom(com.google.protobuf.Message other)

final SentencepieceModel.TrainerSpec.Builder

mergeUnknownFields(com.google.protobuf.UnknownFieldSet unknownFields)

SentencepieceModel.TrainerSpec.Builder

setAcceptLanguage(int index, String value)

List of the languages this model can accept.

SentencepieceModel.TrainerSpec.Builder

setAllowWhitespaceOnlyPieces(boolean value)

Allows pieces that only contain whitespaces instead of appearing only as prefix or suffix of other pieces.

SentencepieceModel.TrainerSpec.Builder

setBosId(int value)

<s>

SentencepieceModel.TrainerSpec.Builder

setBosPiece(String value)

optional string bos_piece = 46 [default = "<s>"];

SentencepieceModel.TrainerSpec.Builder

setBosPieceBytes(com.google.protobuf.ByteString value)

optional string bos_piece = 46 [default = "<s>"];

SentencepieceModel.TrainerSpec.Builder

setByteFallback(boolean value)

Decomposes unknown pieces into UTF-8 bytes.

SentencepieceModel.TrainerSpec.Builder

setCharacterCoverage(float value)

Training parameters.

SentencepieceModel.TrainerSpec.Builder

setControlSymbols(int index, String value)

Vocabulary management. Defines control symbols used as an indicator to change the behavior of the decoder.

SentencepieceModel.TrainerSpec.Builder

setDifferentialPrivacyClippingThreshold(long value)

Clipping threshold to apply after adding noise.

SentencepieceModel.TrainerSpec.Builder

setDifferentialPrivacyNoiseLevel(float value)

Set these parameters if you need DP version of sentencepiece.

SentencepieceModel.TrainerSpec.Builder

setEnableDifferentialPrivacy(boolean value)

Whether to use DP version of sentencepiece.

SentencepieceModel.TrainerSpec.Builder

setEosId(int value)

</s>

SentencepieceModel.TrainerSpec.Builder

setEosPiece(String value)

optional string eos_piece = 47 [default = "</s>"];

SentencepieceModel.TrainerSpec.Builder

setEosPieceBytes(com.google.protobuf.ByteString value)

optional string eos_piece = 47 [default = "</s>"];

<Type> SentencepieceModel.TrainerSpec.Builder

setExtension(com.google.protobuf.GeneratedMessage.GeneratedExtension<SentencepieceModel.TrainerSpec,List<Type>> extension, int index, Type value)

<Type> SentencepieceModel.TrainerSpec.Builder

setExtension(com.google.protobuf.GeneratedMessage.GeneratedExtension<SentencepieceModel.TrainerSpec,Type> extension, Type value)

SentencepieceModel.TrainerSpec.Builder

setField(com.google.protobuf.Descriptors.FieldDescriptor field, Object value)

SentencepieceModel.TrainerSpec.Builder

setHardVocabLimit(boolean value)

`vocab_size` is treated as hard limit.

SentencepieceModel.TrainerSpec.Builder

setInput(int index, String value)

General parameters. Input corpus files.

SentencepieceModel.TrainerSpec.Builder

setInputFormat(String value)

Input corpus format: "text": one-sentence-per-line text format (default) "tsv": sentence <tab> freq

SentencepieceModel.TrainerSpec.Builder

setInputFormatBytes(com.google.protobuf.ByteString value)

Input corpus format: "text": one-sentence-per-line text format (default) "tsv": sentence <tab> freq

SentencepieceModel.TrainerSpec.Builder

setInputSentenceSize(long value)

Maximum size of sentences the trainer loads from `input` parameter.

SentencepieceModel.TrainerSpec.Builder

setMaxSentenceLength(int value)

The maximum sentence length in bytes.

SentencepieceModel.TrainerSpec.Builder

setMaxSentencepieceLength(int value)

SentencePiece parameters that control the shape of sentence pieces.

SentencepieceModel.TrainerSpec.Builder

setMiningSentenceSize(int value)

Deprecated.

SentencepieceModel.TrainerSpec.Builder

setModelPrefix(String value)

Output model file prefix.

SentencepieceModel.TrainerSpec.Builder

setModelPrefixBytes(com.google.protobuf.ByteString value)

Output model file prefix.

SentencepieceModel.TrainerSpec.Builder

setModelType(SentencepieceModel.TrainerSpec.ModelType value)

optional .com.google.genai.proto.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];

SentencepieceModel.TrainerSpec.Builder

setNumSubIterations(int value)

Number of EM sub iterations.

SentencepieceModel.TrainerSpec.Builder

setNumThreads(int value)

Number of threads in the training.

SentencepieceModel.TrainerSpec.Builder

setPadId(int value)

<pad> (padding)

SentencepieceModel.TrainerSpec.Builder

setPadPiece(String value)

optional string pad_piece = 48 [default = "<pad>"];

SentencepieceModel.TrainerSpec.Builder

setPadPieceBytes(com.google.protobuf.ByteString value)

optional string pad_piece = 48 [default = "<pad>"];

SentencepieceModel.TrainerSpec.Builder

setPretokenizationDelimiter(String value)

Defines the pre-tokenization delimiter.

SentencepieceModel.TrainerSpec.Builder

setPretokenizationDelimiterBytes(com.google.protobuf.ByteString value)

Defines the pre-tokenization delimiter.

SentencepieceModel.TrainerSpec.Builder

setRepeatedField(com.google.protobuf.Descriptors.FieldDescriptor field, int index, Object value)

SentencepieceModel.TrainerSpec.Builder

setRequiredChars(String value)

Defines required characters.

SentencepieceModel.TrainerSpec.Builder

setRequiredCharsBytes(com.google.protobuf.ByteString value)

Defines required characters.

SentencepieceModel.TrainerSpec.Builder

setSeedSentencepiecesFile(String value)

Path to a seed sentencepieces file, with one tab-separated seed sentencepiece <tab> frequency per line.

SentencepieceModel.TrainerSpec.Builder

setSeedSentencepiecesFileBytes(com.google.protobuf.ByteString value)

Path to a seed sentencepieces file, with one tab-separated seed sentencepiece <tab> frequency per line.

SentencepieceModel.TrainerSpec.Builder

setSeedSentencepieceSize(int value)

The size of seed sentencepieces.

SentencepieceModel.TrainerSpec.Builder

setSelfTestSampleSize(int value)

Size of self-test samples, which are encoded in the model file.

SentencepieceModel.TrainerSpec.Builder

setShrinkingFactor(float value)

In every EM sub-iteration, keeps the top `shrinking_factor` * `current sentencepieces size` with respect to the loss of the sentence piece.

SentencepieceModel.TrainerSpec.Builder

setShuffleInputSentence(boolean value)

optional bool shuffle_input_sentence = 19 [default = true];

SentencepieceModel.TrainerSpec.Builder

setSplitByNumber(boolean value)

When `split_by_number` is true, put a boundary between number and non-number transition.

SentencepieceModel.TrainerSpec.Builder

setSplitByUnicodeScript(boolean value)

Uses Unicode script to split sentence pieces.

SentencepieceModel.TrainerSpec.Builder

setSplitByWhitespace(boolean value)

Use a white space to split sentence pieces.

SentencepieceModel.TrainerSpec.Builder

setSplitDigits(boolean value)

Split all digits (0-9) into separate pieces.

SentencepieceModel.TrainerSpec.Builder

setTrainExtremelyLargeCorpus(boolean value)

Increase bit depth to allow unigram model training on large (>10M sentences) corpora.

SentencepieceModel.TrainerSpec.Builder

setTrainingSentenceSize(int value)

Deprecated.

SentencepieceModel.TrainerSpec.Builder

setTreatWhitespaceAsSuffix(boolean value)

Adds whitespace symbol (_) as a suffix instead of prefix.

SentencepieceModel.TrainerSpec.Builder

setUnkId(int value)

Reserved special meta tokens.

final SentencepieceModel.TrainerSpec.Builder

setUnknownFields(com.google.protobuf.UnknownFieldSet unknownFields)

SentencepieceModel.TrainerSpec.Builder

setUnkPiece(String value)

optional string unk_piece = 45 [default = "<unk>"];

SentencepieceModel.TrainerSpec.Builder

setUnkPieceBytes(com.google.protobuf.ByteString value)

optional string unk_piece = 45 [default = "<unk>"];

SentencepieceModel.TrainerSpec.Builder

setUnkSurface(String value)

Encodes <unk> into U+2047 (DOUBLE QUESTION MARK), since this character can be useful both for user and developer.

SentencepieceModel.TrainerSpec.Builder

setUnkSurfaceBytes(com.google.protobuf.ByteString value)

Encodes <unk> into U+2047 (DOUBLE QUESTION MARK), since this character can be useful both for user and developer.

SentencepieceModel.TrainerSpec.Builder

setUseAllVocab(boolean value)

Use all symbols for vocab extraction.

SentencepieceModel.TrainerSpec.Builder

setUserDefinedSymbols(int index, String value)

Defines user defined symbols.

SentencepieceModel.TrainerSpec.Builder

setVocabSize(int value)

Vocabulary size.

SentencepieceModel.TrainerSpec.Builder

setVocabularyOutputPieceScore(boolean value)

When creating the vocabulary file, defines whether or not to additionally output the score for each piece.

Methods inherited from class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder
addExtension, addExtension, clearExtension, clearExtension, getAllFields, getExtension, getExtension, getExtension, getExtension, getExtension, getExtension, getExtensionCount, getExtensionCount, getExtensionCount, getField, getFieldBuilder, getRepeatedField, getRepeatedFieldBuilder, getRepeatedFieldCount, hasExtension, hasExtension, hasExtension, hasField, newBuilderForField, setExtension, setExtension, setExtension, setExtension

Methods inherited from class com.google.protobuf.GeneratedMessageV3.Builder
getOneofFieldDescriptor, getUnknownFields, hasOneof

Methods inherited from class com.google.protobuf.AbstractMessage.Builder
findInitializationErrors, getInitializationErrorString, mergeFrom, mergeFrom, mergeFrom, mergeFrom, mergeFrom, mergeFrom, mergeFrom, mergeFrom, mergeFrom, toString

Methods inherited from class com.google.protobuf.AbstractMessageLite.Builder
mergeDelimitedFrom, mergeDelimitedFrom, mergeFrom

Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait

Methods inherited from interface com.google.protobuf.GeneratedMessageV3.ExtendableMessageOrBuilder
getExtension, getExtension, getExtension, getExtension, getExtension, getExtension, getExtensionCount, getExtensionCount, getExtensionCount, hasExtension, hasExtension, hasExtension

Methods inherited from interface com.google.protobuf.Message.Builder
mergeDelimitedFrom, mergeDelimitedFrom

Methods inherited from interface com.google.protobuf.MessageLite.Builder
mergeFrom

Methods inherited from interface com.google.protobuf.MessageOrBuilder
findInitializationErrors, getAllFields, getField, getInitializationErrorString, getOneofFieldDescriptor, getRepeatedField, getRepeatedFieldCount, getUnknownFields, hasField, hasOneof

Method Details
- getDescriptor
  
  public static final com.google.protobuf.Descriptors.Descriptor getDescriptor()
- clear
  
  public SentencepieceModel.TrainerSpec.Builder clear()
  
  Specified by:
  
  clear in interface com.google.protobuf.Message.Builder
  
  Specified by:
  
  clear in interface com.google.protobuf.MessageLite.Builder
  
  Overrides:
  
  clear in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
- getDescriptorForType
  
  public com.google.protobuf.Descriptors.Descriptor getDescriptorForType()
  
  Specified by:
  
  getDescriptorForType in interface com.google.protobuf.Message.Builder
  
  Specified by:
  
  getDescriptorForType in interface com.google.protobuf.MessageOrBuilder
  
  Overrides:
  
  getDescriptorForType in class com.google.protobuf.GeneratedMessageV3.Builder<SentencepieceModel.TrainerSpec.Builder>
- getDefaultInstanceForType
  
  public SentencepieceModel.TrainerSpec getDefaultInstanceForType()
  
  Specified by:
  
  getDefaultInstanceForType in interface com.google.protobuf.GeneratedMessageV3.ExtendableMessageOrBuilder<SentencepieceModel.TrainerSpec>
  
  Specified by:
  
  getDefaultInstanceForType in interface com.google.protobuf.MessageLiteOrBuilder
  
  Specified by:
  
  getDefaultInstanceForType in interface com.google.protobuf.MessageOrBuilder
- build
  
  public SentencepieceModel.TrainerSpec build()
  
  Specified by:
  
  build in interface com.google.protobuf.Message.Builder
  
  Specified by:
  
  build in interface com.google.protobuf.MessageLite.Builder
- buildPartial
  
  public SentencepieceModel.TrainerSpec buildPartial()
  
  Specified by:
  
  buildPartial in interface com.google.protobuf.Message.Builder
  
  Specified by:
  
  buildPartial in interface com.google.protobuf.MessageLite.Builder
- clone
  
  public SentencepieceModel.TrainerSpec.Builder clone()
  
  Specified by:
  
  clone in interface com.google.protobuf.Message.Builder
  
  Specified by:
  
  clone in interface com.google.protobuf.MessageLite.Builder
  
  Overrides:
  
  clone in class com.google.protobuf.GeneratedMessageV3.Builder<SentencepieceModel.TrainerSpec.Builder>
- setField
  
  public SentencepieceModel.TrainerSpec.Builder setField(com.google.protobuf.Descriptors.FieldDescriptor field, Object value)
  
  Specified by:
  
  setField in interface com.google.protobuf.Message.Builder
  
  Overrides:
  
  setField in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
- clearField
  
  public SentencepieceModel.TrainerSpec.Builder clearField(com.google.protobuf.Descriptors.FieldDescriptor field)
  
  Specified by:
  
  clearField in interface com.google.protobuf.Message.Builder
  
  Overrides:
  
  clearField in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
- clearOneof
  
  public SentencepieceModel.TrainerSpec.Builder clearOneof(com.google.protobuf.Descriptors.OneofDescriptor oneof)
  
  Specified by:
  
  clearOneof in interface com.google.protobuf.Message.Builder
  
  Overrides:
  
  clearOneof in class com.google.protobuf.GeneratedMessageV3.Builder<SentencepieceModel.TrainerSpec.Builder>
- setRepeatedField
  
  public SentencepieceModel.TrainerSpec.Builder setRepeatedField(com.google.protobuf.Descriptors.FieldDescriptor field, int index, Object value)
  
  Specified by:
  
  setRepeatedField in interface com.google.protobuf.Message.Builder
  
  Overrides:
  
  setRepeatedField in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
- addRepeatedField
  
  public SentencepieceModel.TrainerSpec.Builder addRepeatedField(com.google.protobuf.Descriptors.FieldDescriptor field, Object value)
  
  Specified by:
  
  addRepeatedField in interface com.google.protobuf.Message.Builder
  
  Overrides:
  
  addRepeatedField in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
- setExtension
  
  public <Type> SentencepieceModel.TrainerSpec.Builder setExtension(com.google.protobuf.GeneratedMessage.GeneratedExtension<SentencepieceModel.TrainerSpec,Type> extension, Type value)
  
  Overrides:
  
  setExtension in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
- setExtension
  
  public <Type> SentencepieceModel.TrainerSpec.Builder setExtension(com.google.protobuf.GeneratedMessage.GeneratedExtension<SentencepieceModel.TrainerSpec,List<Type>> extension, int index, Type value)
  
  Overrides:
  
  setExtension in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
- addExtension
  
  public <Type> SentencepieceModel.TrainerSpec.Builder addExtension(com.google.protobuf.GeneratedMessage.GeneratedExtension<SentencepieceModel.TrainerSpec,List<Type>> extension, Type value)
  
  Overrides:
  
  addExtension in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
- clearExtension
  
  public <T> SentencepieceModel.TrainerSpec.Builder clearExtension(com.google.protobuf.GeneratedMessage.GeneratedExtension<SentencepieceModel.TrainerSpec,T> extension)
  
  Overrides:
  
  clearExtension in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
- mergeFrom
  
  public SentencepieceModel.TrainerSpec.Builder mergeFrom(com.google.protobuf.Message other)
  
  Specified by:
  
  mergeFrom in interface com.google.protobuf.Message.Builder
  
  Overrides:
  
  mergeFrom in class com.google.protobuf.AbstractMessage.Builder<SentencepieceModel.TrainerSpec.Builder>
- mergeFrom
  
  public SentencepieceModel.TrainerSpec.Builder mergeFrom(SentencepieceModel.TrainerSpec other)
- isInitialized
  
  public final boolean isInitialized()
  
  Specified by:
  
  isInitialized in interface com.google.protobuf.MessageLiteOrBuilder
  
  Overrides:
  
  isInitialized in class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder<SentencepieceModel.TrainerSpec,SentencepieceModel.TrainerSpec.Builder>
- mergeFrom
  
  public SentencepieceModel.TrainerSpec.Builder mergeFrom(com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws IOException
  
  Specified by:
  
  mergeFrom in interface com.google.protobuf.Message.Builder
  
  Specified by:
  
  mergeFrom in interface com.google.protobuf.MessageLite.Builder
  
  Overrides:
  
  mergeFrom in class com.google.protobuf.AbstractMessage.Builder<SentencepieceModel.TrainerSpec.Builder>
  
  Throws:
  
  IOException
- getInputList
  
  public com.google.protobuf.ProtocolStringList getInputList()
  General parameters. Input corpus files. The trainer accepts the following two formats: A) Monolingual: plain text, one sentence per line. B) Bilingual: TSV, source sentence <tab> target sentence. When bilingual data is passed, a shared vocabulary model is built. Note that the input file must be a raw corpus, not a preprocessed corpus. The trainer only loads the first `input_sentence_size` sentences specified with this parameter.
  repeated string input = 1;
  Specified by:
  
  getInputList in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  A list containing the input.
- getInputCount
  
  public int getInputCount()
  General parameters. Input corpus files. The trainer accepts the following two formats: A) Monolingual: plain text, one sentence per line. B) Bilingual: TSV, source sentence <tab> target sentence. When bilingual data is passed, a shared vocabulary model is built. Note that the input file must be a raw corpus, not a preprocessed corpus. The trainer only loads the first `input_sentence_size` sentences specified with this parameter.
  repeated string input = 1;
  Specified by:
  
  getInputCount in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The count of input.
- getInput
  
  public String getInput(int index)
  General parameters. Input corpus files. The trainer accepts the following two formats: A) Monolingual: plain text, one sentence per line. B) Bilingual: TSV, source sentence <tab> target sentence. When bilingual data is passed, a shared vocabulary model is built. Note that the input file must be a raw corpus, not a preprocessed corpus. The trainer only loads the first `input_sentence_size` sentences specified with this parameter.
  repeated string input = 1;
  Specified by:
  
  getInput in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Parameters:
  
  index - The index of the element to return.
  
  Returns:
  
  The input at the given index.
- getInputBytes
  
  public com.google.protobuf.ByteString getInputBytes(int index)
  General parameters. Input corpus files. The trainer accepts the following two formats: A) Monolingual: plain text, one sentence per line. B) Bilingual: TSV, source sentence <tab> target sentence. When bilingual data is passed, a shared vocabulary model is built. Note that the input file must be a raw corpus, not a preprocessed corpus. The trainer only loads the first `input_sentence_size` sentences specified with this parameter.
  repeated string input = 1;
  Specified by:
  
  getInputBytes in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Parameters:
  
  index - The index of the value to return.
  
  Returns:
  
  The bytes of the input at the given index.
- setInput
  
  public SentencepieceModel.TrainerSpec.Builder setInput(int index, String value)
  General parameters. Input corpus files. The trainer accepts the following two formats: A) Monolingual: plain text, one sentence per line. B) Bilingual: TSV, source sentence <tab> target sentence. When bilingual data is passed, a shared vocabulary model is built. Note that the input file must be a raw corpus, not a preprocessed corpus. The trainer only loads the first `input_sentence_size` sentences specified with this parameter.
  repeated string input = 1;
  Parameters:
  
  index - The index to set the value at.
  
  value - The input to set.
  
  Returns:
  
  This builder for chaining.
- addInput
  
  public SentencepieceModel.TrainerSpec.Builder addInput(String value)
  General parameters. Input corpus files. The trainer accepts the following two formats: A) Monolingual: plain text, one sentence per line. B) Bilingual: TSV, source sentence <tab> target sentence. When bilingual data is passed, a shared vocabulary model is built. Note that the input file must be a raw corpus, not a preprocessed corpus. The trainer only loads the first `input_sentence_size` sentences specified with this parameter.
  repeated string input = 1;
  Parameters:
  
  value - The input to add.
  
  Returns:
  
  This builder for chaining.
- addAllInput
  
  public SentencepieceModel.TrainerSpec.Builder addAllInput(Iterable<String> values)
  General parameters. Input corpus files. The trainer accepts the following two formats: A) Monolingual: plain text, one sentence per line. B) Bilingual: TSV, source sentence <tab> target sentence. When bilingual data is passed, a shared vocabulary model is built. Note that the input file must be a raw corpus, not a preprocessed corpus. The trainer only loads the first `input_sentence_size` sentences specified with this parameter.
  repeated string input = 1;
  Parameters:
  
  values - The input to add.
  
  Returns:
  
  This builder for chaining.
- clearInput
  
  public SentencepieceModel.TrainerSpec.Builder clearInput()
  General parameters. Input corpus files. The trainer accepts the following two formats: A) Monolingual: plain text, one sentence per line. B) Bilingual: TSV, source sentence <tab> target sentence. When bilingual data is passed, a shared vocabulary model is built. Note that the input file must be a raw corpus, not a preprocessed corpus. The trainer only loads the first `input_sentence_size` sentences specified with this parameter.
  repeated string input = 1;
  Returns:
  
  This builder for chaining.
- addInputBytes
  
  public SentencepieceModel.TrainerSpec.Builder addInputBytes(com.google.protobuf.ByteString value)
  General parameters. Input corpus files. The trainer accepts the following two formats: A) Monolingual: plain text, one sentence per line. B) Bilingual: TSV, source sentence <tab> target sentence. When bilingual data is passed, a shared vocabulary model is built. Note that the input file must be a raw corpus, not a preprocessed corpus. The trainer only loads the first `input_sentence_size` sentences specified with this parameter.
  repeated string input = 1;
  Parameters:
  
  value - The bytes of the input to add.
  
  Returns:
  
  This builder for chaining.
- hasInputFormat
  
  public boolean hasInputFormat()
  Input corpus format: "text": one-sentence-per-line text format (default) "tsv": sentence <tab> freq
  optional string input_format = 7;
  Specified by:
  
  hasInputFormat in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the inputFormat field is set.
- getInputFormat
  
  public String getInputFormat()
  Input corpus format: "text": one-sentence-per-line text format (default) "tsv": sentence <tab> freq
  optional string input_format = 7;
  Specified by:
  
  getInputFormat in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The inputFormat.
- getInputFormatBytes
  
  public com.google.protobuf.ByteString getInputFormatBytes()
  Input corpus format: "text": one-sentence-per-line text format (default) "tsv": sentence <tab> freq
  optional string input_format = 7;
  Specified by:
  
  getInputFormatBytes in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The bytes for inputFormat.
- setInputFormat
  
  public SentencepieceModel.TrainerSpec.Builder setInputFormat(String value)
  Input corpus format: "text": one-sentence-per-line text format (default) "tsv": sentence <tab> freq
  optional string input_format = 7;
  Parameters:
  
  value - The inputFormat to set.
  
  Returns:
  
  This builder for chaining.
- clearInputFormat
  
  public SentencepieceModel.TrainerSpec.Builder clearInputFormat()
  Input corpus format: "text": one-sentence-per-line text format (default) "tsv": sentence <tab> freq
  optional string input_format = 7;
  Returns:
  
  This builder for chaining.
- setInputFormatBytes
  
  public SentencepieceModel.TrainerSpec.Builder setInputFormatBytes(com.google.protobuf.ByteString value)
  Input corpus format: "text": one-sentence-per-line text format (default) "tsv": sentence <tab> freq
  optional string input_format = 7;
  Parameters:
  
  value - The bytes for inputFormat to set.
  
  Returns:
  
  This builder for chaining.
- hasModelPrefix
  
  public boolean hasModelPrefix()
  Output model file prefix. <model_prefix>.model and <model_prefix>.vocab are generated.
  optional string model_prefix = 2;
  Specified by:
  
  hasModelPrefix in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the modelPrefix field is set.
- getModelPrefix
  
  public String getModelPrefix()
  Output model file prefix. <model_prefix>.model and <model_prefix>.vocab are generated.
  optional string model_prefix = 2;
  Specified by:
  
  getModelPrefix in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The modelPrefix.
- getModelPrefixBytes
  
  public com.google.protobuf.ByteString getModelPrefixBytes()
  Output model file prefix. <model_prefix>.model and <model_prefix>.vocab are generated.
  optional string model_prefix = 2;
  Specified by:
  
  getModelPrefixBytes in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The bytes for modelPrefix.
- setModelPrefix
  
  public SentencepieceModel.TrainerSpec.Builder setModelPrefix(String value)
  Output model file prefix. <model_prefix>.model and <model_prefix>.vocab are generated.
  optional string model_prefix = 2;
  Parameters:
  
  value - The modelPrefix to set.
  
  Returns:
  
  This builder for chaining.
- clearModelPrefix
  
  public SentencepieceModel.TrainerSpec.Builder clearModelPrefix()
  Output model file prefix. <model_prefix>.model and <model_prefix>.vocab are generated.
  optional string model_prefix = 2;
  Returns:
  
  This builder for chaining.
- setModelPrefixBytes
  
  public SentencepieceModel.TrainerSpec.Builder setModelPrefixBytes(com.google.protobuf.ByteString value)
  Output model file prefix. <model_prefix>.model and <model_prefix>.vocab are generated.
  optional string model_prefix = 2;
  Parameters:
  
  value - The bytes for modelPrefix to set.
  
  Returns:
  
  This builder for chaining.
- hasModelType
  
  public boolean hasModelType()
  
  optional .com.google.genai.proto.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];
  
  Specified by:
  
  hasModelType in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the modelType field is set.
- getModelType
  
  public SentencepieceModel.TrainerSpec.ModelType getModelType()
  
  optional .com.google.genai.proto.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];
  
  Specified by:
  
  getModelType in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The modelType.
- setModelType
  
  public SentencepieceModel.TrainerSpec.Builder setModelType(SentencepieceModel.TrainerSpec.ModelType value)
  
  optional .com.google.genai.proto.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];
  
  Parameters:
  
  value - The modelType to set.
  
  Returns:
  
  This builder for chaining.
- clearModelType
  
  public SentencepieceModel.TrainerSpec.Builder clearModelType()
  
  optional .com.google.genai.proto.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM];
  
  Returns:
  
  This builder for chaining.
- hasVocabSize
  
  public boolean hasVocabSize()
  Vocabulary size. 8k is the default size.
  optional int32 vocab_size = 4 [default = 8000];
  Specified by:
  
  hasVocabSize in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the vocabSize field is set.
- getVocabSize
  
  public int getVocabSize()
  Vocabulary size. 8k is the default size.
  optional int32 vocab_size = 4 [default = 8000];
  Specified by:
  
  getVocabSize in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The vocabSize.
- setVocabSize
  
  public SentencepieceModel.TrainerSpec.Builder setVocabSize(int value)
  Vocabulary size. 8k is the default size.
  optional int32 vocab_size = 4 [default = 8000];
  Parameters:
  
  value - The vocabSize to set.
  
  Returns:
  
  This builder for chaining.
- clearVocabSize
  
  public SentencepieceModel.TrainerSpec.Builder clearVocabSize()
  Vocabulary size. 8k is the default size.
  optional int32 vocab_size = 4 [default = 8000];
  Returns:
  
  This builder for chaining.
- getAcceptLanguageList
  
  public com.google.protobuf.ProtocolStringList getAcceptLanguageList()
  List of the languages this model can accept. Since the model is language-agnostic, this field is used as a reference.
  repeated string accept_language = 5;
  Specified by:
  
  getAcceptLanguageList in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  A list containing the acceptLanguage.
- getAcceptLanguageCount
  
  public int getAcceptLanguageCount()
  List of the languages this model can accept. Since the model is language-agnostic, this field is used as a reference.
  repeated string accept_language = 5;
  Specified by:
  
  getAcceptLanguageCount in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The count of acceptLanguage.
- getAcceptLanguage
  
  public String getAcceptLanguage(int index)
  List of the languages this model can accept. Since the model is language-agnostic, this field is used as a reference.
  repeated string accept_language = 5;
  Specified by:
  
  getAcceptLanguage in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Parameters:
  
  index - The index of the element to return.
  
  Returns:
  
  The acceptLanguage at the given index.
- getAcceptLanguageBytes
  
  public com.google.protobuf.ByteString getAcceptLanguageBytes(int index)
  List of the languages this model can accept. Since the model is language-agnostic, this field is used as a reference.
  repeated string accept_language = 5;
  Specified by:
  
  getAcceptLanguageBytes in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Parameters:
  
  index - The index of the value to return.
  
  Returns:
  
  The bytes of the acceptLanguage at the given index.
- setAcceptLanguage
  
  public SentencepieceModel.TrainerSpec.Builder setAcceptLanguage(int index, String value)
  List of the languages this model can accept. Since the model is language-agnostic, this field is used as a reference.
  repeated string accept_language = 5;
  Parameters:
  
  index - The index to set the value at.
  
  value - The acceptLanguage to set.
  
  Returns:
  
  This builder for chaining.
- addAcceptLanguage
  
  public SentencepieceModel.TrainerSpec.Builder addAcceptLanguage(String value)
  List of the languages this model can accept. Since the model is language-agnostic, this field is used as a reference.
  repeated string accept_language = 5;
  Parameters:
  
  value - The acceptLanguage to add.
  
  Returns:
  
  This builder for chaining.
- addAllAcceptLanguage
  
  public SentencepieceModel.TrainerSpec.Builder addAllAcceptLanguage(Iterable<String> values)
  List of the languages this model can accept. Since the model is language-agnostic, this field is used as a reference.
  repeated string accept_language = 5;
  Parameters:
  
  values - The acceptLanguage to add.
  
  Returns:
  
  This builder for chaining.
- clearAcceptLanguage
  
  public SentencepieceModel.TrainerSpec.Builder clearAcceptLanguage()
  List of the languages this model can accept. Since the model is language-agnostic, this field is used as a reference.
  repeated string accept_language = 5;
  Returns:
  
  This builder for chaining.
- addAcceptLanguageBytes
  
  public SentencepieceModel.TrainerSpec.Builder addAcceptLanguageBytes(com.google.protobuf.ByteString value)
  List of the languages this model can accept. Since the model is language-agnostic, this field is used as a reference.
  repeated string accept_language = 5;
  Parameters:
  
  value - The bytes of the acceptLanguage to add.
  
  Returns:
  
  This builder for chaining.
- hasSelfTestSampleSize
  
  public boolean hasSelfTestSampleSize()
  Size of self-test samples, which are encoded in the model file.
  optional int32 self_test_sample_size = 6 [default = 0];
  Specified by:
  
  hasSelfTestSampleSize in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the selfTestSampleSize field is set.
- getSelfTestSampleSize
  
  public int getSelfTestSampleSize()
  Size of self-test samples, which are encoded in the model file.
  optional int32 self_test_sample_size = 6 [default = 0];
  Specified by:
  
  getSelfTestSampleSize in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The selfTestSampleSize.
- setSelfTestSampleSize
  
  public SentencepieceModel.TrainerSpec.Builder setSelfTestSampleSize(int value)
  Size of self-test samples, which are encoded in the model file.
  optional int32 self_test_sample_size = 6 [default = 0];
  Parameters:
  
  value - The selfTestSampleSize to set.
  
  Returns:
  
  This builder for chaining.
- clearSelfTestSampleSize
  
  public SentencepieceModel.TrainerSpec.Builder clearSelfTestSampleSize()
  Size of self-test samples, which are encoded in the model file.
  optional int32 self_test_sample_size = 6 [default = 0];
  Returns:
  
  This builder for chaining.
- hasEnableDifferentialPrivacy
  
  public boolean hasEnableDifferentialPrivacy()
  Whether to use DP version of sentencepiece. Use it with TSV input format (requires precomputed word tab counts to work).
  optional bool enable_differential_privacy = 50 [default = false];
  Specified by:
  
  hasEnableDifferentialPrivacy in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the enableDifferentialPrivacy field is set.
- getEnableDifferentialPrivacy
  
  public boolean getEnableDifferentialPrivacy()
  Whether to use DP version of sentencepiece. Use it with TSV input format (requires precomputed word tab counts to work).
  optional bool enable_differential_privacy = 50 [default = false];
  Specified by:
  
  getEnableDifferentialPrivacy in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The enableDifferentialPrivacy.
- setEnableDifferentialPrivacy
  
  public SentencepieceModel.TrainerSpec.Builder setEnableDifferentialPrivacy(boolean value)
  Whether to use DP version of sentencepiece. Use it with TSV input format (requires precomputed word tab counts to work).
  optional bool enable_differential_privacy = 50 [default = false];
  Parameters:
  
  value - The enableDifferentialPrivacy to set.
  
  Returns:
  
  This builder for chaining.
- clearEnableDifferentialPrivacy
  
  public SentencepieceModel.TrainerSpec.Builder clearEnableDifferentialPrivacy()
  Whether to use DP version of sentencepiece. Use it with TSV input format (requires precomputed word tab counts to work).
  optional bool enable_differential_privacy = 50 [default = false];
  Returns:
  
  This builder for chaining.
- hasDifferentialPrivacyNoiseLevel
  
  public boolean hasDifferentialPrivacyNoiseLevel()
  Set these parameters if you need the DP version of sentencepiece. Standard deviation of the noise to add.
  optional float differential_privacy_noise_level = 51 [default = 0];
  Specified by:
  
  hasDifferentialPrivacyNoiseLevel in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the differentialPrivacyNoiseLevel field is set.
- getDifferentialPrivacyNoiseLevel
  
  public float getDifferentialPrivacyNoiseLevel()
  Set these parameters if you need the DP version of sentencepiece. Standard deviation of the noise to add.
  optional float differential_privacy_noise_level = 51 [default = 0];
  Specified by:
  
  getDifferentialPrivacyNoiseLevel in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The differentialPrivacyNoiseLevel.
- setDifferentialPrivacyNoiseLevel
  
  public SentencepieceModel.TrainerSpec.Builder setDifferentialPrivacyNoiseLevel(float value)
  Set these parameters if you need the DP version of sentencepiece. Standard deviation of the noise to add.
  optional float differential_privacy_noise_level = 51 [default = 0];
  Parameters:
  
  value - The differentialPrivacyNoiseLevel to set.
  
  Returns:
  
  This builder for chaining.
- clearDifferentialPrivacyNoiseLevel
  
  public SentencepieceModel.TrainerSpec.Builder clearDifferentialPrivacyNoiseLevel()
  Set these parameters if you need the DP version of sentencepiece. Standard deviation of the noise to add.
  optional float differential_privacy_noise_level = 51 [default = 0];
  Returns:
  
  This builder for chaining.
- hasDifferentialPrivacyClippingThreshold
  
  public boolean hasDifferentialPrivacyClippingThreshold()
  Clipping threshold to apply after adding noise. All the words with frequency less than this value are dropped.
  optional uint64 differential_privacy_clipping_threshold = 52 [default = 0];
  Specified by:
  
  hasDifferentialPrivacyClippingThreshold in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the differentialPrivacyClippingThreshold field is set.
- getDifferentialPrivacyClippingThreshold
  
  public long getDifferentialPrivacyClippingThreshold()
  Clipping threshold to apply after adding noise. All the words with frequency less than this value are dropped.
  optional uint64 differential_privacy_clipping_threshold = 52 [default = 0];
  Specified by:
  
  getDifferentialPrivacyClippingThreshold in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The differentialPrivacyClippingThreshold.
- setDifferentialPrivacyClippingThreshold
  
  public SentencepieceModel.TrainerSpec.Builder setDifferentialPrivacyClippingThreshold(long value)
  Clipping threshold to apply after adding noise. All the words with frequency less than this value are dropped.
  optional uint64 differential_privacy_clipping_threshold = 52 [default = 0];
  Parameters:
  
  value - The differentialPrivacyClippingThreshold to set.
  
  Returns:
  
  This builder for chaining.
- clearDifferentialPrivacyClippingThreshold
  
  public SentencepieceModel.TrainerSpec.Builder clearDifferentialPrivacyClippingThreshold()
  Clipping threshold to apply after adding noise. All the words with frequency less than this value are dropped.
  optional uint64 differential_privacy_clipping_threshold = 52 [default = 0];
  Returns:
  
  This builder for chaining.
- hasCharacterCoverage
  
  public boolean hasCharacterCoverage()
  Training parameters. Uses characters which cover the corpus with the ratio of `character_coverage`. This parameter determines the basic alphabet of sentence pieces. The remaining (1.0 - `character_coverage`) fraction of characters is treated as UNK. See also the required_chars field.
  optional float character_coverage = 10 [default = 0.9995];
  Specified by:
  
  hasCharacterCoverage in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the characterCoverage field is set.
- getCharacterCoverage
  
  public float getCharacterCoverage()
  Training parameters. Uses characters which cover the corpus with the ratio of `character_coverage`. This parameter determines the basic alphabet of sentence pieces. The remaining (1.0 - `character_coverage`) fraction of characters is treated as UNK. See also the required_chars field.
  optional float character_coverage = 10 [default = 0.9995];
  Specified by:
  
  getCharacterCoverage in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The characterCoverage.
- setCharacterCoverage
  
  public SentencepieceModel.TrainerSpec.Builder setCharacterCoverage(float value)
  Training parameters. Uses characters which cover the corpus with the ratio of `character_coverage`. This parameter determines the basic alphabet of sentence pieces. The remaining (1.0 - `character_coverage`) fraction of characters is treated as UNK. See also the required_chars field.
  optional float character_coverage = 10 [default = 0.9995];
  Parameters:
  
  value - The characterCoverage to set.
  
  Returns:
  
  This builder for chaining.
- clearCharacterCoverage
  
  public SentencepieceModel.TrainerSpec.Builder clearCharacterCoverage()
  Training parameters. Uses characters which cover the corpus with the ratio of `character_coverage`. This parameter determines the basic alphabet of sentence pieces. The remaining (1.0 - `character_coverage`) fraction of characters is treated as UNK. See also the required_chars field.
  optional float character_coverage = 10 [default = 0.9995];
  Returns:
  
  This builder for chaining.
- hasInputSentenceSize
  
  public boolean hasInputSentenceSize()
  Maximum size of sentences the trainer loads from `input` parameter. Trainer simply loads the `input` files in sequence. It is better to shuffle the input corpus randomly.
  optional uint64 input_sentence_size = 11 [default = 0];
  Specified by:
  
  hasInputSentenceSize in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the inputSentenceSize field is set.
- getInputSentenceSize
  
  public long getInputSentenceSize()
  Maximum size of sentences the trainer loads from `input` parameter. Trainer simply loads the `input` files in sequence. It is better to shuffle the input corpus randomly.
  optional uint64 input_sentence_size = 11 [default = 0];
  Specified by:
  
  getInputSentenceSize in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The inputSentenceSize.
- setInputSentenceSize
  
  public SentencepieceModel.TrainerSpec.Builder setInputSentenceSize(long value)
  Maximum size of sentences the trainer loads from `input` parameter. Trainer simply loads the `input` files in sequence. It is better to shuffle the input corpus randomly.
  optional uint64 input_sentence_size = 11 [default = 0];
  Parameters:
  
  value - The inputSentenceSize to set.
  
  Returns:
  
  This builder for chaining.
- clearInputSentenceSize
  
  public SentencepieceModel.TrainerSpec.Builder clearInputSentenceSize()
  Maximum size of sentences the trainer loads from `input` parameter. Trainer simply loads the `input` files in sequence. It is better to shuffle the input corpus randomly.
  optional uint64 input_sentence_size = 11 [default = 0];
  Returns:
  
  This builder for chaining.
- hasShuffleInputSentence
  
  public boolean hasShuffleInputSentence()
  
  optional bool shuffle_input_sentence = 19 [default = true];
  
  Specified by:
  
  hasShuffleInputSentence in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the shuffleInputSentence field is set.
- getShuffleInputSentence
  
  public boolean getShuffleInputSentence()
  
  optional bool shuffle_input_sentence = 19 [default = true];
  
  Specified by:
  
  getShuffleInputSentence in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The shuffleInputSentence.
- setShuffleInputSentence
  
  public SentencepieceModel.TrainerSpec.Builder setShuffleInputSentence(boolean value)
  
  optional bool shuffle_input_sentence = 19 [default = true];
  
  Parameters:
  
  value - The shuffleInputSentence to set.
  
  Returns:
  
  This builder for chaining.
- clearShuffleInputSentence
  
  public SentencepieceModel.TrainerSpec.Builder clearShuffleInputSentence()
  
  optional bool shuffle_input_sentence = 19 [default = true];
  
  Returns:
  
  This builder for chaining.
- hasMiningSentenceSize
  
  @Deprecated public boolean hasMiningSentenceSize()
  
  Deprecated.
  com.google.genai.proto.TrainerSpec.mining_sentence_size is deprecated. See sentencepiece_model.proto;l=96
  Maximum size of sentences to make seed sentence pieces. Extended suffix array is constructed to extract frequent sub-strings from the corpus. This uses 20N working space, where N is the size of corpus.
  optional int32 mining_sentence_size = 12 [deprecated = true];
  Specified by:
  
  hasMiningSentenceSize in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the miningSentenceSize field is set.
- getMiningSentenceSize
  
  @Deprecated public int getMiningSentenceSize()
  
  Deprecated.
  com.google.genai.proto.TrainerSpec.mining_sentence_size is deprecated. See sentencepiece_model.proto;l=96
  Maximum size of sentences to make seed sentence pieces. Extended suffix array is constructed to extract frequent sub-strings from the corpus. This uses 20N working space, where N is the size of corpus.
  optional int32 mining_sentence_size = 12 [deprecated = true];
  Specified by:
  
  getMiningSentenceSize in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The miningSentenceSize.
- setMiningSentenceSize
  
  @Deprecated public SentencepieceModel.TrainerSpec.Builder setMiningSentenceSize(int value)
  
  Deprecated.
  Maximum size of sentences to make seed sentence pieces. Extended suffix array is constructed to extract frequent sub-strings from the corpus. This uses 20N working space, where N is the size of corpus.
  optional int32 mining_sentence_size = 12 [deprecated = true];
  Parameters:
  
  value - The miningSentenceSize to set.
  
  Returns:
  
  This builder for chaining.
- clearMiningSentenceSize
  
  @Deprecated public SentencepieceModel.TrainerSpec.Builder clearMiningSentenceSize()
  
  Deprecated.
  Maximum size of sentences to make seed sentence pieces. Extended suffix array is constructed to extract frequent sub-strings from the corpus. This uses 20N working space, where N is the size of corpus.
  optional int32 mining_sentence_size = 12 [deprecated = true];
  Returns:
  
  This builder for chaining.
- hasTrainingSentenceSize
  
  @Deprecated public boolean hasTrainingSentenceSize()
  
  Deprecated.
  com.google.genai.proto.TrainerSpec.training_sentence_size is deprecated. See sentencepiece_model.proto;l=99
  Maximum size of sentences to train sentence pieces.
  optional int32 training_sentence_size = 13 [deprecated = true];
  Specified by:
  
  hasTrainingSentenceSize in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the trainingSentenceSize field is set.
- getTrainingSentenceSize
  
  @Deprecated public int getTrainingSentenceSize()
  
  Deprecated.
  com.google.genai.proto.TrainerSpec.training_sentence_size is deprecated. See sentencepiece_model.proto;l=99
  Maximum size of sentences to train sentence pieces.
  optional int32 training_sentence_size = 13 [deprecated = true];
  Specified by:
  
  getTrainingSentenceSize in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The trainingSentenceSize.
- setTrainingSentenceSize
  
  @Deprecated public SentencepieceModel.TrainerSpec.Builder setTrainingSentenceSize(int value)
  
  Deprecated.
  Maximum size of sentences to train sentence pieces.
  optional int32 training_sentence_size = 13 [deprecated = true];
  Parameters:
  
  value - The trainingSentenceSize to set.
  
  Returns:
  
  This builder for chaining.
- clearTrainingSentenceSize
  
  @Deprecated public SentencepieceModel.TrainerSpec.Builder clearTrainingSentenceSize()
  
  Deprecated.
  Maximum size of sentences to train sentence pieces.
  optional int32 training_sentence_size = 13 [deprecated = true];
  Returns:
  
  This builder for chaining.
- hasSeedSentencepieceSize
  
  public boolean hasSeedSentencepieceSize()
  The size of seed sentencepieces. `seed_sentencepiece_size` must be larger than `vocab_size`.
  optional int32 seed_sentencepiece_size = 14 [default = 1000000];
  Specified by:
  
  hasSeedSentencepieceSize in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the seedSentencepieceSize field is set.
- getSeedSentencepieceSize
  
  public int getSeedSentencepieceSize()
  The size of seed sentencepieces. `seed_sentencepiece_size` must be larger than `vocab_size`.
  optional int32 seed_sentencepiece_size = 14 [default = 1000000];
  Specified by:
  
  getSeedSentencepieceSize in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The seedSentencepieceSize.
- setSeedSentencepieceSize
  
  public SentencepieceModel.TrainerSpec.Builder setSeedSentencepieceSize(int value)
  The size of seed sentencepieces. `seed_sentencepiece_size` must be larger than `vocab_size`.
  optional int32 seed_sentencepiece_size = 14 [default = 1000000];
  Parameters:
  
  value - The seedSentencepieceSize to set.
  
  Returns:
  
  This builder for chaining.
- clearSeedSentencepieceSize
  
  public SentencepieceModel.TrainerSpec.Builder clearSeedSentencepieceSize()
  The size of seed sentencepieces. `seed_sentencepiece_size` must be larger than `vocab_size`.
  optional int32 seed_sentencepiece_size = 14 [default = 1000000];
  Returns:
  
  This builder for chaining.
- hasShrinkingFactor
  
  public boolean hasShrinkingFactor()
  In every EM sub-iteration, keeps the top `shrinking_factor` * `current sentencepieces size` pieces with respect to the loss of the sentence piece. This value should be smaller than 1.0.
  optional float shrinking_factor = 15 [default = 0.75];
  Specified by:
  
  hasShrinkingFactor in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the shrinkingFactor field is set.
- getShrinkingFactor
  
  public float getShrinkingFactor()
  In every EM sub-iteration, keeps the top `shrinking_factor` * `current sentencepieces size` pieces with respect to the loss of the sentence piece. This value should be smaller than 1.0.
  optional float shrinking_factor = 15 [default = 0.75];
  Specified by:
  
  getShrinkingFactor in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The shrinkingFactor.
- setShrinkingFactor
  
  public SentencepieceModel.TrainerSpec.Builder setShrinkingFactor(float value)
  In every EM sub-iteration, keeps the top `shrinking_factor` * `current sentencepieces size` pieces with respect to the loss of the sentence piece. This value should be smaller than 1.0.
  optional float shrinking_factor = 15 [default = 0.75];
  Parameters:
  
  value - The shrinkingFactor to set.
  
  Returns:
  
  This builder for chaining.
- clearShrinkingFactor
  
  public SentencepieceModel.TrainerSpec.Builder clearShrinkingFactor()
  In every EM sub-iteration, keeps the top `shrinking_factor` * `current sentencepieces size` pieces with respect to the loss of the sentence piece. This value should be smaller than 1.0.
  optional float shrinking_factor = 15 [default = 0.75];
  Returns:
  
  This builder for chaining.
- hasMaxSentenceLength
  
  public boolean hasMaxSentenceLength()
  The maximum sentence length in bytes. Sentences longer than `max_sentence_length` are simply ignored. Longer input tends to bring the following risks: * Overflow during EM training (unigram language model only) * Performance drop because of O(n log n) cost in BPE.
  optional int32 max_sentence_length = 18 [default = 4192];
  Specified by:
  
  hasMaxSentenceLength in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the maxSentenceLength field is set.
- getMaxSentenceLength
  
  public int getMaxSentenceLength()
  The maximum sentence length in bytes. Sentences longer than `max_sentence_length` are simply ignored. Longer input tends to bring the following risks: * Overflow during EM training (unigram language model only) * Performance drop because of O(n log n) cost in BPE.
  optional int32 max_sentence_length = 18 [default = 4192];
  Specified by:
  
  getMaxSentenceLength in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The maxSentenceLength.
- setMaxSentenceLength
  
  public SentencepieceModel.TrainerSpec.Builder setMaxSentenceLength(int value)
  The maximum sentence length in bytes. Sentences longer than `max_sentence_length` are simply ignored. Longer input tends to bring the following risks: * Overflow during EM training (unigram language model only) * Performance drop because of O(n log n) cost in BPE.
  optional int32 max_sentence_length = 18 [default = 4192];
  Parameters:
  
  value - The maxSentenceLength to set.
  
  Returns:
  
  This builder for chaining.
- clearMaxSentenceLength
  
  public SentencepieceModel.TrainerSpec.Builder clearMaxSentenceLength()
  The maximum sentence length in bytes. Sentences longer than `max_sentence_length` are simply ignored. Longer input tends to bring the following risks: * Overflow during EM training (unigram language model only) * Performance drop because of O(n log n) cost in BPE.
  optional int32 max_sentence_length = 18 [default = 4192];
  Returns:
  
  This builder for chaining.
- hasNumThreads
  
  public boolean hasNumThreads()
  Number of threads in the training.
  optional int32 num_threads = 16 [default = 16];
  Specified by:
  
  hasNumThreads in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the numThreads field is set.
- getNumThreads
  
  public int getNumThreads()
  Number of threads in the training.
  optional int32 num_threads = 16 [default = 16];
  Specified by:
  
  getNumThreads in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The numThreads.
- setNumThreads
  
  public SentencepieceModel.TrainerSpec.Builder setNumThreads(int value)
  Number of threads in the training.
  optional int32 num_threads = 16 [default = 16];
  Parameters:
  
  value - The numThreads to set.
  
  Returns:
  
  This builder for chaining.
- clearNumThreads
  
  public SentencepieceModel.TrainerSpec.Builder clearNumThreads()
  Number of threads in the training.
  optional int32 num_threads = 16 [default = 16];
  Returns:
  
  This builder for chaining.
- hasNumSubIterations
  
  public boolean hasNumSubIterations()
  Number of EM sub iterations.
  optional int32 num_sub_iterations = 17 [default = 2];
  Specified by:
  
  hasNumSubIterations in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the numSubIterations field is set.
- getNumSubIterations
  
  public int getNumSubIterations()
  Number of EM sub iterations.
  optional int32 num_sub_iterations = 17 [default = 2];
  Specified by:
  
  getNumSubIterations in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The numSubIterations.
- setNumSubIterations
  
  public SentencepieceModel.TrainerSpec.Builder setNumSubIterations(int value)
  Number of EM sub iterations.
  optional int32 num_sub_iterations = 17 [default = 2];
  Parameters:
  
  value - The numSubIterations to set.
  
  Returns:
  
  This builder for chaining.
- clearNumSubIterations
  
  public SentencepieceModel.TrainerSpec.Builder clearNumSubIterations()
  Number of EM sub iterations.
  optional int32 num_sub_iterations = 17 [default = 2];
  Returns:
  
  This builder for chaining.
- hasMaxSentencepieceLength
  
  public boolean hasMaxSentencepieceLength()
  SentencePiece parameters which control the shapes of sentence pieces. Maximum length of a sentence piece.
  optional int32 max_sentencepiece_length = 20 [default = 16];
  Specified by:
  
  hasMaxSentencepieceLength in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the maxSentencepieceLength field is set.
- getMaxSentencepieceLength
  
  public int getMaxSentencepieceLength()
  SentencePiece parameters which control the shapes of sentence pieces. Maximum length of a sentence piece.
  optional int32 max_sentencepiece_length = 20 [default = 16];
  Specified by:
  
  getMaxSentencepieceLength in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The maxSentencepieceLength.
- setMaxSentencepieceLength
  
  public SentencepieceModel.TrainerSpec.Builder setMaxSentencepieceLength(int value)
  SentencePiece parameters which control the shapes of sentence pieces. Maximum length of a sentence piece.
  optional int32 max_sentencepiece_length = 20 [default = 16];
  Parameters:
  
  value - The maxSentencepieceLength to set.
  
  Returns:
  
  This builder for chaining.
- clearMaxSentencepieceLength
  
  public SentencepieceModel.TrainerSpec.Builder clearMaxSentencepieceLength()
  SentencePiece parameters which control the shapes of sentence pieces. Maximum length of a sentence piece.
  optional int32 max_sentencepiece_length = 20 [default = 16];
  Returns:
  
  This builder for chaining.
- hasSplitByUnicodeScript
  
  public boolean hasSplitByUnicodeScript()
  Uses Unicode script to split sentence pieces. When `split_by_unicode_script` is true, we do not allow a sentence piece to include multiple Unicode scripts, e.g. "F1" is not a valid piece. Exception: CJ characters (Hiragana/Katakana/Han) are all handled as one script type, since a Japanese word can consist of multiple scripts. This exception is always applied regardless of the accept-language parameter.
  optional bool split_by_unicode_script = 21 [default = true];
  Specified by:
  
  hasSplitByUnicodeScript in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the splitByUnicodeScript field is set.
- getSplitByUnicodeScript
  
  public boolean getSplitByUnicodeScript()
  Uses Unicode script to split sentence pieces. When `split_by_unicode_script` is true, we do not allow a sentence piece to include multiple Unicode scripts, e.g. "F1" is not a valid piece. Exception: CJ characters (Hiragana/Katakana/Han) are all handled as one script type, since a Japanese word can consist of multiple scripts. This exception is always applied regardless of the accept-language parameter.
  optional bool split_by_unicode_script = 21 [default = true];
  Specified by:
  
  getSplitByUnicodeScript in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The splitByUnicodeScript.
- setSplitByUnicodeScript
  
  public SentencepieceModel.TrainerSpec.Builder setSplitByUnicodeScript(boolean value)
  Uses Unicode script to split sentence pieces. When `split_by_unicode_script` is true, we do not allow a sentence piece to include multiple Unicode scripts, e.g. "F1" is not a valid piece. Exception: CJ characters (Hiragana/Katakana/Han) are all handled as one script type, since a Japanese word can consist of multiple scripts. This exception is always applied regardless of the accept-language parameter.
  optional bool split_by_unicode_script = 21 [default = true];
  Parameters:
  
  value - The splitByUnicodeScript to set.
  
  Returns:
  
  This builder for chaining.
- clearSplitByUnicodeScript
  
  public SentencepieceModel.TrainerSpec.Builder clearSplitByUnicodeScript()
  Uses Unicode script to split sentence pieces. When `split_by_unicode_script` is true, we do not allow a sentence piece to include multiple Unicode scripts, e.g. "F1" is not a valid piece. Exception: CJ characters (Hiragana/Katakana/Han) are all handled as one script type, since a Japanese word can consist of multiple scripts. This exception is always applied regardless of the accept-language parameter.
  optional bool split_by_unicode_script = 21 [default = true];
  Returns:
  
  This builder for chaining.
- hasSplitByNumber
  
  public boolean hasSplitByNumber()
  When `split_by_number` is true, puts a boundary at each transition between number and non-number. If we want to treat "F1" as one token, set this flag to false.
  optional bool split_by_number = 23 [default = true];
  Specified by:
  
  hasSplitByNumber in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the splitByNumber field is set.
- getSplitByNumber
  
  public boolean getSplitByNumber()
  When `split_by_number` is true, puts a boundary at each transition between number and non-number. If we want to treat "F1" as one token, set this flag to false.
  optional bool split_by_number = 23 [default = true];
  Specified by:
  
  getSplitByNumber in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The splitByNumber.
- setSplitByNumber
  
  public SentencepieceModel.TrainerSpec.Builder setSplitByNumber(boolean value)
  When `split_by_number` is true, puts a boundary at each transition between number and non-number. If we want to treat "F1" as one token, set this flag to false.
  optional bool split_by_number = 23 [default = true];
  Parameters:
  
  value - The splitByNumber to set.
  
  Returns:
  
  This builder for chaining.
- clearSplitByNumber
  
  public SentencepieceModel.TrainerSpec.Builder clearSplitByNumber()
  When `split_by_number` is true, puts a boundary at each transition between number and non-number. If we want to treat "F1" as one token, set this flag to false.
  optional bool split_by_number = 23 [default = true];
  Returns:
  
  This builder for chaining.
- hasSplitByWhitespace
  
  public boolean hasSplitByWhitespace()
  Use a white space to split sentence pieces. When `split_by_whitespace` is false, we may have the piece containing a white space in the middle. e.g., "in_the".
  optional bool split_by_whitespace = 22 [default = true];
  Specified by:
  
  hasSplitByWhitespace in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the splitByWhitespace field is set.
- getSplitByWhitespace
  
  public boolean getSplitByWhitespace()
  Use a white space to split sentence pieces. When `split_by_whitespace` is false, we may have the piece containing a white space in the middle. e.g., "in_the".
  optional bool split_by_whitespace = 22 [default = true];
  Specified by:
  
  getSplitByWhitespace in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The splitByWhitespace.
- setSplitByWhitespace
  
  public SentencepieceModel.TrainerSpec.Builder setSplitByWhitespace(boolean value)
  Use a white space to split sentence pieces. When `split_by_whitespace` is false, we may have the piece containing a white space in the middle. e.g., "in_the".
  optional bool split_by_whitespace = 22 [default = true];
  Parameters:
  
  value - The splitByWhitespace to set.
  
  Returns:
  
  This builder for chaining.
- clearSplitByWhitespace
  
  public SentencepieceModel.TrainerSpec.Builder clearSplitByWhitespace()
  Use a white space to split sentence pieces. When `split_by_whitespace` is false, we may have the piece containing a white space in the middle. e.g., "in_the".
  optional bool split_by_whitespace = 22 [default = true];
  Returns:
  
  This builder for chaining.
- hasTreatWhitespaceAsSuffix
  
  public boolean hasTreatWhitespaceAsSuffix()
  Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello => hello_. When `treat_whitespace_as_suffix` is true, NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end of sentence.
  optional bool treat_whitespace_as_suffix = 24 [default = false];
  Specified by:
  
  hasTreatWhitespaceAsSuffix in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the treatWhitespaceAsSuffix field is set.
- getTreatWhitespaceAsSuffix
  
  public boolean getTreatWhitespaceAsSuffix()
  Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello => hello_. When `treat_whitespace_as_suffix` is true, NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end of sentence.
  optional bool treat_whitespace_as_suffix = 24 [default = false];
  Specified by:
  
  getTreatWhitespaceAsSuffix in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The treatWhitespaceAsSuffix.
- setTreatWhitespaceAsSuffix
  
  public SentencepieceModel.TrainerSpec.Builder setTreatWhitespaceAsSuffix(boolean value)
  Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello => hello_. When `treat_whitespace_as_suffix` is true, NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end of sentence.
  optional bool treat_whitespace_as_suffix = 24 [default = false];
  Parameters:
  
  value - The treatWhitespaceAsSuffix to set.
  
  Returns:
  
  This builder for chaining.
- clearTreatWhitespaceAsSuffix
  
  public SentencepieceModel.TrainerSpec.Builder clearTreatWhitespaceAsSuffix()
  Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello => hello_. When `treat_whitespace_as_suffix` is true, NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end of sentence.
  optional bool treat_whitespace_as_suffix = 24 [default = false];
  Returns:
  
  This builder for chaining.
- hasAllowWhitespaceOnlyPieces
  
  public boolean hasAllowWhitespaceOnlyPieces()
  Allows pieces that only contain whitespaces instead of appearing only as prefix or suffix of other pieces.
  optional bool allow_whitespace_only_pieces = 26 [default = false];
  Specified by:
  
  hasAllowWhitespaceOnlyPieces in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the allowWhitespaceOnlyPieces field is set.
- getAllowWhitespaceOnlyPieces
  
  public boolean getAllowWhitespaceOnlyPieces()
  Allows pieces that only contain whitespaces instead of appearing only as prefix or suffix of other pieces.
  optional bool allow_whitespace_only_pieces = 26 [default = false];
  Specified by:
  
  getAllowWhitespaceOnlyPieces in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The allowWhitespaceOnlyPieces.
- setAllowWhitespaceOnlyPieces
  
  public SentencepieceModel.TrainerSpec.Builder setAllowWhitespaceOnlyPieces(boolean value)
  Allows pieces that only contain whitespaces instead of appearing only as prefix or suffix of other pieces.
  optional bool allow_whitespace_only_pieces = 26 [default = false];
  Parameters:
  
  value - The allowWhitespaceOnlyPieces to set.
  
  Returns:
  
  This builder for chaining.
- clearAllowWhitespaceOnlyPieces
  
  public SentencepieceModel.TrainerSpec.Builder clearAllowWhitespaceOnlyPieces()
  Allows pieces that only contain whitespaces instead of appearing only as prefix or suffix of other pieces.
  optional bool allow_whitespace_only_pieces = 26 [default = false];
  Returns:
  
  This builder for chaining.
- hasSplitDigits
  
  public boolean hasSplitDigits()
  Split all digits (0-9) into separate pieces.
  optional bool split_digits = 25 [default = false];
  Specified by:
  
  hasSplitDigits in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the splitDigits field is set.
- getSplitDigits
  
  public boolean getSplitDigits()
  Split all digits (0-9) into separate pieces.
  optional bool split_digits = 25 [default = false];
  Specified by:
  
  getSplitDigits in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The splitDigits.
- setSplitDigits
  
  public SentencepieceModel.TrainerSpec.Builder setSplitDigits(boolean value)
  Split all digits (0-9) into separate pieces.
  optional bool split_digits = 25 [default = false];
  Parameters:
  
  value - The splitDigits to set.
  
  Returns:
  
  This builder for chaining.
- clearSplitDigits
  
  public SentencepieceModel.TrainerSpec.Builder clearSplitDigits()
  Split all digits (0-9) into separate pieces.
  optional bool split_digits = 25 [default = false];
  Returns:
  
  This builder for chaining.
- hasPretokenizationDelimiter
  
  public boolean hasPretokenizationDelimiter()
  Defines the pre-tokenization delimiter. When specified, pieces crossing this delimiter are not included in the vocab. The delimiter string is then virtually ignored during training. This field allows constraints on the vocabulary selection. Note that this field is available in unigram mode.
  optional string pretokenization_delimiter = 53 [default = ""];
  Specified by:
  
  hasPretokenizationDelimiter in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the pretokenizationDelimiter field is set.
- getPretokenizationDelimiter
  
  public String getPretokenizationDelimiter()
  Defines the pre-tokenization delimiter. When specified, pieces crossing this delimiter are not included in the vocab. The delimiter string is then virtually ignored during training. This field allows constraints on the vocabulary selection. Note that this field is available in unigram mode.
  optional string pretokenization_delimiter = 53 [default = ""];
  Specified by:
  
  getPretokenizationDelimiter in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The pretokenizationDelimiter.
- getPretokenizationDelimiterBytes
  
  public com.google.protobuf.ByteString getPretokenizationDelimiterBytes()
  Defines the pre-tokenization delimiter. When specified, pieces crossing this delimiter are not included in the vocab. The delimiter string is then virtually ignored during training. This field allows constraints on the vocabulary selection. Note that this field is available in unigram mode.
  optional string pretokenization_delimiter = 53 [default = ""];
  Specified by:
  
  getPretokenizationDelimiterBytes in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The bytes for pretokenizationDelimiter.
- setPretokenizationDelimiter
  
  public SentencepieceModel.TrainerSpec.Builder setPretokenizationDelimiter(String value)
  Defines the pre-tokenization delimiter. When specified, pieces crossing this delimiter are not included in the vocab. The delimiter string is then virtually ignored during training. This field allows constraints on the vocabulary selection. Note that this field is available in unigram mode.
  optional string pretokenization_delimiter = 53 [default = ""];
  Parameters:
  
  value - The pretokenizationDelimiter to set.
  
  Returns:
  
  This builder for chaining.
- clearPretokenizationDelimiter
  
  public SentencepieceModel.TrainerSpec.Builder clearPretokenizationDelimiter()
  Defines the pre-tokenization delimiter. When specified, pieces crossing this delimiter are not included in the vocab. The delimiter string is then virtually ignored during training. This field allows constraints on the vocabulary selection. Note that this field is available in unigram mode.
  optional string pretokenization_delimiter = 53 [default = ""];
  Returns:
  
  This builder for chaining.
- setPretokenizationDelimiterBytes
  
  public SentencepieceModel.TrainerSpec.Builder setPretokenizationDelimiterBytes(com.google.protobuf.ByteString value)
  Defines the pre-tokenization delimiter. When specified, pieces crossing this delimiter are not included in the vocab. The delimiter string is then virtually ignored during training. This field allows constraints on the vocabulary selection. Note that this field is available in unigram mode.
  optional string pretokenization_delimiter = 53 [default = ""];
  Parameters:
  
  value - The bytes for pretokenizationDelimiter to set.
  
  Returns:
  
  This builder for chaining.
- getControlSymbolsList
  
  public com.google.protobuf.ProtocolStringList getControlSymbolsList()
  Vocabulary management. Defines control symbols used as indicators to change the behavior of the decoder. <s> and </s> are pre-defined. We can use this field to encode various meta information, including a language indicator in a multilingual model. These symbols are not visible to users, but are visible to the decoder. Note that when the input sentence contains control symbols, they are not treated as one token, but are segmented into normal pieces. Control symbols must be inserted independently of the segmentation.
  repeated string control_symbols = 30;
  Specified by:
  
  getControlSymbolsList in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  A list containing the controlSymbols.
- getControlSymbolsCount
  
  public int getControlSymbolsCount()
  Vocabulary management. Defines control symbols used as indicators to change the behavior of the decoder. <s> and </s> are pre-defined. We can use this field to encode various meta information, including a language indicator in a multilingual model. These symbols are not visible to users, but are visible to the decoder. Note that when the input sentence contains control symbols, they are not treated as one token, but are segmented into normal pieces. Control symbols must be inserted independently of the segmentation.
  repeated string control_symbols = 30;
  Specified by:
  
  getControlSymbolsCount in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The count of controlSymbols.
- getControlSymbols
  
  public String getControlSymbols(int index)
  Vocabulary management. Defines control symbols used as indicators to change the behavior of the decoder. <s> and </s> are pre-defined. We can use this field to encode various meta information, including a language indicator in a multilingual model. These symbols are not visible to users, but are visible to the decoder. Note that when the input sentence contains control symbols, they are not treated as one token, but are segmented into normal pieces. Control symbols must be inserted independently of the segmentation.
  repeated string control_symbols = 30;
  Specified by:
  
  getControlSymbols in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Parameters:
  
  index - The index of the element to return.
  
  Returns:
  
  The controlSymbols at the given index.
- getControlSymbolsBytes
  
  public com.google.protobuf.ByteString getControlSymbolsBytes(int index)
  Vocabulary management. Defines control symbols used as indicators to change the behavior of the decoder. <s> and </s> are pre-defined. We can use this field to encode various meta information, including a language indicator in a multilingual model. These symbols are not visible to users, but are visible to the decoder. Note that when the input sentence contains control symbols, they are not treated as one token, but are segmented into normal pieces. Control symbols must be inserted independently of the segmentation.
  repeated string control_symbols = 30;
  Specified by:
  
  getControlSymbolsBytes in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Parameters:
  
  index - The index of the value to return.
  
  Returns:
  
  The bytes of the controlSymbols at the given index.
- setControlSymbols
  
  public SentencepieceModel.TrainerSpec.Builder setControlSymbols(int index, String value)
  Vocabulary management. Defines control symbols used as indicators to change the behavior of the decoder. <s> and </s> are pre-defined. We can use this field to encode various meta information, including a language indicator in a multilingual model. These symbols are not visible to users, but are visible to the decoder. Note that when the input sentence contains control symbols, they are not treated as one token, but are segmented into normal pieces. Control symbols must be inserted independently of the segmentation.
  repeated string control_symbols = 30;
  Parameters:
  
  index - The index to set the value at.
  
  value - The controlSymbols to set.
  
  Returns:
  
  This builder for chaining.
- addControlSymbols
  
  public SentencepieceModel.TrainerSpec.Builder addControlSymbols(String value)
  Vocabulary management. Defines control symbols used as indicators to change the behavior of the decoder. <s> and </s> are pre-defined. We can use this field to encode various meta information, including a language indicator in a multilingual model. These symbols are not visible to users, but are visible to the decoder. Note that when the input sentence contains control symbols, they are not treated as one token, but are segmented into normal pieces. Control symbols must be inserted independently of the segmentation.
  repeated string control_symbols = 30;
  Parameters:
  
  value - The controlSymbols to add.
  
  Returns:
  
  This builder for chaining.
- addAllControlSymbols
  
  public SentencepieceModel.TrainerSpec.Builder addAllControlSymbols(Iterable<String> values)
  Vocabulary management. Defines control symbols used as indicators to change the behavior of the decoder. <s> and </s> are pre-defined. We can use this field to encode various meta information, including a language indicator in a multilingual model. These symbols are not visible to users, but are visible to the decoder. Note that when the input sentence contains control symbols, they are not treated as one token, but are segmented into normal pieces. Control symbols must be inserted independently of the segmentation.
  repeated string control_symbols = 30;
  Parameters:
  
  values - The controlSymbols to add.
  
  Returns:
  
  This builder for chaining.
- clearControlSymbols
  
  public SentencepieceModel.TrainerSpec.Builder clearControlSymbols()
  Vocabulary management. Defines control symbols used as indicators to change the behavior of the decoder. <s> and </s> are pre-defined. We can use this field to encode various meta information, including a language indicator in a multilingual model. These symbols are not visible to users, but are visible to the decoder. Note that when the input sentence contains control symbols, they are not treated as one token, but are segmented into normal pieces. Control symbols must be inserted independently of the segmentation.
  repeated string control_symbols = 30;
  Returns:
  
  This builder for chaining.
- addControlSymbolsBytes
  
  public SentencepieceModel.TrainerSpec.Builder addControlSymbolsBytes(com.google.protobuf.ByteString value)
  Vocabulary management. Defines control symbols used as indicators to change the behavior of the decoder. <s> and </s> are pre-defined. We can use this field to encode various meta information, including a language indicator in a multilingual model. These symbols are not visible to users, but are visible to the decoder. Note that when the input sentence contains control symbols, they are not treated as one token, but are segmented into normal pieces. Control symbols must be inserted independently of the segmentation.
  repeated string control_symbols = 30;
  Parameters:
  
  value - The bytes of the controlSymbols to add.
  
  Returns:
  
  This builder for chaining.
- getUserDefinedSymbolsList
  
  public com.google.protobuf.ProtocolStringList getUserDefinedSymbolsList()
  Defines user defined symbols. These symbols are added with extremely high score so they are always treated as one unique symbol in any context. Typical usage of user_defined_symbols is placeholder for named entities.
  repeated string user_defined_symbols = 31;
  Specified by:
  
  getUserDefinedSymbolsList in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  A list containing the userDefinedSymbols.
- getUserDefinedSymbolsCount
  
  public int getUserDefinedSymbolsCount()
  Defines user defined symbols. These symbols are added with extremely high score so they are always treated as one unique symbol in any context. Typical usage of user_defined_symbols is placeholder for named entities.
  repeated string user_defined_symbols = 31;
  Specified by:
  
  getUserDefinedSymbolsCount in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The count of userDefinedSymbols.
- getUserDefinedSymbols
  
  public String getUserDefinedSymbols(int index)
  Defines user defined symbols. These symbols are added with extremely high score so they are always treated as one unique symbol in any context. Typical usage of user_defined_symbols is placeholder for named entities.
  repeated string user_defined_symbols = 31;
  Specified by:
  
  getUserDefinedSymbols in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Parameters:
  
  index - The index of the element to return.
  
  Returns:
  
  The userDefinedSymbols at the given index.
- getUserDefinedSymbolsBytes
  
  public com.google.protobuf.ByteString getUserDefinedSymbolsBytes(int index)
  Defines user defined symbols. These symbols are added with extremely high score so they are always treated as one unique symbol in any context. Typical usage of user_defined_symbols is placeholder for named entities.
  repeated string user_defined_symbols = 31;
  Specified by:
  
  getUserDefinedSymbolsBytes in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Parameters:
  
  index - The index of the value to return.
  
  Returns:
  
  The bytes of the userDefinedSymbols at the given index.
- setUserDefinedSymbols
  
  public SentencepieceModel.TrainerSpec.Builder setUserDefinedSymbols(int index, String value)
  Defines user defined symbols. These symbols are added with extremely high score so they are always treated as one unique symbol in any context. Typical usage of user_defined_symbols is placeholder for named entities.
  repeated string user_defined_symbols = 31;
  Parameters:
  
  index - The index to set the value at.
  
  value - The userDefinedSymbols to set.
  
  Returns:
  
  This builder for chaining.
- addUserDefinedSymbols
  
  public SentencepieceModel.TrainerSpec.Builder addUserDefinedSymbols(String value)
  Defines user defined symbols. These symbols are added with extremely high score so they are always treated as one unique symbol in any context. Typical usage of user_defined_symbols is placeholder for named entities.
  repeated string user_defined_symbols = 31;
  Parameters:
  
  value - The userDefinedSymbols to add.
  
  Returns:
  
  This builder for chaining.
- addAllUserDefinedSymbols
  
  public SentencepieceModel.TrainerSpec.Builder addAllUserDefinedSymbols(Iterable<String> values)
  Defines user defined symbols. These symbols are added with extremely high score so they are always treated as one unique symbol in any context. Typical usage of user_defined_symbols is placeholder for named entities.
  repeated string user_defined_symbols = 31;
  Parameters:
  
  values - The userDefinedSymbols to add.
  
  Returns:
  
  This builder for chaining.
- clearUserDefinedSymbols
  
  public SentencepieceModel.TrainerSpec.Builder clearUserDefinedSymbols()
  Defines user defined symbols. These symbols are added with extremely high score so they are always treated as one unique symbol in any context. Typical usage of user_defined_symbols is placeholder for named entities.
  repeated string user_defined_symbols = 31;
  Returns:
  
  This builder for chaining.
- addUserDefinedSymbolsBytes
  
  public SentencepieceModel.TrainerSpec.Builder addUserDefinedSymbolsBytes(com.google.protobuf.ByteString value)
  Defines user defined symbols. These symbols are added with extremely high score so they are always treated as one unique symbol in any context. Typical usage of user_defined_symbols is placeholder for named entities.
  repeated string user_defined_symbols = 31;
  Parameters:
  
  value - The bytes of the userDefinedSymbols to add.
  
  Returns:
  
  This builder for chaining.
- hasRequiredChars
  
  public boolean hasRequiredChars()
  Defines required characters. Each UTF8 character in this string is included in the character set regardless of character_coverage value. Unlike user_defined_symbols, these characters have scores based on the frequency on input sentences, and the model can form subwords using characters in this field.
  optional string required_chars = 36;
  Specified by:
  
  hasRequiredChars in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the requiredChars field is set.
- getRequiredChars
  
  public String getRequiredChars()
  Defines required characters. Each UTF8 character in this string is included in the character set regardless of character_coverage value. Unlike user_defined_symbols, these characters have scores based on the frequency on input sentences, and the model can form subwords using characters in this field.
  optional string required_chars = 36;
  Specified by:
  
  getRequiredChars in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The requiredChars.
- getRequiredCharsBytes
  
  public com.google.protobuf.ByteString getRequiredCharsBytes()
  Defines required characters. Each UTF8 character in this string is included in the character set regardless of character_coverage value. Unlike user_defined_symbols, these characters have scores based on the frequency on input sentences, and the model can form subwords using characters in this field.
  optional string required_chars = 36;
  Specified by:
  
  getRequiredCharsBytes in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The bytes for requiredChars.
- setRequiredChars
  
  public SentencepieceModel.TrainerSpec.Builder setRequiredChars(String value)
  Defines required characters. Each UTF8 character in this string is included in the character set regardless of character_coverage value. Unlike user_defined_symbols, these characters have scores based on the frequency on input sentences, and the model can form subwords using characters in this field.
  optional string required_chars = 36;
  Parameters:
  
  value - The requiredChars to set.
  
  Returns:
  
  This builder for chaining.
- clearRequiredChars
  
  public SentencepieceModel.TrainerSpec.Builder clearRequiredChars()
  Defines required characters. Each UTF8 character in this string is included in the character set regardless of character_coverage value. Unlike user_defined_symbols, these characters have scores based on the frequency on input sentences, and the model can form subwords using characters in this field.
  optional string required_chars = 36;
  Returns:
  
  This builder for chaining.
- setRequiredCharsBytes
  
  public SentencepieceModel.TrainerSpec.Builder setRequiredCharsBytes(com.google.protobuf.ByteString value)
  Defines required characters. Each UTF8 character in this string is included in the character set regardless of character_coverage value. Unlike user_defined_symbols, these characters have scores based on the frequency on input sentences, and the model can form subwords using characters in this field.
  optional string required_chars = 36;
  Parameters:
  
  value - The bytes for requiredChars to set.
  
  Returns:
  
  This builder for chaining.
- hasByteFallback
  
  public boolean hasByteFallback()
  Decomposes unknown pieces into UTF-8 bytes.
  optional bool byte_fallback = 35 [default = false];
  Specified by:
  
  hasByteFallback in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the byteFallback field is set.
- getByteFallback
  
  public boolean getByteFallback()
  Decomposes unknown pieces into UTF-8 bytes.
  optional bool byte_fallback = 35 [default = false];
  Specified by:
  
  getByteFallback in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The byteFallback.
- setByteFallback
  
  public SentencepieceModel.TrainerSpec.Builder setByteFallback(boolean value)
  Decomposes unknown pieces into UTF-8 bytes.
  optional bool byte_fallback = 35 [default = false];
  Parameters:
  
  value - The byteFallback to set.
  
  Returns:
  
  This builder for chaining.
- clearByteFallback
  
  public SentencepieceModel.TrainerSpec.Builder clearByteFallback()
  Decomposes unknown pieces into UTF-8 bytes.
  optional bool byte_fallback = 35 [default = false];
  Returns:
  
  This builder for chaining.
- hasVocabularyOutputPieceScore
  
  public boolean hasVocabularyOutputPieceScore()
  When creating the vocabulary file, defines whether or not to additionally output the score for each piece.
  optional bool vocabulary_output_piece_score = 32 [default = true];
  Specified by:
  
  hasVocabularyOutputPieceScore in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the vocabularyOutputPieceScore field is set.
- getVocabularyOutputPieceScore
  
  public boolean getVocabularyOutputPieceScore()
  When creating the vocabulary file, defines whether or not to additionally output the score for each piece.
  optional bool vocabulary_output_piece_score = 32 [default = true];
  Specified by:
  
  getVocabularyOutputPieceScore in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The vocabularyOutputPieceScore.
- setVocabularyOutputPieceScore
  
  public SentencepieceModel.TrainerSpec.Builder setVocabularyOutputPieceScore(boolean value)
  When creating the vocabulary file, defines whether or not to additionally output the score for each piece.
  optional bool vocabulary_output_piece_score = 32 [default = true];
  Parameters:
  
  value - The vocabularyOutputPieceScore to set.
  
  Returns:
  
  This builder for chaining.
- clearVocabularyOutputPieceScore
  
  public SentencepieceModel.TrainerSpec.Builder clearVocabularyOutputPieceScore()
  When creating the vocabulary file, defines whether or not to additionally output the score for each piece.
  optional bool vocabulary_output_piece_score = 32 [default = true];
  Returns:
  
  This builder for chaining.
- hasHardVocabLimit
  
  public boolean hasHardVocabLimit()
  `vocab_size` is treated as a hard limit. Crashes if the model cannot produce a vocab of size `vocab_size`. When `hard_vocab_limit` is false, vocab_size is treated as a soft limit. Note that when model_type=char, hard_vocab_limit = false is always assumed.
  optional bool hard_vocab_limit = 33 [default = true];
  Specified by:
  
  hasHardVocabLimit in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the hardVocabLimit field is set.
- getHardVocabLimit
  
  public boolean getHardVocabLimit()
  `vocab_size` is treated as a hard limit. Crashes if the model cannot produce a vocab of size `vocab_size`. When `hard_vocab_limit` is false, vocab_size is treated as a soft limit. Note that when model_type=char, hard_vocab_limit = false is always assumed.
  optional bool hard_vocab_limit = 33 [default = true];
  Specified by:
  
  getHardVocabLimit in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The hardVocabLimit.
- setHardVocabLimit
  
  public SentencepieceModel.TrainerSpec.Builder setHardVocabLimit(boolean value)
  `vocab_size` is treated as a hard limit. Crashes if the model cannot produce a vocab of size `vocab_size`. When `hard_vocab_limit` is false, vocab_size is treated as a soft limit. Note that when model_type=char, hard_vocab_limit = false is always assumed.
  optional bool hard_vocab_limit = 33 [default = true];
  Parameters:
  
  value - The hardVocabLimit to set.
  
  Returns:
  
  This builder for chaining.
- clearHardVocabLimit
  
  public SentencepieceModel.TrainerSpec.Builder clearHardVocabLimit()
  `vocab_size` is treated as a hard limit. Crashes if the model cannot produce a vocab of size `vocab_size`. When `hard_vocab_limit` is false, vocab_size is treated as a soft limit. Note that when model_type=char, hard_vocab_limit = false is always assumed.
  optional bool hard_vocab_limit = 33 [default = true];
  Returns:
  
  This builder for chaining.
- hasUseAllVocab
  
  public boolean hasUseAllVocab()
  Uses all symbols for vocab extraction. This flag is valid if the model type is either CHAR or WORD.
  optional bool use_all_vocab = 34 [default = false];
  Specified by:
  
  hasUseAllVocab in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the useAllVocab field is set.
- getUseAllVocab
  
  public boolean getUseAllVocab()
  Uses all symbols for vocab extraction. This flag is valid if the model type is either CHAR or WORD.
  optional bool use_all_vocab = 34 [default = false];
  Specified by:
  
  getUseAllVocab in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The useAllVocab.
- setUseAllVocab
  
  public SentencepieceModel.TrainerSpec.Builder setUseAllVocab(boolean value)
  Uses all symbols for vocab extraction. This flag is valid if the model type is either CHAR or WORD.
  optional bool use_all_vocab = 34 [default = false];
  Parameters:
  
  value - The useAllVocab to set.
  
  Returns:
  
  This builder for chaining.
- clearUseAllVocab
  
  public SentencepieceModel.TrainerSpec.Builder clearUseAllVocab()
  Uses all symbols for vocab extraction. This flag is valid if the model type is either CHAR or WORD.
  optional bool use_all_vocab = 34 [default = false];
  Returns:
  
  This builder for chaining.
- hasUnkId
  
  public boolean hasUnkId()
  Reserved special meta tokens. * -1 is not used. * unk_id must not be -1. Ids must start with 0 and be contiguous.
  optional int32 unk_id = 40 [default = 0];
  Specified by:
  
  hasUnkId in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the unkId field is set.
- getUnkId
  
  public int getUnkId()
  Reserved special meta tokens. * -1 is not used. * unk_id must not be -1. Ids must start with 0 and be contiguous.
  optional int32 unk_id = 40 [default = 0];
  Specified by:
  
  getUnkId in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The unkId.
- setUnkId
  
  public SentencepieceModel.TrainerSpec.Builder setUnkId(int value)
  Reserved special meta tokens. * -1 is not used. * unk_id must not be -1. Ids must start with 0 and be contiguous.
  optional int32 unk_id = 40 [default = 0];
  Parameters:
  
  value - The unkId to set.
  
  Returns:
  
  This builder for chaining.
- clearUnkId
  
  public SentencepieceModel.TrainerSpec.Builder clearUnkId()
  Reserved special meta tokens. * -1 is not used. * unk_id must not be -1. Ids must start with 0 and be contiguous.
  optional int32 unk_id = 40 [default = 0];
  Returns:
  
  This builder for chaining.
- hasBosId
  
  public boolean hasBosId()
  <s>
  optional int32 bos_id = 41 [default = 1];
  Specified by:
  
  hasBosId in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the bosId field is set.
- getBosId
  
  public int getBosId()
  <s>
  optional int32 bos_id = 41 [default = 1];
  Specified by:
  
  getBosId in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The bosId.
- setBosId
  
  public SentencepieceModel.TrainerSpec.Builder setBosId(int value)
  <s>
  optional int32 bos_id = 41 [default = 1];
  Parameters:
  
  value - The bosId to set.
  
  Returns:
  
  This builder for chaining.
- clearBosId
  
  public SentencepieceModel.TrainerSpec.Builder clearBosId()
  <s>
  optional int32 bos_id = 41 [default = 1];
  Returns:
  
  This builder for chaining.
- hasEosId
  
  public boolean hasEosId()
  </s>
  optional int32 eos_id = 42 [default = 2];
  Specified by:
  
  hasEosId in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the eosId field is set.
- getEosId
  
  public int getEosId()
  </s>
  optional int32 eos_id = 42 [default = 2];
  Specified by:
  
  getEosId in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The eosId.
- setEosId
  
  public SentencepieceModel.TrainerSpec.Builder setEosId(int value)
  </s>
  optional int32 eos_id = 42 [default = 2];
  Parameters:
  
  value - The eosId to set.
  
  Returns:
  
  This builder for chaining.
- clearEosId
  
  public SentencepieceModel.TrainerSpec.Builder clearEosId()
  </s>
  optional int32 eos_id = 42 [default = 2];
  Returns:
  
  This builder for chaining.
- hasPadId
  
  public boolean hasPadId()
  <pad> (padding)
  optional int32 pad_id = 43 [default = -1];
  Specified by:
  
  hasPadId in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the padId field is set.
- getPadId
  
  public int getPadId()
  <pad> (padding)
  optional int32 pad_id = 43 [default = -1];
  Specified by:
  
  getPadId in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The padId.
- setPadId
  
  public SentencepieceModel.TrainerSpec.Builder setPadId(int value)
  <pad> (padding)
  optional int32 pad_id = 43 [default = -1];
  Parameters:
  
  value - The padId to set.
  
  Returns:
  
  This builder for chaining.
- clearPadId
  
  public SentencepieceModel.TrainerSpec.Builder clearPadId()
  <pad> (padding)
  optional int32 pad_id = 43 [default = -1];
  Returns:
  
  This builder for chaining.
- hasUnkPiece
  
  public boolean hasUnkPiece()
  
  optional string unk_piece = 45 [default = "<unk>"];
  
  Specified by:
  
  hasUnkPiece in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the unkPiece field is set.
- getUnkPiece
  
  public String getUnkPiece()
  
  optional string unk_piece = 45 [default = "<unk>"];
  
  Specified by:
  
  getUnkPiece in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The unkPiece.
- getUnkPieceBytes
  
  public com.google.protobuf.ByteString getUnkPieceBytes()
  
  optional string unk_piece = 45 [default = "<unk>"];
  
  Specified by:
  
  getUnkPieceBytes in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The bytes for unkPiece.
- setUnkPiece
  
  public SentencepieceModel.TrainerSpec.Builder setUnkPiece(String value)
  
  optional string unk_piece = 45 [default = "<unk>"];
  
  Parameters:
  
  value - The unkPiece to set.
  
  Returns:
  
  This builder for chaining.
- clearUnkPiece
  
  public SentencepieceModel.TrainerSpec.Builder clearUnkPiece()
  
  optional string unk_piece = 45 [default = "<unk>"];
  
  Returns:
  
  This builder for chaining.
- setUnkPieceBytes
  
  public SentencepieceModel.TrainerSpec.Builder setUnkPieceBytes(com.google.protobuf.ByteString value)
  
  optional string unk_piece = 45 [default = "<unk>"];
  
  Parameters:
  
  value - The bytes for unkPiece to set.
  
  Returns:
  
  This builder for chaining.
- hasBosPiece
  
  public boolean hasBosPiece()
  
  optional string bos_piece = 46 [default = "<s>"];
  
  Specified by:
  
  hasBosPiece in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the bosPiece field is set.
- getBosPiece
  
  public String getBosPiece()
  
  optional string bos_piece = 46 [default = "<s>"];
  
  Specified by:
  
  getBosPiece in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The bosPiece.
- getBosPieceBytes
  
  public com.google.protobuf.ByteString getBosPieceBytes()
  
  optional string bos_piece = 46 [default = "<s>"];
  
  Specified by:
  
  getBosPieceBytes in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The bytes for bosPiece.
- setBosPiece
  
  public SentencepieceModel.TrainerSpec.Builder setBosPiece(String value)
  
  optional string bos_piece = 46 [default = "<s>"];
  
  Parameters:
  
  value - The bosPiece to set.
  
  Returns:
  
  This builder for chaining.
- clearBosPiece
  
  public SentencepieceModel.TrainerSpec.Builder clearBosPiece()
  
  optional string bos_piece = 46 [default = "<s>"];
  
  Returns:
  
  This builder for chaining.
- setBosPieceBytes
  
  public SentencepieceModel.TrainerSpec.Builder setBosPieceBytes(com.google.protobuf.ByteString value)
  
  optional string bos_piece = 46 [default = "<s>"];
  
  Parameters:
  
  value - The bytes for bosPiece to set.
  
  Returns:
  
  This builder for chaining.
- hasEosPiece
  
  public boolean hasEosPiece()
  
  optional string eos_piece = 47 [default = "</s>"];
  
  Specified by:
  
  hasEosPiece in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the eosPiece field is set.
- getEosPiece
  
  public String getEosPiece()
  
  optional string eos_piece = 47 [default = "</s>"];
  
  Specified by:
  
  getEosPiece in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The eosPiece.
- getEosPieceBytes
  
  public com.google.protobuf.ByteString getEosPieceBytes()
  
  optional string eos_piece = 47 [default = "</s>"];
  
  Specified by:
  
  getEosPieceBytes in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The bytes for eosPiece.
- setEosPiece
  
  public SentencepieceModel.TrainerSpec.Builder setEosPiece(String value)
  
  optional string eos_piece = 47 [default = "</s>"];
  
  Parameters:
  
  value - The eosPiece to set.
  
  Returns:
  
  This builder for chaining.
- clearEosPiece
  
  public SentencepieceModel.TrainerSpec.Builder clearEosPiece()
  
  optional string eos_piece = 47 [default = "</s>"];
  
  Returns:
  
  This builder for chaining.
- setEosPieceBytes
  
  public SentencepieceModel.TrainerSpec.Builder setEosPieceBytes(com.google.protobuf.ByteString value)
  
  optional string eos_piece = 47 [default = "</s>"];
  
  Parameters:
  
  value - The bytes for eosPiece to set.
  
  Returns:
  
  This builder for chaining.
- hasPadPiece
  
  public boolean hasPadPiece()
  
  optional string pad_piece = 48 [default = "<pad>"];
  
  Specified by:
  
  hasPadPiece in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the padPiece field is set.
- getPadPiece
  
  public String getPadPiece()
  
  optional string pad_piece = 48 [default = "<pad>"];
  
  Specified by:
  
  getPadPiece in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The padPiece.
- getPadPieceBytes
  
  public com.google.protobuf.ByteString getPadPieceBytes()
  
  optional string pad_piece = 48 [default = "<pad>"];
  
  Specified by:
  
  getPadPieceBytes in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The bytes for padPiece.
- setPadPiece
  
  public SentencepieceModel.TrainerSpec.Builder setPadPiece(String value)
  
  optional string pad_piece = 48 [default = "<pad>"];
  
  Parameters:
  
  value - The padPiece to set.
  
  Returns:
  
  This builder for chaining.
- clearPadPiece
  
  public SentencepieceModel.TrainerSpec.Builder clearPadPiece()
  
  optional string pad_piece = 48 [default = "<pad>"];
  
  Returns:
  
  This builder for chaining.
- setPadPieceBytes
  
  public SentencepieceModel.TrainerSpec.Builder setPadPieceBytes(com.google.protobuf.ByteString value)
  
  optional string pad_piece = 48 [default = "<pad>"];
  
  Parameters:
  
  value - The bytes for padPiece to set.
  
  Returns:
  
  This builder for chaining.
- hasUnkSurface
  
  public boolean hasUnkSurface()
  Encodes <unk> into U+2047 (DOUBLE QUESTION MARK), since this character can be useful both for user and developer. We can easily figure out that <unk> is emitted.
  optional string unk_surface = 44 [default = " \342\201\207 "];
  Specified by:
  
  hasUnkSurface in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the unkSurface field is set.
- getUnkSurface
  
  public String getUnkSurface()
  Encodes <unk> as U+2047 (DOUBLE QUESTION MARK), since this character is useful to both users and developers: it makes it easy to see that <unk> was emitted.
  optional string unk_surface = 44 [default = " \342\201\207 "];
  Specified by:
  
  getUnkSurface in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The unkSurface.
- getUnkSurfaceBytes
  
  public com.google.protobuf.ByteString getUnkSurfaceBytes()
  Encodes <unk> as U+2047 (DOUBLE QUESTION MARK), since this character is useful to both users and developers: it makes it easy to see that <unk> was emitted.
  optional string unk_surface = 44 [default = " \342\201\207 "];
  Specified by:
  
  getUnkSurfaceBytes in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The bytes for unkSurface.
- setUnkSurface
  
  public SentencepieceModel.TrainerSpec.Builder setUnkSurface(String value)
  Encodes <unk> as U+2047 (DOUBLE QUESTION MARK), since this character is useful to both users and developers: it makes it easy to see that <unk> was emitted.
  optional string unk_surface = 44 [default = " \342\201\207 "];
  Parameters:
  
  value - The unkSurface to set.
  
  Returns:
  
  This builder for chaining.
- clearUnkSurface
  
  public SentencepieceModel.TrainerSpec.Builder clearUnkSurface()
  Encodes <unk> as U+2047 (DOUBLE QUESTION MARK), since this character is useful to both users and developers: it makes it easy to see that <unk> was emitted.
  optional string unk_surface = 44 [default = " \342\201\207 "];
  Returns:
  
  This builder for chaining.
- setUnkSurfaceBytes
  
  public SentencepieceModel.TrainerSpec.Builder setUnkSurfaceBytes(com.google.protobuf.ByteString value)
  Encodes <unk> as U+2047 (DOUBLE QUESTION MARK), since this character is useful to both users and developers: it makes it easy to see that <unk> was emitted.
  optional string unk_surface = 44 [default = " \342\201\207 "];
  Parameters:
  
  value - The bytes for unkSurface to set.
  
  Returns:
  
  This builder for chaining.
- hasTrainExtremelyLargeCorpus
  
  public boolean hasTrainExtremelyLargeCorpus()
  Increase bit depth to allow unigram model training on large (>10M sentences) corpora. A side effect of enabling this flag is increased memory usage.
  optional bool train_extremely_large_corpus = 49 [default = false];
  Specified by:
  
  hasTrainExtremelyLargeCorpus in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the trainExtremelyLargeCorpus field is set.
- getTrainExtremelyLargeCorpus
  
  public boolean getTrainExtremelyLargeCorpus()
  Increase bit depth to allow unigram model training on large (>10M sentences) corpora. A side effect of enabling this flag is increased memory usage.
  optional bool train_extremely_large_corpus = 49 [default = false];
  Specified by:
  
  getTrainExtremelyLargeCorpus in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The trainExtremelyLargeCorpus.
- setTrainExtremelyLargeCorpus
  
  public SentencepieceModel.TrainerSpec.Builder setTrainExtremelyLargeCorpus(boolean value)
  Increase bit depth to allow unigram model training on large (>10M sentences) corpora. A side effect of enabling this flag is increased memory usage.
  optional bool train_extremely_large_corpus = 49 [default = false];
  Parameters:
  
  value - The trainExtremelyLargeCorpus to set.
  
  Returns:
  
  This builder for chaining.
- clearTrainExtremelyLargeCorpus
  
  public SentencepieceModel.TrainerSpec.Builder clearTrainExtremelyLargeCorpus()
  Increase bit depth to allow unigram model training on large (>10M sentences) corpora. A side effect of enabling this flag is increased memory usage.
  optional bool train_extremely_large_corpus = 49 [default = false];
  Returns:
  
  This builder for chaining.
- hasSeedSentencepiecesFile
  
  public boolean hasSeedSentencepiecesFile()
  Path to a seed sentencepieces file in which each line holds one seed sentencepiece and its frequency, separated by a tab (sentencepiece <tab> frequency).
  optional string seed_sentencepieces_file = 54 [default = ""];
  Specified by:
  
  hasSeedSentencepiecesFile in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  Whether the seedSentencepiecesFile field is set.
- getSeedSentencepiecesFile
  
  public String getSeedSentencepiecesFile()
  Path to a seed sentencepieces file in which each line holds one seed sentencepiece and its frequency, separated by a tab (sentencepiece <tab> frequency).
  optional string seed_sentencepieces_file = 54 [default = ""];
  Specified by:
  
  getSeedSentencepiecesFile in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The seedSentencepiecesFile.
- getSeedSentencepiecesFileBytes
  
  public com.google.protobuf.ByteString getSeedSentencepiecesFileBytes()
  Path to a seed sentencepieces file in which each line holds one seed sentencepiece and its frequency, separated by a tab (sentencepiece <tab> frequency).
  optional string seed_sentencepieces_file = 54 [default = ""];
  Specified by:
  
  getSeedSentencepiecesFileBytes in interface SentencepieceModel.TrainerSpecOrBuilder
  
  Returns:
  
  The bytes for seedSentencepiecesFile.
- setSeedSentencepiecesFile
  
  public SentencepieceModel.TrainerSpec.Builder setSeedSentencepiecesFile(String value)
  Path to a seed sentencepieces file in which each line holds one seed sentencepiece and its frequency, separated by a tab (sentencepiece <tab> frequency).
  optional string seed_sentencepieces_file = 54 [default = ""];
  Parameters:
  
  value - The seedSentencepiecesFile to set.
  
  Returns:
  
  This builder for chaining.
- clearSeedSentencepiecesFile
  
  public SentencepieceModel.TrainerSpec.Builder clearSeedSentencepiecesFile()
  Path to a seed sentencepieces file in which each line holds one seed sentencepiece and its frequency, separated by a tab (sentencepiece <tab> frequency).
  optional string seed_sentencepieces_file = 54 [default = ""];
  Returns:
  
  This builder for chaining.
- setSeedSentencepiecesFileBytes
  
  public SentencepieceModel.TrainerSpec.Builder setSeedSentencepiecesFileBytes(com.google.protobuf.ByteString value)
  Path to a seed sentencepieces file in which each line holds one seed sentencepiece and its frequency, separated by a tab (sentencepiece <tab> frequency).
  optional string seed_sentencepieces_file = 54 [default = ""];
  Parameters:
  
  value - The bytes for seedSentencepiecesFile to set.
  
  Returns:
  
  This builder for chaining.
- setUnknownFields
  
  public final SentencepieceModel.TrainerSpec.Builder setUnknownFields(com.google.protobuf.UnknownFieldSet unknownFields)
  
  Specified by:
  
  setUnknownFields in interface com.google.protobuf.Message.Builder
  
  Overrides:
  
  setUnknownFields in class com.google.protobuf.GeneratedMessageV3.Builder<SentencepieceModel.TrainerSpec.Builder>
- mergeUnknownFields
  
  public final SentencepieceModel.TrainerSpec.Builder mergeUnknownFields(com.google.protobuf.UnknownFieldSet unknownFields)
  
  Specified by:
  
  mergeUnknownFields in interface com.google.protobuf.Message.Builder
  
  Overrides:
  
  mergeUnknownFields in class com.google.protobuf.GeneratedMessageV3.Builder<SentencepieceModel.TrainerSpec.Builder>

Class SentencepieceModel.TrainerSpec.Builder

Method Summary

Methods inherited from class com.google.protobuf.GeneratedMessageV3.ExtendableBuilder

Methods inherited from class com.google.protobuf.GeneratedMessageV3.Builder

Methods inherited from class com.google.protobuf.AbstractMessage.Builder

Methods inherited from class com.google.protobuf.AbstractMessageLite.Builder

Methods inherited from class java.lang.Object

Methods inherited from interface com.google.protobuf.GeneratedMessageV3.ExtendableMessageOrBuilder

Methods inherited from interface com.google.protobuf.Message.Builder

Methods inherited from interface com.google.protobuf.MessageLite.Builder

Methods inherited from interface com.google.protobuf.MessageOrBuilder

Method Details

getDescriptor

clear

getDescriptorForType

getDefaultInstanceForType

build

buildPartial

clone

setField

clearField

clearOneof

setRepeatedField

addRepeatedField

setExtension

setExtension

addExtension

clearExtension

mergeFrom

mergeFrom

isInitialized

mergeFrom

getInputList

getInputCount

getInput

getInputBytes

setInput

addInput

addAllInput

clearInput

addInputBytes

hasInputFormat

getInputFormat

getInputFormatBytes

setInputFormat

clearInputFormat

setInputFormatBytes

hasModelPrefix

getModelPrefix

getModelPrefixBytes

setModelPrefix

clearModelPrefix

setModelPrefixBytes

hasModelType

getModelType

setModelType

clearModelType

hasVocabSize

getVocabSize

setVocabSize

clearVocabSize

getAcceptLanguageList

getAcceptLanguageCount

getAcceptLanguage

getAcceptLanguageBytes

setAcceptLanguage

addAcceptLanguage

addAllAcceptLanguage

clearAcceptLanguage

addAcceptLanguageBytes

hasSelfTestSampleSize

getSelfTestSampleSize

setSelfTestSampleSize

clearSelfTestSampleSize

hasEnableDifferentialPrivacy

getEnableDifferentialPrivacy

setEnableDifferentialPrivacy

clearEnableDifferentialPrivacy

hasDifferentialPrivacyNoiseLevel

getDifferentialPrivacyNoiseLevel