Class SentencepieceModel.NormalizerSpec

java.lang.Object
com.google.protobuf.AbstractMessageLite
com.google.protobuf.AbstractMessage
com.google.protobuf.GeneratedMessageV3
com.google.protobuf.GeneratedMessageV3.ExtendableMessage<SentencepieceModel.NormalizerSpec>
com.google.genai.proto.SentencepieceModel.NormalizerSpec
All Implemented Interfaces:
SentencepieceModel.NormalizerSpecOrBuilder, com.google.protobuf.GeneratedMessageV3.ExtendableMessageOrBuilder<SentencepieceModel.NormalizerSpec>, com.google.protobuf.Message, com.google.protobuf.MessageLite, com.google.protobuf.MessageLiteOrBuilder, com.google.protobuf.MessageOrBuilder, Serializable
Enclosing class:
SentencepieceModel

public static final class SentencepieceModel.NormalizerSpec extends com.google.protobuf.GeneratedMessageV3.ExtendableMessage<SentencepieceModel.NormalizerSpec> implements SentencepieceModel.NormalizerSpecOrBuilder
 NormalizerSpec encodes various parameters for string normalization.
 
Protobuf type com.google.genai.proto.NormalizerSpec
See Also:
  • Field Details

  • Method Details

    • getDescriptor

      public static final com.google.protobuf.Descriptors.Descriptor getDescriptor()
    • hasName

      public boolean hasName()
       Name of the normalization rule.
       
      optional string name = 1;
      Specified by:
      hasName in interface SentencepieceModel.NormalizerSpecOrBuilder
      Returns:
      Whether the name field is set.
    • getName

      public String getName()
       Name of the normalization rule.
       
      optional string name = 1;
      Specified by:
      getName in interface SentencepieceModel.NormalizerSpecOrBuilder
      Returns:
      The name.
    • getNameBytes

      public com.google.protobuf.ByteString getNameBytes()
       Name of the normalization rule.
       
      optional string name = 1;
      Specified by:
      getNameBytes in interface SentencepieceModel.NormalizerSpecOrBuilder
      Returns:
      The bytes for name.
    • hasPrecompiledCharsmap

      public boolean hasPrecompiledCharsmap()
       Pre-compiled normalization rule created by
       Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
       Usually this field is set by Builder::GetNormalizerSpec() method.
       
      optional bytes precompiled_charsmap = 2;
      Specified by:
      hasPrecompiledCharsmap in interface SentencepieceModel.NormalizerSpecOrBuilder
      Returns:
      Whether the precompiledCharsmap field is set.
    • getPrecompiledCharsmap

      public com.google.protobuf.ByteString getPrecompiledCharsmap()
       Pre-compiled normalization rule created by
       Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
       Usually this field is set by Builder::GetNormalizerSpec() method.
       
      optional bytes precompiled_charsmap = 2;
      Specified by:
      getPrecompiledCharsmap in interface SentencepieceModel.NormalizerSpecOrBuilder
      Returns:
      The precompiledCharsmap.
    • hasAddDummyPrefix

      public boolean hasAddDummyPrefix()
       Adds dummy whitespace at the beginning of text in order to
       treat "world" in "world" and "hello world" in the same way.
       
      optional bool add_dummy_prefix = 3 [default = true];
      Specified by:
      hasAddDummyPrefix in interface SentencepieceModel.NormalizerSpecOrBuilder
      Returns:
      Whether the addDummyPrefix field is set.
    • getAddDummyPrefix

      public boolean getAddDummyPrefix()
       Adds dummy whitespace at the beginning of text in order to
       treat "world" in "world" and "hello world" in the same way.
       
      optional bool add_dummy_prefix = 3 [default = true];
      Specified by:
      getAddDummyPrefix in interface SentencepieceModel.NormalizerSpecOrBuilder
      Returns:
      The addDummyPrefix.
    • hasRemoveExtraWhitespaces

      public boolean hasRemoveExtraWhitespaces()
       Removes leading, trailing, and duplicate internal whitespace.
       
      optional bool remove_extra_whitespaces = 4 [default = true];
      Specified by:
      hasRemoveExtraWhitespaces in interface SentencepieceModel.NormalizerSpecOrBuilder
      Returns:
      Whether the removeExtraWhitespaces field is set.
    • getRemoveExtraWhitespaces

      public boolean getRemoveExtraWhitespaces()
       Removes leading, trailing, and duplicate internal whitespace.
       
      optional bool remove_extra_whitespaces = 4 [default = true];
      Specified by:
      getRemoveExtraWhitespaces in interface SentencepieceModel.NormalizerSpecOrBuilder
      Returns:
      The removeExtraWhitespaces.
    • hasEscapeWhitespaces

      public boolean hasEscapeWhitespaces()
       Replaces whitespace with meta symbol.
       This field must be true to train a SentencePiece model.
       
      optional bool escape_whitespaces = 5 [default = true];
      Specified by:
      hasEscapeWhitespaces in interface SentencepieceModel.NormalizerSpecOrBuilder
      Returns:
      Whether the escapeWhitespaces field is set.
    • getEscapeWhitespaces

      public boolean getEscapeWhitespaces()
       Replaces whitespace with meta symbol.
       This field must be true to train a SentencePiece model.
       
      optional bool escape_whitespaces = 5 [default = true];
      Specified by:
      getEscapeWhitespaces in interface SentencepieceModel.NormalizerSpecOrBuilder
      Returns:
      The escapeWhitespaces.
    • hasNormalizationRuleTsv

      public boolean hasNormalizationRuleTsv()
       Custom normalization rule file in TSV format.
       https://github.com/google/sentencepiece/blob/master/doc/normalization.md
       This field is only used in SentencePieceTrainer::Train() method, which
       compiles the rule into the binary rule stored in `precompiled_charsmap`.
       
      optional string normalization_rule_tsv = 6;
      Specified by:
      hasNormalizationRuleTsv in interface SentencepieceModel.NormalizerSpecOrBuilder
      Returns:
      Whether the normalizationRuleTsv field is set.
    • getNormalizationRuleTsv

      public String getNormalizationRuleTsv()
       Custom normalization rule file in TSV format.
       https://github.com/google/sentencepiece/blob/master/doc/normalization.md
       This field is only used in SentencePieceTrainer::Train() method, which
       compiles the rule into the binary rule stored in `precompiled_charsmap`.
       
      optional string normalization_rule_tsv = 6;
      Specified by:
      getNormalizationRuleTsv in interface SentencepieceModel.NormalizerSpecOrBuilder
      Returns:
      The normalizationRuleTsv.
    • getNormalizationRuleTsvBytes

      public com.google.protobuf.ByteString getNormalizationRuleTsvBytes()
       Custom normalization rule file in TSV format.
       https://github.com/google/sentencepiece/blob/master/doc/normalization.md
       This field is only used in SentencePieceTrainer::Train() method, which
       compiles the rule into the binary rule stored in `precompiled_charsmap`.
       
      optional string normalization_rule_tsv = 6;
      Specified by:
      getNormalizationRuleTsvBytes in interface SentencepieceModel.NormalizerSpecOrBuilder
      Returns:
      The bytes for normalizationRuleTsv.
    • isInitialized

      public final boolean isInitialized()
      Specified by:
      isInitialized in interface com.google.protobuf.MessageLiteOrBuilder
      Overrides:
      isInitialized in class com.google.protobuf.GeneratedMessageV3.ExtendableMessage<SentencepieceModel.NormalizerSpec>
    • writeTo

      public void writeTo(com.google.protobuf.CodedOutputStream output) throws IOException
      Specified by:
      writeTo in interface com.google.protobuf.MessageLite
      Overrides:
      writeTo in class com.google.protobuf.GeneratedMessageV3
      Throws:
      IOException
    • getSerializedSize

      public int getSerializedSize()
      Specified by:
      getSerializedSize in interface com.google.protobuf.MessageLite
      Overrides:
      getSerializedSize in class com.google.protobuf.GeneratedMessageV3
    • equals

      public boolean equals(Object obj)
      Specified by:
      equals in interface com.google.protobuf.Message
      Overrides:
      equals in class com.google.protobuf.AbstractMessage
    • hashCode

      public int hashCode()
      Specified by:
      hashCode in interface com.google.protobuf.Message
      Overrides:
      hashCode in class com.google.protobuf.AbstractMessage
    • parseFrom

      public static SentencepieceModel.NormalizerSpec parseFrom(ByteBuffer data) throws com.google.protobuf.InvalidProtocolBufferException
      Throws:
      com.google.protobuf.InvalidProtocolBufferException
    • parseFrom

      public static SentencepieceModel.NormalizerSpec parseFrom(ByteBuffer data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException
      Throws:
      com.google.protobuf.InvalidProtocolBufferException
    • parseFrom

      public static SentencepieceModel.NormalizerSpec parseFrom(com.google.protobuf.ByteString data) throws com.google.protobuf.InvalidProtocolBufferException
      Throws:
      com.google.protobuf.InvalidProtocolBufferException
    • parseFrom

      public static SentencepieceModel.NormalizerSpec parseFrom(com.google.protobuf.ByteString data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException
      Throws:
      com.google.protobuf.InvalidProtocolBufferException
    • parseFrom

      public static SentencepieceModel.NormalizerSpec parseFrom(byte[] data) throws com.google.protobuf.InvalidProtocolBufferException
      Throws:
      com.google.protobuf.InvalidProtocolBufferException
    • parseFrom

      public static SentencepieceModel.NormalizerSpec parseFrom(byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException
      Throws:
      com.google.protobuf.InvalidProtocolBufferException
    • parseFrom

      public static SentencepieceModel.NormalizerSpec parseFrom(InputStream input) throws IOException
      Throws:
      IOException
    • parseFrom

      public static SentencepieceModel.NormalizerSpec parseFrom(InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws IOException
      Throws:
      IOException
    • parseDelimitedFrom

      public static SentencepieceModel.NormalizerSpec parseDelimitedFrom(InputStream input) throws IOException
      Throws:
      IOException
    • parseDelimitedFrom

      public static SentencepieceModel.NormalizerSpec parseDelimitedFrom(InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws IOException
      Throws:
      IOException
    • parseFrom

      public static SentencepieceModel.NormalizerSpec parseFrom(com.google.protobuf.CodedInputStream input) throws IOException
      Throws:
      IOException
    • parseFrom

      public static SentencepieceModel.NormalizerSpec parseFrom(com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws IOException
      Throws:
      IOException
    • newBuilderForType

      public SentencepieceModel.NormalizerSpec.Builder newBuilderForType()
      Specified by:
      newBuilderForType in interface com.google.protobuf.Message
      Specified by:
      newBuilderForType in interface com.google.protobuf.MessageLite
    • newBuilder

      public static SentencepieceModel.NormalizerSpec.Builder newBuilder()
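    • newBuilder

      public static SentencepieceModel.NormalizerSpec.Builder newBuilder(SentencepieceModel.NormalizerSpec prototype)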
    • toBuilder

      public SentencepieceModel.NormalizerSpec.Builder toBuilder()
      Specified by:
      toBuilder in interface com.google.protobuf.Message
      Specified by:
      toBuilder in interface com.google.protobuf.MessageLite
    • getDefaultInstance

      public static SentencepieceModel.NormalizerSpec getDefaultInstance()
    • parser

      public static com.google.protobuf.Parser<SentencepieceModel.NormalizerSpec> parser()
    • getParserForType

      public com.google.protobuf.Parser<SentencepieceModel.NormalizerSpec> getParserForType()
      Specified by:
      getParserForType in interface com.google.protobuf.Message
      Specified by:
      getParserForType in interface com.google.protobuf.MessageLite
      Overrides:
      getParserForType in class com.google.protobuf.GeneratedMessageV3
    • getDefaultInstanceForType

      public SentencepieceModel.NormalizerSpec getDefaultInstanceForType()
      Specified by:
      getDefaultInstanceForType in interface com.google.protobuf.GeneratedMessageV3.ExtendableMessageOrBuilder<SentencepieceModel.NormalizerSpec>
      Specified by:
      getDefaultInstanceForType in interface com.google.protobuf.MessageLiteOrBuilder
      Specified by:
      getDefaultInstanceForType in interface com.google.protobuf.MessageOrBuilder