Documentation
¶
Index ¶
Constants ¶
View Source
const (
	ClassToken        = "[CLS]"
	SeparatorToken    = "[SEP]"
	SequenceSeparator = " ||| "
)
View Source
const DefaultMaxWordChars = 200
View Source
const DefaultUnknownToken = "[UNK]"
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type BasicTokenizer ¶
type BasicTokenizer struct {
Lower bool
}
func NewBasicTokenizer ¶
func NewBasicTokenizer(lower bool) *BasicTokenizer
func (*BasicTokenizer) Tokenize ¶
func (bt *BasicTokenizer) Tokenize(text string) []string
type FullTokenizer ¶
type FullTokenizer struct {
	Basic     *BasicTokenizer
	Wordpiece *WordpieceTokenizer
	SeqLen    int
}
func NewFullTokenizer ¶
func NewFullTokenizer(voc *Vocab, seqLen int, lower bool) *FullTokenizer
func (*FullTokenizer) Tokenize ¶
func (tkz *FullTokenizer) Tokenize(text string) *Encode
type WordpieceTokenizer ¶
type WordpieceTokenizer struct {
// contains filtered or unexported fields
}
func NewWordpieceTokenizer ¶
func NewWordpieceTokenizer(voc *Vocab) *WordpieceTokenizer
func (*WordpieceTokenizer) SetMaxWordChars ¶
func (wp *WordpieceTokenizer) SetMaxWordChars(c int)
func (*WordpieceTokenizer) SetUnknownToken ¶
func (wp *WordpieceTokenizer) SetUnknownToken(tok string)
func (*WordpieceTokenizer) Tokenize ¶
func (wp *WordpieceTokenizer) Tokenize(text string) []string
Source Files
¶
Click to show internal directories.
Click to hide internal directories.