public class TokenSizeTextSplitter extends TextSplitter
构造器和说明 |
---|
TokenSizeTextSplitter() |
TokenSizeTextSplitter(int chunkSize) |
TokenSizeTextSplitter(int chunkSize,
int minChunkSizeChars) |
TokenSizeTextSplitter(int chunkSize,
int minChunkSizeChars,
int minChunkLengthToEmbed,
int maxChunkCount,
boolean keepSeparator) |
限定符和类型 | 方法和说明 |
---|---|
protected java.lang.String |
decodeTokens(com.knuddels.jtokkit.api.Encoding encoding,
java.util.List<java.lang.Integer> tokens)
使用给定的编码将 token 列表解码为字符串
|
protected java.util.List<java.lang.Integer> |
encodeTokens(com.knuddels.jtokkit.api.Encoding encoding,
java.lang.String text)
使用给定的编码将文本编码为 token 列表
|
void |
setEncodingRegistry(com.knuddels.jtokkit.api.EncodingRegistry encodingRegistry)
设置编码注册表(EncodingRegistry)
|
void |
setEncodingType(com.knuddels.jtokkit.api.EncodingType encodingType)
设置编码类型
|
protected java.util.List<java.lang.String> |
splitText(java.lang.String text) |
从类继承的方法 TextSplitter: split, splitDocument
从类继承的方法 java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
split
public TokenSizeTextSplitter()
public TokenSizeTextSplitter(int chunkSize)
public TokenSizeTextSplitter(int chunkSize, int minChunkSizeChars)
public TokenSizeTextSplitter(int chunkSize, int minChunkSizeChars, int minChunkLengthToEmbed, int maxChunkCount, boolean keepSeparator)
public void setEncodingRegistry(com.knuddels.jtokkit.api.EncodingRegistry encodingRegistry)
public void setEncodingType(com.knuddels.jtokkit.api.EncodingType encodingType)
protected java.util.List<java.lang.String> splitText(java.lang.String text)
对应于类 TextSplitter 中的 splitText 方法
protected java.util.List<java.lang.Integer> encodeTokens(com.knuddels.jtokkit.api.Encoding encoding, java.lang.String text)
protected java.lang.String decodeTokens(com.knuddels.jtokkit.api.Encoding encoding, java.util.List<java.lang.Integer> tokens)