public class TokenSizeTextSplitter extends TextSplitter
| 构造器和说明 |
|---|
| `TokenSizeTextSplitter()` |
| `TokenSizeTextSplitter(int chunkSize)` |
| `TokenSizeTextSplitter(int chunkSize, int minChunkSizeChars)` |
| `TokenSizeTextSplitter(int chunkSize, int minChunkSizeChars, int minChunkLengthToEmbed, int maxChunkCount, boolean keepSeparator)` |
| 限定符和类型 | 方法和说明 |
|---|---|
| `protected java.lang.String` | `decodeTokens(com.knuddels.jtokkit.api.Encoding encoding, java.util.List<java.lang.Integer> tokens)`<br>解码符号 |
| `protected java.util.List<java.lang.Integer>` | `encodeTokens(com.knuddels.jtokkit.api.Encoding encoding, java.lang.String text)`<br>编码符号 |
| `void` | `setEncodingRegistry(com.knuddels.jtokkit.api.EncodingRegistry encodingRegistry)`<br>设置编码库 |
| `void` | `setEncodingType(com.knuddels.jtokkit.api.EncodingType encodingType)`<br>设置编码类型 |
| `protected java.util.List<java.lang.String>` | `splitText(java.lang.String text)` |
从类继承的方法 TextSplitter: split, splitDocument
从类继承的方法 java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
split
public TokenSizeTextSplitter()
public TokenSizeTextSplitter(int chunkSize)
public TokenSizeTextSplitter(int chunkSize,
int minChunkSizeChars)
public TokenSizeTextSplitter(int chunkSize,
int minChunkSizeChars,
int minChunkLengthToEmbed,
int maxChunkCount,
boolean keepSeparator)
public void setEncodingRegistry(com.knuddels.jtokkit.api.EncodingRegistry encodingRegistry)
public void setEncodingType(com.knuddels.jtokkit.api.EncodingType encodingType)
protected java.util.List<java.lang.String> splitText(java.lang.String text)
splitText 在类中 TextSplitter
protected java.util.List<java.lang.Integer> encodeTokens(com.knuddels.jtokkit.api.Encoding encoding,
java.lang.String text)
protected java.lang.String decodeTokens(com.knuddels.jtokkit.api.Encoding encoding,
java.util.List<java.lang.Integer> tokens)