Open zjcDM opened 1 year ago
用这个方法:
<!-- Maven coordinates for the ai.djl.huggingface "tokenizers" artifact, pinned to version 0.19.0. -->
<dependency>
<groupId>ai.djl.huggingface</groupId>
<artifactId>tokenizers</artifactId>
<version>0.19.0</version>
</dependency>
/** Class-wide tokenizer instance; being {@code static final}, it is assigned once in the static initializer. */
private static final HuggingFaceTokenizer tokenizer;
# 声明
static {
try {
tokenizer =
HuggingFaceTokenizer.builder()
.optManager(manager)
.optPadding(true)
.optPadToMaxLength()
.optMaxLength(MAX_LENGTH)
.optTruncation(true)
.optTokenizerName("openai/clip-vit-large-patch14")
.build();
// sentence-transformers/msmarco-distilbert-dot-v5
// openai/clip-vit-large-patch14
// https://huggingface.co/sentence-transformers/msmarco-distilbert-dot-v5
// https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/tokenizer/tokenizer_config.json
} catch (IOException e) {
throw new RuntimeException(e);
}
}
# 使用
// Tokenize the prompt text with the class-wide tokenizer; the result is a list of token strings.
List<String> tokens = tokenizer.tokenize(prompt);
已经实现