11package dev .langchain4j .model .embedding .onnx ;
22
33import ai .djl .huggingface .tokenizers .Encoding ;
4+ import ai .djl .huggingface .tokenizers .HuggingFaceTokenizer ;
45import dev .langchain4j .data .message .*;
5- import dev .langchain4j .model .Tokenizer ;
6+ import dev .langchain4j .model .TokenCountEstimator ;
67
78import java .io .InputStream ;
89import java .nio .file .Path ;
1314import static java .nio .file .Files .newInputStream ;
1415
1516/**
16- * A <a href="https://huggingface.co/">HuggingFace</a> tokenizer .
17+ * A token count estimator for models that can be found on <a href="https://huggingface.co/">HuggingFace</a>.
1718 * <br>
18- * Uses DJL's {@link ai.djl.huggingface.tokenizers. HuggingFaceTokenizer} under the hood.
19+ * Uses DJL's {@link HuggingFaceTokenizer} under the hood.
1920 * <br>
2021 * Requires {@code tokenizer.json} to instantiate.
2122 * An <a href="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/blob/main/tokenizer.json">example</a>.
2223 */
23- public class HuggingFaceTokenizer implements Tokenizer {
24+ public class HuggingFaceTokenCountEstimator implements TokenCountEstimator {
2425
25- private final ai . djl . huggingface . tokenizers . HuggingFaceTokenizer tokenizer ;
26+ private final HuggingFaceTokenizer tokenizer ;
2627
2728 /**
28- * Creates an instance of a {@code HuggingFaceTokenizer } using a built-in {@code tokenizer.json} file.
29+ * Creates an instance of a {@code HuggingFaceTokenCountEstimator } using a built-in {@code tokenizer.json} file.
2930 */
30- public HuggingFaceTokenizer () {
31+ public HuggingFaceTokenCountEstimator () {
3132
3233 Map <String , String > options = new HashMap <>();
3334 options .put ("padding" , "false" );
@@ -37,22 +38,22 @@ public HuggingFaceTokenizer() {
3738 }
3839
/**
 * Builds a {@code HuggingFaceTokenCountEstimator} from a {@code tokenizer.json} file on disk.
 * <br>
 * Delegates to the two-argument constructor, passing {@code null} for the DJL tokenizer options.
 *
 * @param pathToTokenizer The path to the tokenizer file (e.g., "/path/to/tokenizer.json")
 */
public HuggingFaceTokenCountEstimator(Path pathToTokenizer) {
    // No explicit DJL options: hand over null and let the two-argument constructor deal with it.
    this(pathToTokenizer, null);
}
4748
4849 /**
49- * Creates an instance of a {@code HuggingFaceTokenizer } using a provided {@code tokenizer.json} file
50+ * Creates an instance of a {@code HuggingFaceTokenCountEstimator } using a provided {@code tokenizer.json} file
5051 * and a map of DJL's tokenizer options.
5152 *
5253 * @param pathToTokenizer The path to the tokenizer file (e.g., "/path/to/tokenizer.json")
5354 * @param options The DJL's tokenizer options
5455 */
55- public HuggingFaceTokenizer (Path pathToTokenizer , Map <String , String > options ) {
56+ public HuggingFaceTokenCountEstimator (Path pathToTokenizer , Map <String , String > options ) {
5657 try {
5758 this .tokenizer = createFrom (newInputStream (pathToTokenizer ), options );
5859 } catch (Exception e ) {
@@ -61,33 +62,33 @@ public HuggingFaceTokenizer(Path pathToTokenizer, Map<String, String> options) {
6162 }
6263
/**
 * Builds a {@code HuggingFaceTokenCountEstimator} from a {@code tokenizer.json} file on disk,
 * with the file location given as a string.
 * <br>
 * Delegates to the two-argument constructor, passing {@code null} for the DJL tokenizer options.
 *
 * @param pathToTokenizer The path to the tokenizer file (e.g., "/path/to/tokenizer.json")
 */
public HuggingFaceTokenCountEstimator(String pathToTokenizer) {
    // No explicit DJL options: hand over null and let the two-argument constructor deal with it.
    this(pathToTokenizer, null);
}
7172
/**
 * Creates an instance of a {@code HuggingFaceTokenCountEstimator} using a provided {@code tokenizer.json} file
 * and a map of DJL's tokenizer options.
 * <br>
 * The file is opened only for the duration of construction: the underlying stream is closed
 * once the tokenizer has been created, or once creation has failed.
 *
 * @param pathToTokenizer The path to the tokenizer file (e.g., "/path/to/tokenizer.json")
 * @param options         The DJL's tokenizer options; may be {@code null} (the single-argument
 *                        constructor passes {@code null})
 * @throws RuntimeException if the file cannot be opened or read, or the tokenizer cannot be created;
 *                          the original exception is preserved as the cause
 */
public HuggingFaceTokenCountEstimator(String pathToTokenizer, Map<String, String> options) {
    // try-with-resources: the stream used to be handed to createFrom and never closed,
    // leaking a file handle for every estimator constructed from a path.
    try (InputStream tokenizerStream = newInputStream(Paths.get(pathToTokenizer))) {
        this.tokenizer = createFrom(tokenizerStream, options);
    } catch (Exception e) {
        // Same contract as before: any failure surfaces as an unchecked exception with its cause attached.
        throw new RuntimeException(e);
    }
}
8687
87- private static ai . djl . huggingface . tokenizers . HuggingFaceTokenizer createFrom (InputStream tokenizer ,
88- Map <String , String > options ) {
88+ private static HuggingFaceTokenizer createFrom (InputStream tokenizer ,
89+ Map <String , String > options ) {
8990 try {
90- return ai . djl . huggingface . tokenizers . HuggingFaceTokenizer .newInstance (tokenizer , options );
91+ return HuggingFaceTokenizer .newInstance (tokenizer , options );
9192 } catch (Exception e ) {
9293 throw new RuntimeException (e );
9394 }
0 commit comments