From a40b730367ae1ee3c39fe35646cdb82cf3a1e36b Mon Sep 17 00:00:00 2001 From: Ajay Saini Date: Thu, 22 Jun 2023 11:10:20 -0700 Subject: [PATCH 1/2] Add max_new_tokens --- examples/inference-deployments/mpt/mpt_handler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/inference-deployments/mpt/mpt_handler.py b/examples/inference-deployments/mpt/mpt_handler.py index cf52bf70b..1c2e6c425 100644 --- a/examples/inference-deployments/mpt/mpt_handler.py +++ b/examples/inference-deployments/mpt/mpt_handler.py @@ -13,7 +13,8 @@ class MPTModelHandler(): DEFAULT_GENERATE_KWARGS = { - 'max_length': 256, + 'max_length': 256, # Counts input + output tokens (deprecated) + 'max_new_tokens': 256, # Only counts output tokens 'use_cache': True, 'do_sample': True, 'top_p': 0.95, From 3ee27aec6791ffdf2adf0d27ec33e579260e7ddb Mon Sep 17 00:00:00 2001 From: Ajay Saini Date: Thu, 22 Jun 2023 11:13:28 -0700 Subject: [PATCH 2/2] Update comment --- examples/inference-deployments/mpt/mpt_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/inference-deployments/mpt/mpt_handler.py b/examples/inference-deployments/mpt/mpt_handler.py index 1c2e6c425..b7c23bbf7 100644 --- a/examples/inference-deployments/mpt/mpt_handler.py +++ b/examples/inference-deployments/mpt/mpt_handler.py @@ -77,7 +77,7 @@ def predict(self, model_requests: List[Dict]): model_requests: List of dictionaries that contain forward pass inputs as well as other parameters, such as generate kwargs. - ex. [{'input': 'hello world!', 'parameters': {'max_length': 10}] + ex. [{'input': 'hello world!', 'parameters': {'max_new_tokens': 10}}] """ generate_inputs = [] generate_kwargs = {}