While running the BERT model I am getting an error on the line below:

base_embeddings_sentences = model.encode(sentences)

The error stack is below; please help with a resolution.
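For context, here is a minimal, self-contained sketch of what I am running (the imports, the nltk.download call, and the sample base_document value are assumptions added for reproducibility; only the last four lines are verbatim from my notebook):

```python
import numpy as np
import nltk
from nltk.tokenize import sent_tokenize
from sentence_transformers import SentenceTransformer

nltk.download('punkt')  # one-time download of the sentence tokenizer data

# Placeholder input; in my code base_document holds the full document text.
base_document = "First sentence of the document. Second sentence of the document."

model = SentenceTransformer('bert-base-nli-mean-tokens')
sentences = sent_tokenize(base_document)             # split the document into sentences
base_embeddings_sentences = model.encode(sentences)  # <-- the TypeError is raised here
base_embeddings = np.mean(np.array(base_embeddings_sentences), axis=0)  # mean-pool to one document vector
```

Full error stack: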
TypeError                                 Traceback (most recent call last)
<ipython-input-...> in <module>
1 model = SentenceTransformer('bert-base-nli-mean-tokens')
2 sentences = sent_tokenize(base_document)
----> 3 base_embeddings_sentences = model.encode(sentences)
4 base_embeddings = np.mean(np.array(base_embeddings_sentences), axis=0)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sentence_transformers\SentenceTransformer.py in encode(self, sentences, batch_size, show_progress_bar, output_value, convert_to_numpy, convert_to_tensor, is_pretokenized, device, num_workers)
174             iterator = tqdm(inp_dataloader, desc="Batches")
175
--> 176         for features in iterator:
177             for feature_name in features:
178                 features[feature_name] = features[feature_name].to(device)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\tqdm\notebook.py in __iter__(self, *args, **kwargs)
220     def __iter__(self, *args, **kwargs):
221         try:
--> 222             for obj in super(tqdm_notebook, self).__iter__(*args, **kwargs):
223                 # return super(tqdm...) will not catch exception
224                 yield obj
~\AppData\Local\Continuum\anaconda3\lib\site-packages\tqdm\std.py in __iter__(self)
1085             """), fp_write=getattr(self.fp, 'write', sys.stderr.write))
1086
-> 1087         for obj in iterable:
1088             yield obj
1089             # Update and possibly print the progressbar.
~\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
433         if self._sampler_iter is None:
434             self._reset()
--> 435         data = self._next_data()
436         self._num_yielded += 1
437         if self._dataset_kind == _DatasetKind.Iterable and \
~\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\utils\data\dataloader.py in _next_data(self)
473     def _next_data(self):
474         index = self._next_index()  # may raise StopIteration
--> 475         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
476         if self._pin_memory:
477             data = _utils.pin_memory.pin_memory(data)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\utils\data\_utils\fetch.py in fetch(self, possibly_batched_index)
45         else:
46             data = self.dataset[possibly_batched_index]
---> 47         return self.collate_fn(data)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sentence_transformers\SentenceTransformer.py in smart_batching_collate_text_only(self, batch)
428
429         for text in batch:
--> 430             sentence_features = self.get_sentence_features(text, max_seq_len)
431             for feature_name in sentence_features:
432                 if feature_name not in feature_lists:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sentence_transformers\SentenceTransformer.py in get_sentence_features(self, *features)
327
328     def get_sentence_features(self, *features):
--> 329         return self._first_module().get_sentence_features(*features)
330
331     def get_sentence_embedding_dimension(self):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sentence_transformers\models\Transformer.py in get_sentence_features(self, tokens, pad_seq_length)
75
76         if len(tokens) == 0 or isinstance(tokens[0], int):
---> 77             return self.tokenizer.prepare_for_model(tokens, max_length=pad_seq_length, padding='max_length', return_tensors='pt', truncation=True, prepend_batch_axis=True)
78         else:
79             return self.tokenizer.prepare_for_model(tokens[0], tokens[1], max_length=pad_seq_length, padding='max_length', return_tensors='pt', truncation='longest_first', prepend_batch_axis=True)
TypeError: prepare_for_model() got an unexpected keyword argument 'padding'
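If it helps with the diagnosis: Transformer.py is passing padding= and truncation= to tokenizer.prepare_for_model(), and those keyword arguments only exist in newer releases of the transformers library (they came in with the reworked tokenizer API in 3.x), so my guess, an assumption rather than a confirmed diagnosis, is a version mismatch between sentence-transformers and an older transformers install. A quick check:

```python
# Print the installed versions. sentence-transformers needs a transformers
# release whose prepare_for_model() accepts padding=/truncation= (3.x or later).
import transformers
import sentence_transformers

print("transformers:", transformers.__version__)
print("sentence-transformers:", sentence_transformers.__version__)
```

If transformers reports a 2.x version, upgrading both packages (pip install --upgrade transformers sentence-transformers) would be my first thing to try.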