Skip to content

Commit 61c7e47

Browse files
committed
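Base_Classifier wasn't unicode-ready: the isinstance checks in basic_extractor now test against basestring instead of str, so unicode strings are accepted as well.
Fixed a bug where _word_set was built from the raw train_set argument even when train_set is a file-like object rather than an iterable; it is now built from self.train_set, which holds the data read from the file in that case.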
1 parent 7505da4 commit 61c7e47

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

textblob/classifiers.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -83,11 +83,11 @@ def basic_extractor(document, train_set):
8383
el_zero = iter(train_set).next() #Infer input from first element.
8484
except StopIteration:
8585
return {}
86-
if isinstance(el_zero, str):
86+
if isinstance(el_zero, basestring):
8787
word_features = [w for w in chain([el_zero],train_set)]
8888
else:
8989
try:
90-
assert(isinstance(el_zero[0], str))
90+
assert(isinstance(el_zero[0], basestring))
9191
word_features = _get_words_from_dataset(chain([el_zero],train_set))
9292
except:
9393
raise ValueError('train_set is proabably malformed.')
@@ -136,7 +136,7 @@ def __init__(self, train_set, feature_extractor=basic_extractor, format=None, **
136136
self.train_set = self._read_data(train_set, format)
137137
else: # train_set is a list of tuples
138138
self.train_set = train_set
139-
self._word_set = _get_words_from_dataset(train_set) #Keep a hidden set of unique words.
139+
self._word_set = _get_words_from_dataset(self.train_set) #Keep a hidden set of unique words.
140140
self.train_features = None
141141

142142
def _read_data(self, dataset, format=None):

0 commit comments

Comments
 (0)