From cbbc707f41b8b26dcec5e9bc741447a68de4dad5 Mon Sep 17 00:00:00 2001 From: many-hats <52046775+many-hats@users.noreply.github.com> Date: Tue, 12 Apr 2022 14:45:10 -0400 Subject: [PATCH] update hubert_feature_reader to skip small chunks Very rarely, if the chunk is smaller than the kernel size (e.g. (x.size(1) % max_chunk) < 10), the feature reader will produce a runtime error: RuntimeError: Calculated padded input size per channel: (1). Kernel size: (10). Kernel size can't be greater than actual input size --- textless/data/hubert_feature_reader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/textless/data/hubert_feature_reader.py b/textless/data/hubert_feature_reader.py index 4c774b5..45af901 100644 --- a/textless/data/hubert_feature_reader.py +++ b/textless/data/hubert_feature_reader.py @@ -66,6 +66,8 @@ def get_features(self, x): feat = [] for start in range(0, x.size(1), self.max_chunk): x_chunk = x[:, start : start + self.max_chunk] + if x_chunk.size(1) < 10: + continue feat_chunk, _ = self.model.extract_features( source=x_chunk, padding_mask=None,