Add file_name to the returned item in Snips dataset (#2775)

nateanl · nateanl · commit 9a013fdb1d7e · 2022-10-19T08:16:27.000-04:00
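Summary: Inserts the audio file name (the file stem, without extension) as the third element of the tuples returned by `get_metadata` and `__getitem__` in the Snips dataset, and updates the unit test to match. Pull Request resolved: #2775; Reviewed By: carolineechen; Differential Revision: D40481144; Pulled By: nateanl; fbshipit-source-id: 5d0fb2478767704603a3ec28d74160e7892d4d0e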
diff --git a/test/torchaudio_unittest/datasets/snips_test.py b/test/torchaudio_unittest/datasets/snips_test.py
@@ -55,7 +55,7 @@ def _get_mocked_samples(dataset_dir: str, subset: str, seed: int):
             transcript, iob, intent = f"{spk}XXX", f"{spk}YYY", f"{spk}ZZZ"
             label = "BOS " + transcript + " EOS\tO " + iob + " " + intent
             _save_label(label_path, wav_stem, label)
-            samples.append((waveform, _SAMPLE_RATE, transcript, iob, intent))
+            samples.append((waveform, _SAMPLE_RATE, wav_stem, transcript, iob, intent))
     return samples
 
 
@@ -100,12 +100,13 @@ def setUpClass(cls):
 
     def _testSnips(self, dataset, data_samples):
         num_samples = 0
-        for i, (data, sample_rate, transcript, iob, intent) in enumerate(dataset):
+        for i, (data, sample_rate, file_name, transcript, iob, intent) in enumerate(dataset):
             self.assertEqual(data, data_samples[i][0])
             assert sample_rate == data_samples[i][1]
-            assert transcript == data_samples[i][2]
-            assert iob == data_samples[i][3]
-            assert intent == data_samples[i][4]
+            assert file_name == data_samples[i][2]
+            assert transcript == data_samples[i][3]
+            assert iob == data_samples[i][4]
+            assert intent == data_samples[i][5]
             num_samples += 1
 
         assert num_samples == len(data_samples)
diff --git a/torchaudio/datasets/snips.py b/torchaudio/datasets/snips.py
@@ -112,6 +112,8 @@ def get_metadata(self, n: int) -> Tuple[str, int, str, str, str]:
                 Path to audio
             int:
                 Sample rate
+            str:
+                File name
             str:
                 Transcription of audio
             str:
@@ -123,7 +125,7 @@ def get_metadata(self, n: int) -> Tuple[str, int, str, str, str]:
         relpath = os.path.relpath(audio_path, self._path)
         file_name = audio_path.with_suffix("").name
         transcript, iob, intent = self.labels[file_name]
-        return relpath, _SAMPLE_RATE, transcript, iob, intent
+        return relpath, _SAMPLE_RATE, file_name, transcript, iob, intent
 
     def __getitem__(self, n: int) -> Tuple[torch.Tensor, int, str, str, str]:
         """Load the n-th sample from the dataset.
@@ -138,6 +140,8 @@ def __getitem__(self, n: int) -> Tuple[torch.Tensor, int, str, str, str]:
                 Waveform
             int:
                 Sample rate
+            str:
+                File name
             str:
                 Transcription of audio
             str: