@@ -79,15 +79,20 @@ def forward(
7979 # F0 extraction
8080
8181 # input shape = [T,]
82- pitch = self ._calculate_f0 (input )
82+ pitch , pitch_log = self ._calculate_f0 (input )
8383 # (Optional): Adjust length to match with the mel-spectrogram
8484 if feats_lengths is not None :
8585 pitch = [
8686 self ._adjust_num_frames (p , fl ).view (- 1 )
8787 for p , fl in zip (pitch , feats_lengths )
8888 ]
89- pitch , mean , std = self ._normalize (pitch , durations )
90- coefs = self ._cwt (pitch .numpy ())
89+ pitch_log = [
90+ self ._adjust_num_frames (p , fl ).view (- 1 )
91+ for p , fl in zip (pitch_log , feats_lengths )
92+ ]
93+
94+ pitch_log_norm , mean , std = self ._normalize (pitch_log , durations )
95+ coefs = self ._cwt (pitch_log_norm .numpy ())
9196 # (Optional): Average by duration to calculate token-wise f0
9297 if self .use_token_averaged_f0 :
9398 pitch = self ._average_by_duration (pitch , durations )
@@ -112,10 +117,12 @@ def _calculate_f0(self, input: torch.Tensor) -> torch.Tensor:
112117 f0 = pyworld .stonemask (x , f0 , timeaxis , self .fs )
113118 if self .use_continuous_f0 :
114119 f0 = self ._convert_to_continuous_f0 (f0 )
120+
115121 if self .use_log_f0 :
116122 nonzero_idxs = np .where (f0 != 0 )[0 ]
117- f0 [nonzero_idxs ] = np .log (f0 [nonzero_idxs ])
118- return input .new_tensor (f0 .reshape (- 1 ), dtype = torch .float )
123+ f0_log [nonzero_idxs ] = np .log (f0 [nonzero_idxs ])
124+
125+ return input .new_tensor (f0 .reshape (- 1 ), dtype = torch .float ), input .new_tensor (f0_log .reshape (- 1 ), dtype = torch .float )
119126
120127
121128 @staticmethod
0 commit comments