Skip to content

Commit 45a3764

Browse files
Merge pull request #117 from hyperion-ml/tyche-cleanup
improving documentation
2 parents bb28c88 + 0e26874 commit 45a3764

File tree

8 files changed

+436
-79
lines changed

8 files changed

+436
-79
lines changed

egs/sre19-av-a/v1/datapath.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ master_key=$master_key_dir/NIST_SRE_segments_key.v2.csv
1212
if [ "$(hostname --domain)" == "clsp.jhu.edu" ];then
1313
ldc_root=/export/corpora5/LDC
1414
ldc_root3=/export/corpora3/LDC
15-
sitw_root=/export/corpora5/SRI/sitw
15+
sitw_root=/export/corpora5/SRI/SITW
1616
swbd_cell2_root=$ldc_root/LDC2004S07
1717
swbd2_ph1_root=$ldc_root3/LDC98S75
1818
sre08sup_root=$ldc_root/LDC2011S11

egs/sre19-av-a/v2.1/datapath.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ master_key=$master_key_dir/NIST_SRE_segments_key.v2.csv
1212
if [ "$(hostname --domain)" == "clsp.jhu.edu" ];then
1313
ldc_root=/export/corpora5/LDC
1414
ldc_root3=/export/corpora3/LDC
15-
sitw_root=/export/corpora5/SRI/sitw
15+
sitw_root=/export/corpora5/SRI/SITW
1616
sre08sup_root=$ldc_root/LDC2011S11
1717
sre10_root=/export/corpora5/SRE/SRE2010/eval
1818
sre10_root=$ldc_root3/LDC2012E09/SRE10/eval

egs/sre19-av-a/v2/datapath.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ master_key=$master_key_dir/NIST_SRE_segments_key.v2.csv
1212
if [ "$(hostname --domain)" == "clsp.jhu.edu" ];then
1313
ldc_root=/export/corpora5/LDC
1414
ldc_root3=/export/corpora3/LDC
15-
sitw_root=/export/corpora5/SRI/sitw
15+
sitw_root=/export/corpora5/SRI/SITW
1616
swbd_cell2_root=$ldc_root/LDC2004S07
1717
swbd2_ph1_root=$ldc_root3/LDC98S75
1818
sre08sup_root=$ldc_root/LDC2011S11

hyperion/classifiers/binary_logistic_regression.py

Lines changed: 88 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,52 @@
88

99

1010
class BinaryLogisticRegression(LogisticRegression):
11+
"""Binary logistic regression.
12+
13+
This is a wrapper that adds functionality to sklearn logistic regression.
14+
Contrary to sklearn, this class produces well-calibrated likelihood ratios.
15+
Thus, this is suitable for score calibration.
16+
17+
Attributes:
18+
A: Scaling Coefficients (num_feats, 1)
19+
b: biases (1, )
20+
penalty: str, ‘l1’ or ‘l2’, default: ‘l2’ ,
21+
Used to specify the norm used in the penalization. The ‘newton-cg’, ‘sag’ and ‘lbfgs’ solvers support only l2 penalties.
22+
New in version 0.19: l1 penalty with SAGA solver (allowing ‘multinomial’ + L1)
23+
lambda_reg: float, default: 1e-5
24+
Regularization strength; must be a positive float.
25+
use_bias: bool, default: True
26+
Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function.
27+
bias_scaling: float, default 1.
28+
Useful only when the solver ‘liblinear’ is used and use_bias is set to True.
29+
In this case, x becomes [x, bias_scaling], i.e. a “synthetic” feature with constant value equal to bias_scaling is appended to the instance vector. The intercept becomes bias_scaling * synthetic_feature_weight.
30+
Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) bias_scaling has to be increased.
31+
priors: prior prob for having a positive sample.
32+
random_state: RandomState instance or None, optional, default: None
33+
Used when solver == ‘sag’ or ‘liblinear’.
34+
solver: {‘newton-cg’, ‘lbfgs’, ‘liblinear’, ‘sag’, ‘saga’},
35+
default: ‘liblinear’ Algorithm to use in the optimization problem.
36+
For small datasets, ‘liblinear’ is a good choice, whereas ‘sag’ and
37+
‘saga’ are faster for large ones.
38+
‘newton-cg’, ‘lbfgs’ and ‘sag’ only handle L2 penalty, whereas
39+
‘liblinear’ and ‘saga’ handle L1 penalty.
40+
Note that ‘sag’ and ‘saga’ fast convergence is only guaranteed on features with approximately the same scale.
41+
New in version 0.17: Stochastic Average Gradient descent solver.
42+
New in version 0.19: SAGA solver.
43+
max_iter: int, default: 100
44+
Useful only for the newton-cg, sag and lbfgs solvers. Maximum number of iterations taken for the solvers to converge.
45+
dual: bool, default: False
46+
Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features.
47+
tol: float, default: 1e-4
48+
Tolerance for stopping criteria.
49+
verbose: int, default: 0
50+
For the liblinear and lbfgs solvers set verbose to any positive number for verbosity.
51+
warm_start: bool, default: False
52+
When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. Useless for liblinear solver.
53+
New in version 0.17: warm_start to support lbfgs, newton-cg, sag, saga solvers.
54+
lr_seed: seed for numpy random.
55+
"""
56+
1157
def __init__(
1258
self,
1359
A=None,
@@ -29,7 +75,7 @@ def __init__(
2975
):
3076

3177
priors = {0: 1 - prior, 1: prior}
32-
super(BinaryLogisticRegression, self).__init__(
78+
super().__init__(
3379
A=A,
3480
b=b,
3581
penalty=penalty,
@@ -51,15 +97,32 @@ def __init__(
5197

5298
@property
5399
def prior(self):
100+
"""Prior probability for a positive sample."""
54101
return self.priors[1]
55102

56103
def get_config(self):
104+
"""Gets configuration hyperparams.
105+
Returns:
106+
Dictionary with config hyperparams.
107+
"""
108+
57109
config = {"prior": self.prior}
58-
base_config = super(BinaryLogisticRegression, self).get_config()
110+
base_config = super().get_config()
59111
del base_config["priors"]
60112
return dict(list(base_config.items()) + list(config.items()))
61113

62114
def predict(self, x, eval_type="logit"):
115+
"""Evaluates the logistic regression.
116+
117+
It provides well calibrated likelihood ratios or posteriors.
118+
119+
Args:
120+
x: input features (num_samples, feat_dim), it can be (num_samples,) if feat_dim=1.
121+
eval_type: evaluation method: logit (log-likelihood ratio), log-post (log-posteriors), post (posteriors)
122+
123+
Returns:
124+
Output scores (num_samples,)
125+
"""
63126
if x.ndim == 1:
64127
x = x[:, None]
65128

@@ -72,8 +135,25 @@ def predict(self, x, eval_type="logit"):
72135

73136
return y
74137

138+
def __call__(self, x, eval_type="logit"):
139+
"""Evaluates the logistic regression.
140+
141+
Args:
142+
x: input features (num_samples, feat_dim), it can be (num_samples,) if feat_dim=1.
143+
eval_type: evaluation method: logit (log-likelihood ratio), log-post (log-posteriors), post (posteriors)
144+
145+
Returns:
146+
Output scores (num_samples,)
147+
"""
148+
return self.predict(x, eval_type)
149+
75150
@staticmethod
76-
def filter_train_args(**kwargs):
151+
def filter_class_args(**kwargs):
152+
"""Extracts the hyperparams of the class from a dictionary.
153+
154+
Returns:
155+
Hyperparameter dictionary to initialize the class.
156+
"""
77157
valid_args = (
78158
"penalty",
79159
"lambda_reg",
@@ -101,6 +181,11 @@ def filter_train_args(**kwargs):
101181

102182
@staticmethod
103183
def add_class_args(parser, prefix=None):
184+
It adds the arguments corresponding to the class to jsonargparse.
185+
Args:
186+
parser: jsonargparse object
187+
prefix: argument prefix.
188+
"""
104189
if prefix is None:
105190
p1 = "--"
106191
else:

hyperion/classifiers/greedy_fusion.py

Lines changed: 115 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,58 @@
1414

1515

1616
class GreedyFusionBinaryLR(HypModel):
17+
"""Greedy score fusion based on binary logistic regression.
18+
19+
It computes ``max_systems`` fusions. The best system, the best fusion of two,
20+
the best fusion of three, ...
21+
The system selection procedure is as follows:
22+
* Choose the best system.
23+
* Fix the best system and choose the system that fuses the best with the best.
24+
* Fix the best two and choose the system that fuses the best with those two.
25+
* ...
26+
27+
Attributes:
28+
weights: fusion weights, this is a list with ``max_systems`` elements with shapes, (1,1), (2,1), (3,1), ..., (max_systems,1).
29+
bias: fusion biases, this is a list with ``max_systems`` elements with shape (1,).
30+
system_idx: list of index vectors that indicate which systems are used for the fusion of 1 system, fusion of 2, ....
31+
system_names: list of strings containing descriptive names for the systems,
32+
max_systems: max number of systems to fuse, if None, ``max_systems=total_systems``.
33+
penalty: str, ‘l1’ or ‘l2’, default: ‘l2’ ,
34+
Used to specify the norm used in the penalization. The ‘newton-cg’, ‘sag’ and ‘lbfgs’ solvers support only l2 penalties.
35+
New in version 0.19: l1 penalty with SAGA solver (allowing ‘multinomial’ + L1)
36+
lambda_reg: float, default: 1e-5
37+
Regularization strength; must be a positive float.
38+
use_bias: bool, default: True
39+
Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function.
40+
bias_scaling: float, default 1.
41+
Useful only when the solver ‘liblinear’ is used and use_bias is set to True.
42+
In this case, x becomes [x, bias_scaling], i.e. a “synthetic” feature with constant value equal to bias_scaling is appended to the instance vector. The intercept becomes bias_scaling * synthetic_feature_weight.
43+
Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) bias_scaling has to be increased.
44+
priors: prior prob for having a positive sample.
45+
random_state: int, RandomState instance or None, optional, default: None
46+
The seed of the pseudo random number generator to use when shuffling the data. If int, random_state is the seed used by the random number generator; if RandomState instance, random_state is the random number generator. Used when solver == ‘sag’ or ‘liblinear’.
47+
solver: {‘newton-cg’, ‘lbfgs’, ‘liblinear’, ‘sag’, ‘saga’},
48+
default: ‘liblinear’ Algorithm to use in the optimization problem.
49+
For small datasets, ‘liblinear’ is a good choice, whereas ‘sag’ and
50+
‘saga’ are faster for large ones.
51+
‘newton-cg’, ‘lbfgs’ and ‘sag’ only handle L2 penalty, whereas
52+
‘liblinear’ and ‘saga’ handle L1 penalty.
53+
Note that ‘sag’ and ‘saga’ fast convergence is only guaranteed on features with approximately the same scale.
54+
max_iter: int, default: 100
55+
Useful only for the newton-cg, sag and lbfgs solvers. Maximum number of iterations taken for the solvers to converge.
56+
dual: bool, default: False
57+
Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features.
58+
tol: float, default: 1e-4
59+
Tolerance for stopping criteria.
60+
verbose: int, default: 0
61+
For the liblinear and lbfgs solvers set verbose to any positive number for verbosity.
62+
warm_start: bool, default: False
63+
When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. Useless for liblinear solver.
64+
New in version 0.17: warm_start to support lbfgs, newton-cg, sag, saga solvers.
65+
lr_seed: seed for numpy random.
66+
67+
"""
68+
1769
def __init__(
1870
self,
1971
weights=None,
@@ -36,7 +88,7 @@ def __init__(
3688
**kwargs
3789
):
3890

39-
super(GreedyFusionBinaryLR, self).__init__(**kwargs)
91+
super().__init__(**kwargs)
4092

4193
self.weights = weights
4294
self.bias = bias
@@ -66,12 +118,33 @@ def __init__(
66118

67119
@property
68120
def prior(self):
121+
"""Prior probability for a positive sample."""
69122
return self.lr.prior
70123

71124
def get_fusion_params(self, idx):
125+
"""Get fusion parameters for a fusion of ``idx+1`` systems.
126+
127+
Args:
128+
idx: index of the fusion, it returns the parameters for the fusion of ``idx+1`` systems.
129+
Returns:
130+
Weights for fusion ``idx`` shape=(idx+1, 1)
131+
Bias for fusion ``idx``
132+
Indices for systems included in fusion ``idx``.
133+
"""
72134
return self.weights[idx], self.bias[idx], self.system_idx[idx]
73135

74136
def _predict_fus_idx(self, x, fus_idx, eval_type="logit"):
137+
"""Evals the fusion indicated by ``fus_idx``,
138+
which is the fusion of ``fus_idx+1`` systems.
139+
140+
Args:
141+
x: input features (num_samples, num_systems)
142+
fus_idx: index of the fusion, it returns the parameters for the fusion of ``fus_idx+1`` systems.
143+
eval_type: evaluation method: logit (log-likelihood ratio), log-post (log-posteriors), post (posteriors)
144+
145+
Returns:
146+
Output scores (num_samples,)
147+
"""
75148

76149
w, b, idx = self.get_fusion_params(fus_idx)
77150
x = x[:, idx]
@@ -85,6 +158,18 @@ def _predict_fus_idx(self, x, fus_idx, eval_type="logit"):
85158
return y
86159

87160
def predict(self, x, fus_idx=None, eval_type="logit"):
161+
"""Evals the fusion indicated by ``fus_idx``,
162+
which is the fusion of ``fus_idx+1`` systems.
163+
164+
Args:
165+
x: input features (num_samples, num_systems)
166+
fus_idx: index of the fusion, it returns the parameters for the fusion of ``fus_idx+1`` systems.
167+
If None, it evaluates all the fusions and returns a list of score vectors.
168+
eval_type: evaluation method: logit (log-likelihood ratio), log-post (log-posteriors), post (posteriors)
169+
170+
Returns:
171+
Output scores (num_samples,) or List of score vectors.
172+
"""
88173

89174
if fus_idx is None:
90175
y = []
@@ -95,7 +180,29 @@ def predict(self, x, fus_idx=None, eval_type="logit"):
95180

96181
return self._predict_fus_idx(x, fus_idx, eval_type)
97182

183+
def __call__(self, x, fus_idx=None, eval_type="logit"):
184+
"""Evals the fusion indicated by ``fus_idx``,
185+
which is the fusion of ``fus_idx+1`` systems.
186+
187+
Args:
188+
x: input features (num_samples, num_systems)
189+
fus_idx: index of the fusion, it returns the parameters for the fusion of ``fus_idx+1`` systems.
190+
If None, it evaluates all the fusions and returns a list of score vectors.
191+
eval_type: evaluation method: logit (log-likelihood ratio), log-post (log-posteriors), post (posteriors)
192+
193+
Returns:
194+
Output scores (num_samples,) or List of score vectors.
195+
"""
196+
return self.predict(x, fus_idx, eval_type)
197+
98198
def fit(self, x, class_ids, sample_weights=None):
199+
"""Estimates the parameters of all the fusions
200+
201+
Args:
202+
x: input features (num_samples, num_systems); must be 2D, since the number of systems is read from x.shape[1].
203+
class_ids: class integer [0, 1] identifier (num_samples,)
204+
sample_weights: weight of each sample in the estimation (num_samples,)
205+
"""
99206

100207
num_systems = x.shape[1]
101208
if self.max_systems is None:
@@ -199,17 +306,19 @@ def _make_fus_name(self, idx):
199306
return fus_name
200307

201308
def get_config(self):
309+
"""Gets configuration hyperparams.
310+
Returns:
311+
Dictionary with config hyperparams.
312+
"""
202313
config = {"bias_scaling": self.lr.bias_scaling, "prior": self.lr.prior}
203-
base_config = super(GreedyFusionBinaryLR, self).get_config()
314+
base_config = super().get_config()
204315
return dict(list(base_config.items()) + list(config.items()))
205316

206317
def save_params(self, f):
207318
weights = np.concatenate(tuple(self.weights), axis=0)
208319
bias = np.concatenate(tuple(self.bias))
209320
system_idx = np.concatenate(tuple(self.system_idx), axis=0)
210321
system_names = np.asarray(self.system_names, dtype="S")
211-
# print(system_names)
212-
# print(system_names.astype('S'))
213322
params = {
214323
"weights": weights,
215324
"bias": bias,
@@ -235,7 +344,7 @@ def load_params(cls, f, config):
235344
"system_names": "S",
236345
}
237346
params = cls._load_params_to_dict(f, config["name"], param_list, dtypes)
238-
# print(params)
347+
239348
weights = []
240349
system_idx = []
241350
i = 1
@@ -249,6 +358,6 @@ def load_params(cls, f, config):
249358
params["weights"] = weights
250359
params["system_idx"] = system_idx
251360
params["system_names"] = [t.decode("utf-8") for t in params["system_names"]]
252-
# print(params)
361+
253362
kwargs = dict(list(config.items()) + list(params.items()))
254363
return cls(**kwargs)

hyperion/classifiers/linear_gbe.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -332,13 +332,8 @@ def filter_class_args(**kwargs):
332332
"""Extracts the hyperparams of the class from a dictionary.
333333
334334
Returns:
335-
Hyperparamters to initialize the class.
335+
Hyperparameter dictionary to initialize the class.
336336
"""
337-
if prefix is None:
338-
p = ""
339-
else:
340-
p = prefix + "_"
341-
342337
valid_args = (
343338
"update_mu",
344339
"update_W",
@@ -360,7 +355,7 @@ def filter_class_args(**kwargs):
360355

361356
return d
362357

363-
filter_train_args = filter_args
358+
filter_train_args = filter_class_args
364359

365360
@staticmethod
366361
def add_class_args(parser, prefix=None):

0 commit comments

Comments
 (0)