hyperparams.yaml
| 1 | pretrained_path: speechbrain/lang-id-voxlingua107-ecapa  # prefix of every checkpoint path listed under pretrainer.paths |
| 2 | |
| 3 | |
| 4 | # Feature parameters |
| 5 | n_mels: 60 |
| 6 | left_frames: 0 |
| 7 | right_frames: 0 |
| 8 | deltas: false |
| 9 | |
| 10 | # Number of output classes (languages for this lang-id model, not speakers); used as classifier.out_neurons |
| 11 | out_n_neurons: 107 |
| 12 | |
| 13 | # Functions |
| 14 | compute_features: !new:speechbrain.lobes.features.Fbank  # arguments reference the top-level feature parameters so there is a single place to edit them |
| 15 | n_mels: !ref <n_mels> |
| 16 | left_frames: !ref <left_frames> |
| 17 | right_frames: !ref <right_frames> |
| 18 | deltas: !ref <deltas> |
| 19 | |
| 20 | embedding_model: !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN |
| 21 | input_size: !ref <n_mels>  # tied to the feature parameter instead of a repeated literal; NOTE(review): presumably only equals the feature dimension while deltas and context frames stay disabled |
| 22 | channels: [1024, 1024, 1024, 1024, 3072] |
| 23 | kernel_sizes: [5, 3, 3, 3, 1] |
| 24 | dilations: [1, 2, 3, 4, 1] |
| 25 | attention_channels: 128 |
| 26 | lin_neurons: 256  # classifier.input_shape repeats this value as its last dimension; keep the two in sync |
| 27 | |
| 28 | |
| 29 | classifier: !new:speechbrain.lobes.models.Xvector.Classifier |
| 30 | input_shape: [null, null, 256]  # last dimension repeats embedding_model.lin_neurons (256); update both together |
| 31 | activation: !name:torch.nn.LeakyReLU |
| 32 | lin_blocks: 1 |
| 33 | lin_neurons: 512 |
| 34 | out_neurons: !ref <out_n_neurons>  # resolves to the top-level out_n_neurons value (107) |
| 35 | |
| 36 | |
| 37 | mean_var_norm: !new:speechbrain.processing.features.InputNormalization |
| 38 | norm_type: sentence  # presumably statistics are computed per utterance — confirm against the InputNormalization docs |
| 39 | std_norm: false  # presumably disables variance scaling so only the mean is removed — confirm against the InputNormalization docs |
| 40 | |
| 41 | modules:  # named set of pipeline components; every entry is a !ref to an object defined earlier in this file |
| 42 | compute_features: !ref <compute_features> |
| 43 | mean_var_norm: !ref <mean_var_norm> |
| 44 | embedding_model: !ref <embedding_model> |
| 45 | classifier: !ref <classifier> |
| 46 | |
| 47 | label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder  # constructed with no arguments; listed in pretrainer.loadables with label_encoder.txt as its source |
| 48 | |
| 49 | pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer |
| 50 | loadables:  # objects handed to the Pretrainer; compute_features and mean_var_norm are not listed here |
| 51 | embedding_model: !ref <embedding_model> |
| 52 | classifier: !ref <classifier> |
| 53 | label_encoder: !ref <label_encoder> |
| 54 | paths:  # one source file per loadable key above, each built from pretrained_path |
| 55 | embedding_model: !ref <pretrained_path>/embedding_model.ckpt |
| 56 | classifier: !ref <pretrained_path>/classifier.ckpt |
| 57 | label_encoder: !ref <pretrained_path>/label_encoder.txt |
| 58 | |