config.json
1.7 KB · 87 lines · json Raw
1 {
2 "_name_or_path": "facebook/hubert-base-ls960",
3 "activation_dropout": 0.1,
4 "apply_spec_augment": true,
5 "architectures": [
6 "HubertForSequenceClassification"
7 ],
8 "attention_dropout": 0.1,
9 "bos_token_id": 1,
10 "classifier_proj_size": 256,
11 "conv_bias": false,
12 "conv_dim": [
13 512,
14 512,
15 512,
16 512,
17 512,
18 512,
19 512
20 ],
21 "conv_kernel": [
22 10,
23 3,
24 3,
25 3,
26 3,
27 2,
28 2
29 ],
30 "conv_stride": [
31 5,
32 2,
33 2,
34 2,
35 2,
36 2,
37 2
38 ],
39 "ctc_loss_reduction": "sum",
40 "ctc_zero_infinity": false,
41 "do_stable_layer_norm": false,
42 "eos_token_id": 2,
43 "feat_extract_activation": "gelu",
44 "feat_extract_dropout": 0.0,
45 "feat_extract_norm": "group",
46 "feat_proj_dropout": 0.1,
47 "feat_proj_layer_norm": true,
48 "final_dropout": 0.1,
49 "finetuning_task": "hubert_clf",
50 "gradient_checkpointing": false,
51 "hidden_act": "gelu",
52 "hidden_dropout": 0.1,
53 "hidden_dropout_prob": 0.1,
54 "hidden_size": 768,
55 "id2label": {
56 "0": "bona-fide",
57 "1": "spoof"
58 },
59 "initializer_range": 0.02,
60 "intermediate_size": 3072,
61 "label2id": {
62 "bona-fide": 0,
63 "spoof": 1
64 },
65 "layer_norm_eps": 1e-05,
66 "layerdrop": 0.1,
67 "mask_feature_length": 10,
68 "mask_feature_min_masks": 0,
69 "mask_feature_prob": 0.0,
70 "mask_time_length": 10,
71 "mask_time_min_masks": 2,
72 "mask_time_prob": 0.05,
73 "model_type": "hubert",
74 "num_attention_heads": 12,
75 "num_conv_pos_embedding_groups": 16,
76 "num_conv_pos_embeddings": 128,
77 "num_feat_extract_layers": 7,
78 "num_hidden_layers": 12,
79 "pad_token_id": 0,
80 "pooling_mode": "mean",
81 "tokenizer_class": "Wav2Vec2CTCTokenizer",
82 "torch_dtype": "float32",
83 "transformers_version": "4.38.0.dev0",
84 "use_weighted_layer_sum": false,
85 "vocab_size": 32
86 }
87