config.json
1.9 KB · 88 lines · json Raw
1 {
2 "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3 "activation_dropout": 0.055,
4 "apply_spec_augment": true,
5 "architectures": [
6 "Wav2Vec2ForCTC"
7 ],
8 "attention_dropout": 0.094,
9 "bos_token_id": 1,
10 "classifier_proj_size": 256,
11 "codevector_dim": 768,
12 "contrastive_logits_temperature": 0.1,
13 "conv_bias": true,
14 "conv_dim": [
15 512,
16 512,
17 512,
18 512,
19 512,
20 512,
21 512
22 ],
23 "conv_kernel": [
24 10,
25 3,
26 3,
27 3,
28 3,
29 2,
30 2
31 ],
32 "conv_stride": [
33 5,
34 2,
35 2,
36 2,
37 2,
38 2,
39 2
40 ],
41 "ctc_loss_reduction": "mean",
42 "ctc_zero_infinity": true,
43 "diversity_loss_weight": 0.1,
44 "do_stable_layer_norm": true,
45 "eos_token_id": 2,
46 "feat_extract_activation": "gelu",
47 "feat_extract_dropout": 0.0,
48 "feat_extract_norm": "layer",
49 "feat_proj_dropout": 0.04,
50 "feat_quantizer_dropout": 0.0,
51 "final_dropout": 0.0,
52 "gradient_checkpointing": false,
53 "hidden_act": "gelu",
54 "hidden_dropout": 0.047,
55 "hidden_size": 1024,
56 "initializer_range": 0.02,
57 "intermediate_size": 4096,
58 "layer_norm_eps": 1e-05,
59 "layerdrop": 0.041,
60 "mask_channel_length": 10,
61 "mask_channel_min_space": 1,
62 "mask_channel_other": 0.0,
63 "mask_channel_prob": 0.0,
64 "mask_channel_selection": "static",
65 "mask_feature_length": 10,
66 "mask_feature_prob": 0.0,
67 "mask_time_length": 10,
68 "mask_time_min_space": 1,
69 "mask_time_other": 0.0,
70 "mask_time_prob": 0.4,
71 "mask_time_selection": "static",
72 "model_type": "wav2vec2",
73 "num_attention_heads": 16,
74 "num_codevector_groups": 2,
75 "num_codevectors_per_group": 320,
76 "num_conv_pos_embedding_groups": 16,
77 "num_conv_pos_embeddings": 128,
78 "num_feat_extract_layers": 7,
79 "num_hidden_layers": 24,
80 "num_negatives": 100,
81 "pad_token_id": 29,
82 "proj_codevector_dim": 768,
83 "torch_dtype": "float32",
84 "transformers_version": "4.11.2",
85 "use_weighted_layer_sum": false,
86 "vocab_size": 30
87 }
88