config.json
5.3 KB · 208 lines · json Raw
1 {
2 "_commit_hash": null,
3 "architectures": [
4 "ClapModel"
5 ],
6 "audio_config": {
7 "_name_or_path": "",
8 "add_cross_attention": false,
9 "aff_block_r": 4,
10 "architectures": null,
11 "attention_probs_dropout_prob": 0.0,
12 "bad_words_ids": null,
13 "begin_suppress_tokens": null,
14 "bos_token_id": null,
15 "chunk_size_feed_forward": 0,
16 "cross_attention_hidden_size": null,
17 "decoder_start_token_id": null,
18 "depths": [
19 2,
20 2,
21 6,
22 2
23 ],
24 "diversity_penalty": 0.0,
25 "do_sample": false,
26 "drop_path_rate": 0.0,
27 "early_stopping": false,
28 "enable_fusion": true,
29 "enable_patch_fusion": true,
30 "enable_patch_layer_norm": true,
31 "encoder_no_repeat_ngram_size": 0,
32 "eos_token_id": null,
33 "exponential_decay_length_penalty": null,
34 "finetuning_task": null,
35 "flatten_patch_embeds": true,
36 "forced_bos_token_id": null,
37 "forced_eos_token_id": null,
38 "fusion_num_hidden_layers": 2,
39 "fusion_type": null,
40 "hidden_act": "gelu",
41 "hidden_dropout_prob": 0.1,
42 "hidden_size": 768,
43 "id2label": {
44 "0": "LABEL_0",
45 "1": "LABEL_1"
46 },
47 "initializer_factor": 1.0,
48 "is_decoder": false,
49 "is_encoder_decoder": false,
50 "label2id": {
51 "LABEL_0": 0,
52 "LABEL_1": 1
53 },
54 "layer_norm_eps": 1e-05,
55 "length_penalty": 1.0,
56 "max_length": 20,
57 "min_length": 0,
58 "mlp_ratio": 4.0,
59 "model_type": "clap_audio_model",
60 "no_repeat_ngram_size": 0,
61 "num_attention_heads": [
62 4,
63 8,
64 16,
65 32
66 ],
67 "num_beam_groups": 1,
68 "num_beams": 1,
69 "num_classes": 527,
70 "num_hidden_layers": 4,
71 "num_mel_bins": 64,
72 "num_return_sequences": 1,
73 "output_attentions": false,
74 "output_hidden_states": false,
75 "output_scores": false,
76 "pad_token_id": null,
77 "patch_embed_input_channels": 1,
78 "patch_embeds_hidden_size": 96,
79 "patch_size": 4,
80 "patch_stride": [
81 4,
82 4
83 ],
84 "prefix": null,
85 "problem_type": null,
86 "projection_dim": 512,
87 "projection_hidden_act": "relu",
88 "projection_hidden_size": 768,
89 "pruned_heads": {},
90 "qkv_bias": true,
91 "remove_invalid_values": false,
92 "repetition_penalty": 1.0,
93 "return_dict": true,
94 "return_dict_in_generate": false,
95 "sep_token_id": null,
96 "spec_size": 256,
97 "suppress_tokens": null,
98 "task_specific_params": null,
99 "temperature": 1.0,
100 "tf_legacy_loss": false,
101 "tie_encoder_decoder": false,
102 "tie_word_embeddings": true,
103 "tokenizer_class": null,
104 "top_k": 50,
105 "top_p": 1.0,
106 "torch_dtype": null,
107 "torchscript": false,
108 "transformers_version": "4.27.0.dev0",
109 "typical_p": 1.0,
110 "use_bfloat16": false,
111 "window_size": 8
112 },
113 "hidden_size": 768,
114 "initializer_factor": 1.0,
115 "logit_scale_init_value": 14.285714285714285,
116 "model_type": "clap",
117 "num_hidden_layers": 16,
118 "projection_dim": 512,
119 "projection_hidden_act": "relu",
120 "text_config": {
121 "_name_or_path": "",
122 "add_cross_attention": false,
123 "architectures": null,
124 "attention_probs_dropout_prob": 0.1,
125 "bad_words_ids": null,
126 "begin_suppress_tokens": null,
127 "bos_token_id": 0,
128 "chunk_size_feed_forward": 0,
129 "classifier_dropout": null,
130 "cross_attention_hidden_size": null,
131 "decoder_start_token_id": null,
132 "diversity_penalty": 0.0,
133 "do_sample": false,
134 "early_stopping": false,
135 "encoder_no_repeat_ngram_size": 0,
136 "eos_token_id": 2,
137 "exponential_decay_length_penalty": null,
138 "finetuning_task": null,
139 "forced_bos_token_id": null,
140 "forced_eos_token_id": null,
141 "fusion_hidden_size": 768,
142 "fusion_num_hidden_layers": 2,
143 "hidden_act": "gelu",
144 "hidden_dropout_prob": 0.1,
145 "hidden_size": 768,
146 "id2label": {
147 "0": "LABEL_0",
148 "1": "LABEL_1"
149 },
150 "initializer_factor": 1.0,
151 "initializer_range": 0.02,
152 "intermediate_size": 3072,
153 "is_decoder": false,
154 "is_encoder_decoder": false,
155 "label2id": {
156 "LABEL_0": 0,
157 "LABEL_1": 1
158 },
159 "layer_norm_eps": 1e-12,
160 "length_penalty": 1.0,
161 "max_length": 20,
162 "max_position_embeddings": 514,
163 "min_length": 0,
164 "model_type": "clap_text_model",
165 "no_repeat_ngram_size": 0,
166 "num_attention_heads": 12,
167 "num_beam_groups": 1,
168 "num_beams": 1,
169 "num_hidden_layers": 12,
170 "num_return_sequences": 1,
171 "output_attentions": false,
172 "output_hidden_states": false,
173 "output_scores": false,
174 "pad_token_id": 1,
175 "position_embedding_type": "absolute",
176 "prefix": null,
177 "problem_type": null,
178 "projection_dim": 512,
179 "projection_hidden_act": "relu",
180 "projection_hidden_size": 768,
181 "pruned_heads": {},
182 "remove_invalid_values": false,
183 "repetition_penalty": 1.0,
184 "return_dict": true,
185 "return_dict_in_generate": false,
186 "sep_token_id": null,
187 "suppress_tokens": null,
188 "task_specific_params": null,
189 "temperature": 1.0,
190 "tf_legacy_loss": false,
191 "tie_encoder_decoder": false,
192 "tie_word_embeddings": true,
193 "tokenizer_class": null,
194 "top_k": 50,
195 "top_p": 1.0,
196 "torch_dtype": null,
197 "torchscript": false,
198 "transformers_version": "4.27.0.dev0",
199 "type_vocab_size": 1,
200 "typical_p": 1.0,
201 "use_bfloat16": false,
202 "use_cache": true,
203 "vocab_size": 50265
204 },
205 "torch_dtype": "float32",
206 "transformers_version": null
207 }
208