{
  "_commit_hash": null,
  "architectures": [
    "ClapModel"
  ],
  "audio_config": {
    "_name_or_path": "",
    "add_cross_attention": false,
    "aff_block_r": 4,
    "architectures": null,
    "attention_probs_dropout_prob": 0.0,
    "bad_words_ids": null,
    "begin_suppress_tokens": null,
    "bos_token_id": null,
    "chunk_size_feed_forward": 0,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": null,
    "depths": [
      2,
      2,
      6,
      2
    ],
    "diversity_penalty": 0.0,
    "do_sample": false,
    "drop_path_rate": 0.0,
    "early_stopping": false,
    "enable_fusion": true,
    "enable_patch_fusion": true,
    "enable_patch_layer_norm": true,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": null,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "flatten_patch_embeds": true,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "fusion_num_hidden_layers": 2,
    "fusion_type": null,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "hidden_size": 768,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_factor": 1.0,
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-05,
    "length_penalty": 1.0,
    "max_length": 20,
    "min_length": 0,
    "mlp_ratio": 4.0,
    "model_type": "clap_audio_model",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": [
      4,
      8,
      16,
      32
    ],
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_classes": 527,
    "num_hidden_layers": 4,
    "num_mel_bins": 64,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": null,
    "patch_embed_input_channels": 1,
    "patch_embeds_hidden_size": 96,
    "patch_size": 4,
    "patch_stride": [
      4,
      4
    ],
    "prefix": null,
    "problem_type": null,
    "projection_dim": 512,
    "projection_hidden_act": "relu",
    "projection_hidden_size": 768,
    "pruned_heads": {},
    "qkv_bias": true,
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": null,
    "spec_size": 256,
    "suppress_tokens": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": null,
    "torchscript": false,
    "transformers_version": "4.27.0.dev0",
    "typical_p": 1.0,
    "use_bfloat16": false,
    "window_size": 8
  },
  "hidden_size": 768,
  "initializer_factor": 1.0,
  "logit_scale_init_value": 14.285714285714285,
  "model_type": "clap",
  "num_hidden_layers": 16,
  "projection_dim": 512,
  "projection_hidden_act": "relu",
  "text_config": {
    "_name_or_path": "",
    "add_cross_attention": false,
    "architectures": null,
    "attention_probs_dropout_prob": 0.1,
    "bad_words_ids": null,
    "begin_suppress_tokens": null,
    "bos_token_id": 0,
    "chunk_size_feed_forward": 0,
    "classifier_dropout": null,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "early_stopping": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": 2,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "fusion_hidden_size": 768,
    "fusion_num_hidden_layers": 2,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "hidden_size": 768,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 3072,
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-12,
    "length_penalty": 1.0,
    "max_length": 20,
    "max_position_embeddings": 514,
    "min_length": 0,
    "model_type": "clap_text_model",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": 12,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_hidden_layers": 12,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": 1,
    "position_embedding_type": "absolute",
    "prefix": null,
    "problem_type": null,
    "projection_dim": 512,
    "projection_hidden_act": "relu",
    "projection_hidden_size": 768,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": null,
    "suppress_tokens": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": null,
    "torchscript": false,
    "transformers_version": "4.27.0.dev0",
    "type_vocab_size": 1,
    "typical_p": 1.0,
    "use_bfloat16": false,
    "use_cache": true,
    "vocab_size": 50265
  },
  "torch_dtype": "float32",
  "transformers_version": null
}