{
  "_name_or_path": "openai/clip-vit-large-patch14-336",
  "architectures": [
    "CLIPModel"
  ],
  "initializer_factor": 1.0,
  "logit_scale_init_value": 2.6592,
  "model_type": "clip",
  "projection_dim": 768,
  "text_config": {
    "_name_or_path": "",
    "add_cross_attention": false,
    "architectures": null,
    "attention_dropout": 0.0,
    "bad_words_ids": null,
    "bos_token_id": 0,
    "chunk_size_feed_forward": 0,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "dropout": 0.0,
    "early_stopping": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": 2,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "hidden_act": "quick_gelu",
    "hidden_size": 768,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 3072,
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-05,
    "length_penalty": 1.0,
    "max_length": 20,
    "max_position_embeddings": 77,
    "min_length": 0,
    "model_type": "clip_text_model",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": 12,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_hidden_layers": 12,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": 1,
    "prefix": null,
    "problem_type": null,
    "projection_dim": 768,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": null,
    "torchscript": false,
    "transformers_version": "4.21.3",
    "typical_p": 1.0,
    "use_bfloat16": false,
    "vocab_size": 49408
  },
  "text_config_dict": {
    "hidden_size": 768,
    "intermediate_size": 3072,
    "num_attention_heads": 12,
    "num_hidden_layers": 12,
    "projection_dim": 768
  },
  "torch_dtype": "float32",
  "transformers_version": null,
  "vision_config": {
    "_name_or_path": "",
    "add_cross_attention": false,
    "architectures": null,
    "attention_dropout": 0.0,
    "bad_words_ids": null,
    "bos_token_id": null,
    "chunk_size_feed_forward": 0,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "dropout": 0.0,
    "early_stopping": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": null,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "hidden_act": "quick_gelu",
    "hidden_size": 1024,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "image_size": 336,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-05,
    "length_penalty": 1.0,
    "max_length": 20,
    "min_length": 0,
    "model_type": "clip_vision_model",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": 16,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_channels": 3,
    "num_hidden_layers": 24,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": null,
    "patch_size": 14,
    "prefix": null,
    "problem_type": null,
    "projection_dim": 768,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": null,
    "torchscript": false,
    "transformers_version": "4.21.3",
    "typical_p": 1.0,
    "use_bfloat16": false
  },
  "vision_config_dict": {
    "hidden_size": 1024,
    "image_size": 336,
    "intermediate_size": 4096,
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768
  }
}