config.json
{
  "_name_or_path": "clip-vit-large-patch14/",
  "architectures": [
    "CLIPModel"
  ],
  "initializer_factor": 1.0,
  "logit_scale_init_value": 2.6592,
  "model_type": "clip",
  "projection_dim": 768,
  "text_config": {
    "_name_or_path": "",
    "add_cross_attention": false,
    "architectures": null,
    "attention_dropout": 0.0,
    "bad_words_ids": null,
    "bos_token_id": 0,
    "chunk_size_feed_forward": 0,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "dropout": 0.0,
    "early_stopping": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": 2,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "hidden_act": "quick_gelu",
    "hidden_size": 768,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 3072,
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-05,
    "length_penalty": 1.0,
    "max_length": 20,
    "max_position_embeddings": 77,
    "min_length": 0,
    "model_type": "clip_text_model",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": 12,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_hidden_layers": 12,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": 1,
    "prefix": null,
    "problem_type": null,
    "projection_dim": 768,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": null,
    "torchscript": false,
    "transformers_version": "4.16.0.dev0",
    "use_bfloat16": false,
    "vocab_size": 49408
  },
  "text_config_dict": {
    "hidden_size": 768,
    "intermediate_size": 3072,
    "num_attention_heads": 12,
    "num_hidden_layers": 12,
    "projection_dim": 768
  },
  "torch_dtype": "float32",
  "transformers_version": null,
  "vision_config": {
    "_name_or_path": "",
    "add_cross_attention": false,
    "architectures": null,
    "attention_dropout": 0.0,
    "bad_words_ids": null,
    "bos_token_id": null,
    "chunk_size_feed_forward": 0,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "dropout": 0.0,
    "early_stopping": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "hidden_act": "quick_gelu",
    "hidden_size": 1024,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "image_size": 224,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-05,
    "length_penalty": 1.0,
    "max_length": 20,
    "min_length": 0,
    "model_type": "clip_vision_model",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": 16,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_hidden_layers": 24,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": null,
    "patch_size": 14,
    "prefix": null,
    "problem_type": null,
    "projection_dim": 768,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": null,
    "torchscript": false,
    "transformers_version": "4.16.0.dev0",
    "use_bfloat16": false
  },
  "vision_config_dict": {
    "hidden_size": 1024,
    "intermediate_size": 4096,
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768
  }
}
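
As a quick sanity check, this config can be loaded and inspected with the Hugging Face transformers library. The sketch below is a minimal example, assuming the file above is saved as config.json inside a local clip-vit-large-patch14/ directory (the local path is an assumption; adjust it to wherever the checkpoint lives).

    # Minimal sketch: load the config and inspect the text/vision sub-configs.
    from transformers import CLIPConfig, CLIPModel

    config = CLIPConfig.from_pretrained("./clip-vit-large-patch14")

    # projection_dim (768) is the size of the shared image-text embedding space.
    print(config.projection_dim)             # 768
    print(config.text_config.hidden_size)    # 768  (12 layers, 12 heads)
    print(config.vision_config.hidden_size)  # 1024 (24 layers, 16 heads, patch size 14)

    # Building a model from the config alone yields randomly initialized weights;
    # use CLIPModel.from_pretrained(...) to load the actual pretrained checkpoint.
    model = CLIPModel(config)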