config.json
{
  "architectures": [
    "LlavaForConditionalGeneration"
  ],
  "ignore_index": -100,
  "image_token_index": 32000,
  "model_type": "llava",
  "pad_token_id": 32001,
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "lmsys/vicuna-7b-v1.5",
    "architectures": [
      "LlamaForCausalLM"
    ],
    "max_position_embeddings": 4096,
    "model_type": "llama",
    "rms_norm_eps": 1e-05,
    "torch_dtype": "float16",
    "vocab_size": 32064
  },
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.36.0.dev0",
  "vision_config": {
    "hidden_size": 1024,
    "image_size": 336,
    "intermediate_size": 4096,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default",
  "vocab_size": 32064
}
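This is a composite LLaVA configuration: a Llama (Vicuna-7B-v1.5) text tower under "text_config" and a CLIP vision tower under "vision_config", joined by a GELU projector. As a minimal sketch, the file can be loaded with the Hugging Face transformers LlavaConfig class; the local load path below is our assumption for illustration, not part of the original listing.

# Minimal sketch: load this config.json with transformers and inspect
# the nested sub-configs. Assumes the file is in the working directory.
from transformers import LlavaConfig

config = LlavaConfig.from_json_file("config.json")
print(config.model_type)               # "llava"
print(config.text_config.vocab_size)   # 32064 (Llama text tower)
print(config.vision_config.image_size) # 336 (CLIP vision tower)
print(config.vision_feature_layer)     # -2 (features from the second-to-last vision layer)

Note that "vocab_size" (32064) exceeds the base Llama vocabulary of 32000 because extra tokens such as the image placeholder ("image_token_index": 32000) and padding ("pad_token_id": 32001) are appended.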