config.json
2.5 KB · 114 lines · json Raw
1 {
2 "_name_or_path": "deepseek-ai/DeepSeek-OCR-2",
3 "candidate_resolutions": [
4 [
5 1024,
6 1024
7 ]
8 ],
9 "global_view_pos": "head",
10 "architectures": [
11 "DeepseekOCR2ForCausalLM"
12 ],
13 "auto_map": {
14 "AutoConfig": "modeling_deepseekocr2.DeepseekOCR2Config",
15 "AutoModel": "modeling_deepseekocr2.DeepseekOCR2ForCausalLM"
16 },
17 "language_config": {
18 "architectures": [
19 "DeepseekV2ForCausalLM"
20 ],
21 "auto_map": {
22 "AutoConfig": "configuration_deepseekv2.DeepseekV2Config",
23 "AutoModel": "modeling_deepseek.DeepseekV2Model",
24 "AutoModelForCausalLM": "modeling_deepseek.DeepseekV2ForCausalLM"
25 },
26 "bos_token_id": 0,
27 "eos_token_id": 1,
28 "first_k_dense_replace": 1,
29 "hidden_size": 1280,
30 "intermediate_size": 6848,
31 "kv_lora_rank": null,
32 "lm_head": true,
33 "max_position_embeddings": 8192,
34 "moe_intermediate_size": 896,
35 "n_group": 1,
36 "n_routed_experts": 64,
37 "n_shared_experts": 2,
38 "num_attention_heads": 10,
39 "num_experts_per_tok": 6,
40 "num_hidden_layers": 12,
41 "num_key_value_heads": 10,
42 "q_lora_rank": null,
43 "qk_nope_head_dim": 0,
44 "qk_rope_head_dim": 0,
45 "rm_head": false,
46 "topk_group": 1,
47 "topk_method": "greedy",
48 "torch_dtype": "bfloat16",
49 "use_mla": false,
50 "v_head_dim": 0,
51 "vocab_size": 129280
52 },
53 "model_type": "deepseek_vl_v2",
54 "projector_config": {
55 "input_dim": 896,
56 "model_type": "mlp_projector",
57 "n_embed": 1280,
58 "projector_type": "linear"
59 },
60 "tile_tag": "2D",
61 "torch_dtype": "bfloat16",
62 "transformers_version": "4.46.3",
63 "vision_config": {
64 "image_size": 1024,
65 "mlp_ratio": 3.7362,
66 "model_name": "deepencoderv2",
67 "model_type": "vision",
68 "width": {
69 "qwen2-0-5b": {
70 "dim": 896
71 },
72 "sam_vit_b": {
73 "downsample_channels": [
74 512,
75 1024
76 ],
77 "global_attn_indexes": [
78 2,
79 5,
80 8,
81 11
82 ],
83 "heads": 12,
84 "layers": 12,
85 "width": 768
86 }
87 }
88 },
89 "bos_token_id": 0,
90 "eos_token_id": 1,
91 "first_k_dense_replace": 1,
92 "hidden_size": 1280,
93 "intermediate_size": 6848,
94 "kv_lora_rank": null,
95 "lm_head": true,
96 "max_position_embeddings": 8192,
97 "moe_intermediate_size": 896,
98 "n_group": 1,
99 "n_routed_experts": 64,
100 "n_shared_experts": 2,
101 "num_attention_heads": 10,
102 "num_experts_per_tok": 6,
103 "num_hidden_layers": 12,
104 "num_key_value_heads": 10,
105 "q_lora_rank": null,
106 "qk_nope_head_dim": 0,
107 "qk_rope_head_dim": 0,
108 "rm_head": false,
109 "topk_group": 1,
110 "topk_method": "greedy",
111 "use_mla": false,
112 "v_head_dim": 0,
113 "vocab_size": 129280
114 }