{
  "model_type": "nemotron",
  "architectures": [
    "NemotronForCausalLM"
  ],

  "torch_dtype": "bfloat16",

  "eos_token_id": 248046,
  "pad_token_id": 248044,
  "image_token_id": 248056,
  "video_token_id": 248057,
  "vision_start_token_id": 248053,
  "vision_end_token_id": 248054,

  "model_name": "Tesleum/Shirdel-Coder-9B-Claude-Fable-5",

  "text_config": {
    "model_type": "nemotron_text",
    "hidden_size": 4096,
    "num_hidden_layers": 32,
    "num_attention_heads": 16,
    "num_key_value_heads": 4,

    "hidden_act": "silu",
    "intermediate_size": 12288,

    "max_position_embeddings": 262144,
    "rms_norm_eps": 1e-6,

    "rope_parameters": {
      "rope_theta": 10000000,
      "rope_type": "default",
      "mrope_interleaved": true
    }
  },

  "tie_word_embeddings": false,

  "unsloth_version": "2026.6.8",

  "vision_config": {
    "model_type": "qwen3_5",
    "hidden_size": 1152,
    "depth": 27,
    "patch_size": 16,
    "num_heads": 16
  }
}