params.json · Voxtral-Mini-4B-Realtime-2602

params.json

1.3 KB · 56 lines · json Raw

1	`{`
2	`"dim": 3072,`
3	`"n_layers": 26,`
4	`"head_dim": 128,`
5	`"hidden_dim": 9216,`
6	`"n_heads": 32,`
7	`"n_kv_heads": 8,`
8	`"use_biases": false,`
9	`"causal": true,`
10	`"rope_theta": 1000000.0,`
11	`"norm_eps": 1e-05,`
12	`"vocab_size": 131072,`
13	`"model_parallel": 1,`
14	`"tied_embeddings": true,`
15	`"sliding_window": 8192,`
16	`"model_max_length": 131072,`
17	`"multimodal": {`
18	`"whisper_model_args": {`
19	`"encoder_args": {`
20	`"audio_encoding_args": {`
21	`"sampling_rate": 16000,`
22	`"frame_rate": 12.5,`
23	`"num_mel_bins": 128,`
24	`"hop_length": 160,`
25	`"window_size": 400,`
26	`"chunk_length_s": null,`
27	`"global_log_mel_max": 1.5,`
28	`"transcription_format": "streaming"`
29	`},`
30	`"dim": 1280,`
31	`"n_layers": 32,`
32	`"head_dim": 64,`
33	`"hidden_dim": 5120,`
34	`"n_heads": 32,`
35	`"vocab_size": 131072,`
36	`"n_kv_heads": 32,`
37	`"use_biases": true,`
38	`"use_cache": false,`
39	`"rope_theta": 1000000.0,`
40	`"causal": true,`
41	`"norm_eps": 1e-05,`
42	`"pos_embed": "rope",`
43	`"max_source_positions": null,`
44	`"ffn_type": "swiglu",`
45	`"norm_type": "rms_norm",`
46	`"sliding_window": 750`
47	`},`
48	`"downsample_args": {`
49	`"downsample_factor": 4`
50	`}`
51	`}`
52	`},`
53	`"ada_rms_norm_t_cond": true,`
54	`"ada_rms_norm_t_cond_dim": 32`
55	`}`
56