{
  "audio_decoder_config": {
    "attention_o_bias": false,
    "attention_qk_norm": false,
    "attention_qkv_bias": false,
    "audio_hidden_dim": 5120,
    "dim": 2560,
    "dropout": 0.0,
    "head_dim": 128,
    "initializer_range": 0.01976423537605237,
    "intermediate_size": 9728,
    "max_seq_len": 11,
    "model_type": "fish_qwen3_audio_decoder",
    "moe_intermediate_size": 768,
    "n_head": 32,
    "n_layer": 4,
    "n_local_heads": 8,
    "norm_eps": 1e-06,
    "norm_topk_prob": true,
    "num_codebooks": 10,
    "num_experts": 1,
    "num_experts_per_tok": 1,
    "rope_base": 1000000,
    "router_gamma": 0.001,
    "text_dim": 2560,
    "tie_word_embeddings": false,
    "use_aux_loss_free": false,
    "use_bfloat16": false,
    "use_gradient_checkpointing": true,
    "use_moe": false,
    "vocab_size": 4096
  },
  "audio_pad_token_id": 151677,
  "dtype": "bfloat16",
  "eos_token_id": 151645,
  "model_type": "fish_qwen3_omni",
  "pad_token_id": 151669,
  "semantic_end_token_id": 155773,
  "semantic_start_token_id": 151678,
  "text_config": {
    "attention_o_bias": false,
    "attention_qk_norm": true,
    "attention_qkv_bias": false,
    "audio_hidden_dim": 5120,
    "dim": 2560,
    "dropout": 0.0,
    "head_dim": 128,
    "initializer_range": 0.01976423537605237,
    "intermediate_size": 9728,
    "max_seq_len": 32768,
    "model_type": "fish_qwen3",
    "moe_intermediate_size": 768,
    "n_head": 32,
    "n_layer": 36,
    "n_local_heads": 8,
    "norm_eps": 1e-06,
    "norm_topk_prob": true,
    "num_experts": 1,
    "num_experts_per_tok": 1,
    "rope_base": 1000000,
    "router_gamma": 0.001,
    "tie_word_embeddings": true,
    "use_aux_loss_free": false,
    "use_bfloat16": false,
    "use_gradient_checkpointing": true,
    "use_moe": false,
    "vocab_size": 155776
  },
  "transformers_version": "4.57.1"
}