config.json
1.1 KB · 44 lines · json Raw
{
  "architectures": [
    "HiggsAudioV2ForConditionalGeneration"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "audio_bos_token_id": 128013,
  "audio_delay_token_id": 128014,
  "audio_stream_bos_id": 1024,
  "audio_stream_eos_id": 1025,
  "audio_token_id": 128016,
  "bos_token_id": 1,
  "codebook_size": 1026,
  "dtype": "bfloat16",
  "eos_token_id": 128009,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 2048,
  "mlp_bias": false,
  "model_type": "higgs_audio_v2",
  "num_attention_heads": 24,
  "num_codebooks": 8,
  "num_hidden_layers": 28,
  "num_key_value_heads": 8,
  "pad_token_id": 128001,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_parameters": {
    "factor": 32.0,
    "high_freq_factor": 0.5,
    "low_freq_factor": 0.125,
    "original_max_position_embeddings": 1024,
    "rope_theta": 500000.0,
    "rope_type": "llama3"
  },
  "tie_word_embeddings": false,
  "transformers_version": "5.3.0.dev0",
  "use_cache": true,
  "vocab_size": 128256
}