config.json
4.4 KB · 168 lines · json Raw
1 {
2 "architectures": [
3 "Qwen3TTSForConditionalGeneration"
4 ],
5 "assistant_token_id": 77091,
6 "im_end_token_id": 151645,
7 "im_start_token_id": 151644,
8 "tts_bos_token_id": 151672,
9 "tts_eos_token_id": 151673,
10 "tts_pad_token_id": 151671,
11 "model_type": "qwen3_tts",
12 "tokenizer_type": "qwen3_tts_tokenizer_12hz",
13 "tts_model_size": "0b6",
14 "tts_model_type": "base",
15 "speaker_encoder_config": {
16 "enc_dim": 1024,
17 "sample_rate": 24000
18 },
19 "talker_config": {
20 "attention_bias": false,
21 "attention_dropout": 0,
22 "code_predictor_config": {
23 "_name_or_path": "",
24 "add_cross_attention": false,
25 "architectures": null,
26 "attention_bias": false,
27 "attention_dropout": 0,
28 "bad_words_ids": null,
29 "begin_suppress_tokens": null,
30 "bos_token_id": null,
31 "chunk_size_feed_forward": 0,
32 "cross_attention_hidden_size": null,
33 "decoder_start_token_id": null,
34 "diversity_penalty": 0.0,
35 "do_sample": false,
36 "early_stopping": false,
37 "encoder_no_repeat_ngram_size": 0,
38 "eos_token_id": null,
39 "exponential_decay_length_penalty": null,
40 "finetuning_task": null,
41 "forced_bos_token_id": null,
42 "forced_eos_token_id": null,
43 "head_dim": 128,
44 "hidden_act": "silu",
45 "hidden_size": 1024,
46 "id2label": {
47 "0": "LABEL_0",
48 "1": "LABEL_1"
49 },
50 "initializer_range": 0.02,
51 "intermediate_size": 3072,
52 "is_decoder": false,
53 "is_encoder_decoder": false,
54 "label2id": {
55 "LABEL_0": 0,
56 "LABEL_1": 1
57 },
58 "layer_types": [
59 "full_attention",
60 "full_attention",
61 "full_attention",
62 "full_attention",
63 "full_attention"
64 ],
65 "length_penalty": 1.0,
66 "max_length": 20,
67 "max_position_embeddings": 65536,
68 "max_window_layers": 28,
69 "min_length": 0,
70 "model_type": "qwen3_tts_talker_code_predictor",
71 "no_repeat_ngram_size": 0,
72 "num_attention_heads": 16,
73 "num_beam_groups": 1,
74 "num_beams": 1,
75 "num_code_groups": 16,
76 "num_hidden_layers": 5,
77 "num_key_value_heads": 8,
78 "num_return_sequences": 1,
79 "output_attentions": false,
80 "output_hidden_states": false,
81 "output_scores": false,
82 "pad_token_id": null,
83 "prefix": null,
84 "problem_type": null,
85 "pruned_heads": {},
86 "remove_invalid_values": false,
87 "repetition_penalty": 1.0,
88 "return_dict": true,
89 "return_dict_in_generate": false,
90 "rms_norm_eps": 1e-06,
91 "rope_scaling": null,
92 "rope_theta": 1000000,
93 "sep_token_id": null,
94 "sliding_window": null,
95 "suppress_tokens": null,
96 "task_specific_params": null,
97 "temperature": 1.0,
98 "tf_legacy_loss": false,
99 "tie_encoder_decoder": false,
100 "tie_word_embeddings": false,
101 "tokenizer_class": null,
102 "top_k": 50,
103 "top_p": 1.0,
104 "dtype": null,
105 "torchscript": false,
106 "typical_p": 1.0,
107 "use_bfloat16": false,
108 "use_cache": true,
109 "use_sliding_window": false,
110 "vocab_size": 2048
111 },
112 "codec_bos_id": 2149,
113 "codec_eos_token_id": 2150,
114 "codec_think_id": 2154,
115 "codec_language_id": {
116 "chinese": 2055,
117 "english": 2050,
118 "german": 2053,
119 "italian": 2070,
120 "portuguese": 2071,
121 "spanish": 2054,
122 "japanese": 2058,
123 "korean": 2064,
124 "french": 2061,
125 "russian": 2069
126 },
127 "codec_nothink_id": 2155,
128 "codec_pad_id": 2148,
129 "codec_think_bos_id": 2156,
130 "codec_think_eos_id": 2157,
131 "spk_id": {
132 },
133 "spk_is_dialect": {
134 },
135 "head_dim": 128,
136 "hidden_act": "silu",
137 "hidden_size": 1024,
138 "initializer_range": 0.02,
139 "intermediate_size": 3072,
140 "max_position_embeddings": 32768,
141 "model_type": "qwen3_tts_talker",
142 "num_attention_heads": 16,
143 "num_code_groups": 16,
144 "num_hidden_layers": 28,
145 "num_key_value_heads": 8,
146 "position_id_per_seconds": 13,
147 "rms_norm_eps": 1e-06,
148 "rope_scaling": {
149 "interleaved": true,
150 "mrope_section": [
151 24,
152 20,
153 20
154 ],
155 "rope_type": "default",
156 "type": "default"
157 },
158 "rope_theta": 1000000,
159 "sliding_window": null,
160 "text_hidden_size": 2048,
161 "text_vocab_size": 151936,
162 "use_cache": true,
163 "use_sliding_window": false,
164 "vocab_size": 3072
165 },
166 "transformers_version": "4.57.3"
167 }
168