{
  "architectures": [
    "T5WithLMHeadModel"
  ],
  "d_ff": 16384,
  "d_kv": 128,
  "d_model": 1024,
  "decoder_start_token_id": 0,
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_heads": 32,
  "num_layers": 24,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 200,
      "min_length": 30,
      "no_repeat_ngram_size": 3,
      "num_beams": 4,
      "prefix": "summarize: "
    },
    "translation_en_to_de": {
      "early_stopping": true,
      "max_length": 300,
      "num_beams": 4,
      "prefix": "translate English to German: "
    },
    "translation_en_to_fr": {
      "early_stopping": true,
      "max_length": 300,
      "num_beams": 4,
      "prefix": "translate English to French: "
    },
    "translation_en_to_ro": {
      "early_stopping": true,
      "max_length": 300,
      "num_beams": 4,
      "prefix": "translate English to Romanian: "
    }
  },
  "vocab_size": 32128
}