config.json · distilbart-mnli-12-1

config.json

1.4 KB · 57 lines · json Raw

1	`{`
2	`"_num_labels": 3,`
3	`"activation_dropout": 0.0,`
4	`"activation_function": "gelu",`
5	`"add_bias_logits": false,`
6	`"add_final_layer_norm": false,`
7	`"architectures": [`
8	`"BartForSequenceClassification"`
9	`],`
10	`"attention_dropout": 0.1,`
11	`"bos_token_id": 0,`
12	`"classif_dropout": 0.0,`
13	`"classifier_dropout": 0.0,`
14	`"d_model": 1024,`
15	`"decoder_attention_heads": 16,`
16	`"decoder_ffn_dim": 4096,`
17	`"decoder_layerdrop": 0.0,`
18	`"decoder_layers": 1,`
19	`"decoder_start_token_id": 2,`
20	`"dropout": 0.1,`
21	`"encoder_attention_heads": 16,`
22	`"encoder_ffn_dim": 4096,`
23	`"encoder_layerdrop": 0.0,`
24	`"encoder_layers": 12,`
25	`"eos_token_id": 2,`
26	`"extra_pos_embeddings": 2,`
27	`"finetuning_task": "mnli",`
28	`"force_bos_token_to_be_generated": false,`
29	`"forced_eos_token_id": 2,`
30	`"gradient_checkpointing": false,`
31	`"id2label": {`
32	`"0": "contradiction",`
33	`"1": "neutral",`
34	`"2": "entailment"`
35	`},`
36	`"init_std": 0.02,`
37	`"is_encoder_decoder": true,`
38	`"label2id": {`
39	`"contradiction": 0,`
40	`"entailment": 2,`
41	`"neutral": 1`
42	`},`
43	`"max_position_embeddings": 1024,`
44	`"model_type": "bart",`
45	`"normalize_before": false,`
46	`"normalize_embedding": true,`
47	`"num_hidden_layers": 12,`
48	`"output_past": false,`
49	`"pad_token_id": 1,`
50	`"scale_embedding": false,`
51	`"static_position_embeddings": false,`
52	`"total_flos": 153130534133111808,`
53	`"transformers_version": "4.7.0.dev0",`
54	`"use_cache": true,`
55	`"vocab_size": 50265`
56	`}`
57