config.json · bart-large-mnli

config.json

1.1 KB · 50 lines · json Raw

1	`{`
2	`"_num_labels": 3,`
3	`"activation_dropout": 0.0,`
4	`"activation_function": "gelu",`
5	`"add_final_layer_norm": false,`
6	`"architectures": [`
7	`"BartForSequenceClassification"`
8	`],`
9	`"attention_dropout": 0.0,`
10	`"bos_token_id": 0,`
11	`"classif_dropout": 0.0,`
12	`"classifier_dropout": 0.0,`
13	`"d_model": 1024,`
14	`"decoder_attention_heads": 16,`
15	`"decoder_ffn_dim": 4096,`
16	`"decoder_layerdrop": 0.0,`
17	`"decoder_layers": 12,`
18	`"decoder_start_token_id": 2,`
19	`"dropout": 0.1,`
20	`"encoder_attention_heads": 16,`
21	`"encoder_ffn_dim": 4096,`
22	`"encoder_layerdrop": 0.0,`
23	`"encoder_layers": 12,`
24	`"eos_token_id": 2,`
25	`"forced_eos_token_id": 2,`
26	`"gradient_checkpointing": false,`
27	`"id2label": {`
28	`"0": "contradiction",`
29	`"1": "neutral",`
30	`"2": "entailment"`
31	`},`
32	`"init_std": 0.02,`
33	`"is_encoder_decoder": true,`
34	`"label2id": {`
35	`"contradiction": 0,`
36	`"entailment": 2,`
37	`"neutral": 1`
38	`},`
39	`"max_position_embeddings": 1024,`
40	`"model_type": "bart",`
41	`"normalize_before": false,`
42	`"num_hidden_layers": 12,`
43	`"output_past": false,`
44	`"pad_token_id": 1,`
45	`"scale_embedding": false,`
46	`"transformers_version": "4.7.0.dev0",`
47	`"use_cache": true,`
48	`"vocab_size": 50265`
49	`}`
50