config.json · nli-deberta-v3-base

config.json

1.0 KB · 46 lines · json Raw

1	`{`
2	`"_name_or_path": "microsoft/deberta-v3-base",`
3	`"architectures": [`
4	`"DebertaV2ForSequenceClassification"`
5	`],`
6	`"attention_probs_dropout_prob": 0.1,`
7	`"hidden_act": "gelu",`
8	`"hidden_dropout_prob": 0.1,`
9	`"hidden_size": 768,`
10	`"id2label": {`
11	`"0": "contradiction",`
12	`"1": "entailment",`
13	`"2": "neutral"`
14	`},`
15	`"initializer_range": 0.02,`
16	`"intermediate_size": 3072,`
17	`"label2id": {`
18	`"contradiction": 0,`
19	`"entailment": 1,`
20	`"neutral": 2`
21	`},`
22	`"layer_norm_eps": 1e-07,`
23	`"max_position_embeddings": 512,`
24	`"max_relative_positions": -1,`
25	`"model_type": "deberta-v2",`
26	`"norm_rel_ebd": "layer_norm",`
27	`"num_attention_heads": 12,`
28	`"num_hidden_layers": 12,`
29	`"pad_token_id": 0,`
30	`"pooler_dropout": 0,`
31	`"pooler_hidden_act": "gelu",`
32	`"pooler_hidden_size": 768,`
33	`"pos_att_type": [`
34	`"p2c",`
35	`"c2p"`
36	`],`
37	`"position_biased_input": false,`
38	`"position_buckets": 256,`
39	`"relative_attention": true,`
40	`"share_att_key": true,`
41	`"torch_dtype": "float32",`
42	`"transformers_version": "4.11.3",`
43	`"type_vocab_size": 0,`
44	`"vocab_size": 128100`
45	`}`
46