{
  "_name_or_path": "microsoft/deberta-v3-base",
  "architectures": [
    "DebertaV2ForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "contradiction",
    "1": "entailment",
    "2": "neutral"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "contradiction": 0,
    "entailment": 1,
    "neutral": 2
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "torch_dtype": "float32",
  "transformers_version": "4.11.3",
  "type_vocab_size": 0,
  "vocab_size": 128100
}