config.json
1.3 KB · 52 lines · json Raw
1 {
2 "_name_or_path": "gte-reranker-modernbert-base",
3 "architectures": [
4 "ModernBertForSequenceClassification"
5 ],
6 "attention_bias": false,
7 "attention_dropout": 0.0,
8 "bos_token_id": 50281,
9 "classifier_activation": "gelu",
10 "classifier_bias": false,
11 "classifier_dropout": 0.0,
12 "classifier_pooling": "mean",
13 "cls_token_id": 50281,
14 "decoder_bias": true,
15 "deterministic_flash_attn": false,
16 "embedding_dropout": 0.0,
17 "eos_token_id": 50282,
18 "global_attn_every_n_layers": 3,
19 "global_rope_theta": 160000.0,
20 "gradient_checkpointing": false,
21 "hidden_activation": "gelu",
22 "hidden_size": 768,
23 "id2label": {
24 "0": "LABEL_0"
25 },
26 "initializer_cutoff_factor": 2.0,
27 "initializer_range": 0.02,
28 "intermediate_size": 1152,
29 "label2id": {
30 "LABEL_0": 0
31 },
32 "layer_norm_eps": 1e-05,
33 "local_attention": 128,
34 "local_rope_theta": 10000.0,
35 "max_position_embeddings": 8192,
36 "mlp_bias": false,
37 "mlp_dropout": 0.0,
38 "model_type": "modernbert",
39 "norm_bias": false,
40 "norm_eps": 1e-05,
41 "num_attention_heads": 12,
42 "num_hidden_layers": 22,
43 "pad_token_id": 50283,
44 "position_embedding_type": "absolute",
45 "sep_token_id": 50282,
46 "sparse_pred_ignore_index": -100,
47 "sparse_prediction": false,
48 "torch_dtype": "float32",
49 "transformers_version": "4.48.0.dev0",
50 "vocab_size": 50368
51 }
52