config.json · gte-reranker-modernbert-base

config.json

1.3 KB · 52 lines · json Raw

1	`{`
2	`"_name_or_path": "gte-reranker-modernbert-base",`
3	`"architectures": [`
4	`"ModernBertForSequenceClassification"`
5	`],`
6	`"attention_bias": false,`
7	`"attention_dropout": 0.0,`
8	`"bos_token_id": 50281,`
9	`"classifier_activation": "gelu",`
10	`"classifier_bias": false,`
11	`"classifier_dropout": 0.0,`
12	`"classifier_pooling": "mean",`
13	`"cls_token_id": 50281,`
14	`"decoder_bias": true,`
15	`"deterministic_flash_attn": false,`
16	`"embedding_dropout": 0.0,`
17	`"eos_token_id": 50282,`
18	`"global_attn_every_n_layers": 3,`
19	`"global_rope_theta": 160000.0,`
20	`"gradient_checkpointing": false,`
21	`"hidden_activation": "gelu",`
22	`"hidden_size": 768,`
23	`"id2label": {`
24	`"0": "LABEL_0"`
25	`},`
26	`"initializer_cutoff_factor": 2.0,`
27	`"initializer_range": 0.02,`
28	`"intermediate_size": 1152,`
29	`"label2id": {`
30	`"LABEL_0": 0`
31	`},`
32	`"layer_norm_eps": 1e-05,`
33	`"local_attention": 128,`
34	`"local_rope_theta": 10000.0,`
35	`"max_position_embeddings": 8192,`
36	`"mlp_bias": false,`
37	`"mlp_dropout": 0.0,`
38	`"model_type": "modernbert",`
39	`"norm_bias": false,`
40	`"norm_eps": 1e-05,`
41	`"num_attention_heads": 12,`
42	`"num_hidden_layers": 22,`
43	`"pad_token_id": 50283,`
44	`"position_embedding_type": "absolute",`
45	`"sep_token_id": 50282,`
46	`"sparse_pred_ignore_index": -100,`
47	`"sparse_prediction": false,`
48	`"torch_dtype": "float32",`
49	`"transformers_version": "4.48.0.dev0",`
50	`"vocab_size": 50368`
51	`}`
52