{
  "_name_or_path": "ModernBERT-base",
  "architectures": [
    "ModernBertForMaskedLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 50281,
  "classifier_activation": "gelu",
  "classifier_bias": false,
  "classifier_dropout": 0.0,
  "classifier_pooling": "mean",
  "cls_token_id": 50281,
  "decoder_bias": true,
  "deterministic_flash_attn": false,
  "embedding_dropout": 0.0,
  "eos_token_id": 50282,
  "global_attn_every_n_layers": 3,
  "global_rope_theta": 160000.0,
  "gradient_checkpointing": false,
  "hidden_activation": "gelu",
  "hidden_size": 768,
  "initializer_cutoff_factor": 2.0,
  "initializer_range": 0.02,
  "intermediate_size": 1152,
  "layer_norm_eps": 1e-05,
  "local_attention": 128,
  "local_rope_theta": 10000.0,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "mlp_dropout": 0.0,
  "model_type": "modernbert",
  "norm_bias": false,
  "norm_eps": 1e-05,
  "num_attention_heads": 12,
  "num_hidden_layers": 22,
  "pad_token_id": 50283,
  "position_embedding_type": "absolute",
  "sep_token_id": 50282,
  "tie_word_embeddings": true,
  "torch_dtype": "float32",
  "transformers_version": "4.47.0.dev0",
  "vocab_size": 50368
}