config.json
```json
{
  "_name_or_path": "ModernBERT-base",
  "architectures": [
    "ModernBertForMaskedLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 50281,
  "classifier_activation": "gelu",
  "classifier_bias": false,
  "classifier_dropout": 0.0,
  "classifier_pooling": "mean",
  "cls_token_id": 50281,
  "decoder_bias": true,
  "deterministic_flash_attn": false,
  "embedding_dropout": 0.0,
  "eos_token_id": 50282,
  "global_attn_every_n_layers": 3,
  "global_rope_theta": 160000.0,
  "gradient_checkpointing": false,
  "hidden_activation": "gelu",
  "hidden_size": 768,
  "initializer_cutoff_factor": 2.0,
  "initializer_range": 0.02,
  "intermediate_size": 1152,
  "layer_norm_eps": 1e-05,
  "local_attention": 128,
  "local_rope_theta": 10000.0,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "mlp_dropout": 0.0,
  "model_type": "modernbert",
  "norm_bias": false,
  "norm_eps": 1e-05,
  "num_attention_heads": 12,
  "num_hidden_layers": 22,
  "pad_token_id": 50283,
  "position_embedding_type": "absolute",
  "sep_token_id": 50282,
  "tie_word_embeddings": true,
  "torch_dtype": "float32",
  "transformers_version": "4.47.0.dev0",
  "vocab_size": 50368
}
```
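This config describes a 22-layer, 768-hidden masked-LM checkpoint with an 8192-token context and alternating attention: a global-attention layer every 3 layers (`global_attn_every_n_layers`), with the remaining layers using a 128-token local window (`local_attention`), each with its own RoPE theta. Below is a minimal sketch of loading and using a model with this config via the `transformers` Auto classes. It assumes a `transformers` release that includes the `modernbert` model type (support was added after the `4.47.0.dev0` version stamped in this file) and that the checkpoint is available under the hub id `answerdotai/ModernBERT-base`; substitute your own path or repo id as needed.

```python
import torch
from transformers import AutoConfig, AutoModelForMaskedLM, AutoTokenizer

# Assumed hub id; replace with a local directory containing this config.json
# plus the tokenizer and weight files if you have the checkpoint on disk.
repo_id = "answerdotai/ModernBERT-base"

# The Auto classes dispatch on "model_type": "modernbert" from config.json.
config = AutoConfig.from_pretrained(repo_id)
print(config.num_hidden_layers)        # 22
print(config.max_position_embeddings)  # 8192

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForMaskedLM.from_pretrained(repo_id)
model.eval()

# Fill-mask usage: find the [MASK] position and take the argmax token.
inputs = tokenizer("The capital of France is [MASK].", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits

mask_positions = (inputs.input_ids == tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
predicted_ids = logits[0, mask_positions].argmax(dim=-1)
print(tokenizer.decode(predicted_ids))
```

Note that `torch_dtype` is `float32` here, so weights load in full precision by default; passing `torch_dtype=torch.bfloat16` to `from_pretrained` is a common way to cut memory if your hardware supports it.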