config.json
{
  "activation_function": "swiglu",
  "architectures": ["NomicBertModel"],
  "attn_pdrop": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "auto_map": {
    "AutoConfig": "nomic-ai/nomic-bert-2048--configuration_hf_nomic_bert.NomicBertConfig",
    "AutoModel": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertModel",
    "AutoModelForMaskedLM": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForPreTraining"
  },
  "bos_token_id": null,
  "causal": false,
  "classifier_dropout": null,
  "dense_seq_output": true,
  "embd_pdrop": 0.0,
  "eos_token_id": null,
  "fused_bias_fc": true,
  "fused_dropout_add_ln": true,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_epsilon": 1e-12,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 8192,
  "mlp_fc1_bias": false,
  "mlp_fc2_bias": false,
  "model_type": "nomic_bert",
  "n_embd": 768,
  "n_head": 12,
  "n_inner": 3072,
  "n_layer": 12,
  "n_positions": 8192,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pad_vocab_size_multiple": 64,
  "parallel_block": false,
  "parallel_block_tied_norm": false,
  "prenorm": false,
  "qkv_proj_bias": false,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.0,
  "rope_parameters": {
    "rope_theta": 1000.0,
    "rope_type": "dynamic",
    "factor": 2.0
  },
  "rotary_emb_base": 1000,
  "rotary_emb_fraction": 1.0,
  "rotary_emb_interleaved": false,
  "rotary_emb_scale_base": null,
  "rotary_scaling_factor": 2,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "5.3.0.dev0",
  "type_vocab_size": 2,
  "use_cache": true,
  "use_flash_attn": true,
  "use_rms_norm": false,
  "use_xentropy": true,
  "vocab_size": 30528
}
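
A minimal loading sketch in Python, assuming the standard transformers API: the auto_map block above routes AutoConfig and AutoModel to custom code in the nomic-ai/nomic-bert-2048 repo, which is why trust_remote_code=True is required.

    from transformers import AutoConfig, AutoModel

    repo = "nomic-ai/nomic-bert-2048"  # repo id taken from the auto_map entries
    config = AutoConfig.from_pretrained(repo, trust_remote_code=True)
    model = AutoModel.from_pretrained(repo, trust_remote_code=True)

    print(config.model_type)   # "nomic_bert"
    print(config.hidden_size)  # 768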
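
The sizing fields are internally consistent; a few arithmetic checks using only values from the config (this block only verifies the numbers, it makes no claims about the modeling code):

    hidden_size = 768
    num_attention_heads = 12
    assert 64 == hidden_size // num_attention_heads  # head_dim: 768 / 12 = 64
    assert 3072 == 4 * hidden_size                   # intermediate_size / n_inner: 4 * 768
    assert 30528 % 64 == 0                           # vocab_size padded to pad_vocab_size_multiple (64 * 477)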
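
With "activation_function": "swiglu" and both MLP bias flags false, the feed-forward block is a gated SiLU MLP without biases. A sketch of that shape, assuming the common GLU layout where fc1 produces gate and value halves; class and attribute names here are illustrative, not the checkpoint's parameter names:

    import torch.nn as nn
    import torch.nn.functional as F

    class SwiGLU(nn.Module):
        """Bias-free SwiGLU MLP: fc1 -> split into (gate, value) -> SiLU(gate) * value -> fc2."""
        def __init__(self, hidden_size: int = 768, intermediate_size: int = 3072):
            super().__init__()
            self.fc1 = nn.Linear(hidden_size, 2 * intermediate_size, bias=False)  # "mlp_fc1_bias": false
            self.fc2 = nn.Linear(intermediate_size, hidden_size, bias=False)      # "mlp_fc2_bias": false

        def forward(self, x):
            gate, value = self.fc1(x).chunk(2, dim=-1)
            return self.fc2(F.silu(gate) * value)  # "hidden_act": "silu" is the gate nonlinearity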
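
rope_parameters selects "dynamic" RoPE scaling with factor 2.0 over a small rope_theta of 1000.0 (mirrored by the legacy rotary_emb_base and rotary_scaling_factor keys). Below is a sketch of the dynamic NTK-style base recomputation that transformers uses for "dynamic" rope scaling; whether modeling_hf_nomic_bert applies exactly this formula is an assumption:

    rope_theta = 1000.0
    factor = 2.0
    head_dim = 64
    max_position_embeddings = 8192

    def scaled_base(seq_len: int) -> float:
        """Grow the rotary base once the sequence exceeds the position window."""
        if seq_len <= max_position_embeddings:
            return rope_theta
        scale = (factor * seq_len / max_position_embeddings) - (factor - 1)
        return rope_theta * scale ** (head_dim / (head_dim - 2))

    print(scaled_base(4096))    # 1000.0 -- within the window, base unchanged
    print(scaled_base(16384))   # ~3.1e3 -- base grows, stretching rotary wavelengths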