config.json
1002 B · 49 lines · json Raw
1 {
2 "architectures": [
3 "VideoMAEForVideoClassification"
4 ],
5 "attention_probs_dropout_prob": 0.0,
6 "decoder_hidden_size": 192,
7 "decoder_intermediate_size": 768,
8 "decoder_num_attention_heads": 3,
9 "decoder_num_hidden_layers": 12,
10 "dtype": "float32",
11 "hidden_act": "gelu",
12 "hidden_dropout_prob": 0.0,
13 "hidden_size": 384,
14 "id2label": {
15 "0": "A",
16 "1": "B1",
17 "2": "B2",
18 "3": "B4",
19 "4": "B5",
20 "5": "B6",
21 "6": "G"
22 },
23 "image_size": 224,
24 "initializer_range": 0.02,
25 "intermediate_size": 1536,
26 "label2id": {
27 "A": 0,
28 "B1": 1,
29 "B2": 2,
30 "B4": 3,
31 "B5": 4,
32 "B6": 5,
33 "G": 6
34 },
35 "layer_norm_eps": 1e-12,
36 "model_type": "videomae",
37 "norm_pix_loss": true,
38 "num_attention_heads": 16,
39 "num_channels": 3,
40 "num_frames": 16,
41 "num_hidden_layers": 12,
42 "patch_size": 16,
43 "problem_type": "single_label_classification",
44 "qkv_bias": true,
45 "transformers_version": "4.57.1",
46 "tubelet_size": 2,
47 "use_mean_pooling": true
48 }
49