config.json
5.2 KB · 258 lines · json Raw
1 {
2 "activation_dropout": 0.0,
3 "activation_function": "silu",
4 "anchor_image_size": null,
5 "architectures": [
6 "RtDetrV2ForObjectDetection"
7 ],
8 "attention_dropout": 0.0,
9 "auxiliary_loss": true,
10 "backbone": null,
11 "backbone_config": {
12 "model_type": "rt_detr_resnet",
13 "out_features": [
14 "stage2",
15 "stage3",
16 "stage4"
17 ],
18 "out_indices": [
19 2,
20 3,
21 4
22 ]
23 },
24 "backbone_kwargs": null,
25 "batch_norm_eps": 1e-05,
26 "box_noise_scale": 1.0,
27 "d_model": 256,
28 "decoder_activation_function": "relu",
29 "decoder_attention_heads": 8,
30 "decoder_ffn_dim": 1024,
31 "decoder_in_channels": [
32 256,
33 256,
34 256
35 ],
36 "decoder_layers": 6,
37 "decoder_method": "default",
38 "decoder_n_levels": 3,
39 "decoder_n_points": 4,
40 "decoder_offset_scale": 0.5,
41 "disable_custom_kernels": true,
42 "dropout": 0.0,
43 "encode_proj_layers": [
44 2
45 ],
46 "encoder_activation_function": "gelu",
47 "encoder_attention_heads": 8,
48 "encoder_ffn_dim": 1024,
49 "encoder_hidden_dim": 256,
50 "encoder_in_channels": [
51 512,
52 1024,
53 2048
54 ],
55 "encoder_layers": 1,
56 "eos_coefficient": 0.0001,
57 "eval_size": null,
58 "feat_strides": [
59 8,
60 16,
61 32
62 ],
63 "focal_loss_alpha": 0.75,
64 "focal_loss_gamma": 2.0,
65 "freeze_backbone_batch_norms": true,
66 "hidden_expansion": 1.0,
67 "id2label": {
68 "0": "person",
69 "1": "bicycle",
70 "2": "car",
71 "3": "motorbike",
72 "4": "aeroplane",
73 "5": "bus",
74 "6": "train",
75 "7": "truck",
76 "8": "boat",
77 "9": "traffic light",
78 "10": "fire hydrant",
79 "11": "stop sign",
80 "12": "parking meter",
81 "13": "bench",
82 "14": "bird",
83 "15": "cat",
84 "16": "dog",
85 "17": "horse",
86 "18": "sheep",
87 "19": "cow",
88 "20": "elephant",
89 "21": "bear",
90 "22": "zebra",
91 "23": "giraffe",
92 "24": "backpack",
93 "25": "umbrella",
94 "26": "handbag",
95 "27": "tie",
96 "28": "suitcase",
97 "29": "frisbee",
98 "30": "skis",
99 "31": "snowboard",
100 "32": "sports ball",
101 "33": "kite",
102 "34": "baseball bat",
103 "35": "baseball glove",
104 "36": "skateboard",
105 "37": "surfboard",
106 "38": "tennis racket",
107 "39": "bottle",
108 "40": "wine glass",
109 "41": "cup",
110 "42": "fork",
111 "43": "knife",
112 "44": "spoon",
113 "45": "bowl",
114 "46": "banana",
115 "47": "apple",
116 "48": "sandwich",
117 "49": "orange",
118 "50": "broccoli",
119 "51": "carrot",
120 "52": "hot dog",
121 "53": "pizza",
122 "54": "donut",
123 "55": "cake",
124 "56": "chair",
125 "57": "sofa",
126 "58": "pottedplant",
127 "59": "bed",
128 "60": "diningtable",
129 "61": "toilet",
130 "62": "tvmonitor",
131 "63": "laptop",
132 "64": "mouse",
133 "65": "remote",
134 "66": "keyboard",
135 "67": "cell phone",
136 "68": "microwave",
137 "69": "oven",
138 "70": "toaster",
139 "71": "sink",
140 "72": "refrigerator",
141 "73": "book",
142 "74": "clock",
143 "75": "vase",
144 "76": "scissors",
145 "77": "teddy bear",
146 "78": "hair drier",
147 "79": "toothbrush"
148 },
149 "initializer_bias_prior_prob": null,
150 "initializer_range": 0.01,
151 "is_encoder_decoder": true,
152 "label2id": {
153 "aeroplane": 4,
154 "apple": 47,
155 "backpack": 24,
156 "banana": 46,
157 "baseball bat": 34,
158 "baseball glove": 35,
159 "bear": 21,
160 "bed": 59,
161 "bench": 13,
162 "bicycle": 1,
163 "bird": 14,
164 "boat": 8,
165 "book": 73,
166 "bottle": 39,
167 "bowl": 45,
168 "broccoli": 50,
169 "bus": 5,
170 "cake": 55,
171 "car": 2,
172 "carrot": 51,
173 "cat": 15,
174 "cell phone": 67,
175 "chair": 56,
176 "clock": 74,
177 "cow": 19,
178 "cup": 41,
179 "diningtable": 60,
180 "dog": 16,
181 "donut": 54,
182 "elephant": 20,
183 "fire hydrant": 10,
184 "fork": 42,
185 "frisbee": 29,
186 "giraffe": 23,
187 "hair drier": 78,
188 "handbag": 26,
189 "horse": 17,
190 "hot dog": 52,
191 "keyboard": 66,
192 "kite": 33,
193 "knife": 43,
194 "laptop": 63,
195 "microwave": 68,
196 "motorbike": 3,
197 "mouse": 64,
198 "orange": 49,
199 "oven": 69,
200 "parking meter": 12,
201 "person": 0,
202 "pizza": 53,
203 "pottedplant": 58,
204 "refrigerator": 72,
205 "remote": 65,
206 "sandwich": 48,
207 "scissors": 76,
208 "sheep": 18,
209 "sink": 71,
210 "skateboard": 36,
211 "skis": 30,
212 "snowboard": 31,
213 "sofa": 57,
214 "spoon": 44,
215 "sports ball": 32,
216 "stop sign": 11,
217 "suitcase": 28,
218 "surfboard": 37,
219 "teddy bear": 77,
220 "tennis racket": 38,
221 "tie": 27,
222 "toaster": 70,
223 "toilet": 61,
224 "toothbrush": 79,
225 "traffic light": 9,
226 "train": 6,
227 "truck": 7,
228 "tvmonitor": 62,
229 "umbrella": 25,
230 "vase": 75,
231 "wine glass": 40,
232 "zebra": 22
233 },
234 "label_noise_ratio": 0.5,
235 "layer_norm_eps": 1e-05,
236 "learn_initial_query": false,
237 "matcher_alpha": 0.25,
238 "matcher_bbox_cost": 5.0,
239 "matcher_class_cost": 2.0,
240 "matcher_gamma": 2.0,
241 "matcher_giou_cost": 2.0,
242 "model_type": "rt_detr_v2",
243 "normalize_before": false,
244 "num_denoising": 100,
245 "num_feature_levels": 3,
246 "num_queries": 300,
247 "positional_encoding_temperature": 10000,
248 "torch_dtype": "float32",
249 "transformers_version": "4.49.0.dev0",
250 "use_focal_loss": true,
251 "use_pretrained_backbone": false,
252 "use_timm_backbone": false,
253 "weight_loss_bbox": 5.0,
254 "weight_loss_giou": 2.0,
255 "weight_loss_vfl": 1.0,
256 "with_box_refine": true
257 }
258