config.json
5.3 KB · 271 lines · json Raw
1 {
2 "activation_dropout": 0.0,
3 "activation_function": "silu",
4 "anchor_image_size": null,
5 "architectures": [
6 "RtDetrV2ForObjectDetection"
7 ],
8 "attention_dropout": 0.0,
9 "auxiliary_loss": true,
10 "backbone": null,
11 "backbone_config": {
12 "depths": [
13 2,
14 2,
15 2,
16 2
17 ],
18 "hidden_sizes": [
19 64,
20 128,
21 256,
22 512
23 ],
24 "layer_type": "basic",
25 "model_type": "rt_detr_resnet",
26 "out_features": [
27 "stage2",
28 "stage3",
29 "stage4"
30 ],
31 "out_indices": [
32 2,
33 3,
34 4
35 ]
36 },
37 "backbone_kwargs": null,
38 "batch_norm_eps": 1e-05,
39 "box_noise_scale": 1.0,
40 "d_model": 256,
41 "decoder_activation_function": "relu",
42 "decoder_attention_heads": 8,
43 "decoder_ffn_dim": 1024,
44 "decoder_in_channels": [
45 256,
46 256,
47 256
48 ],
49 "decoder_layers": 3,
50 "decoder_method": "default",
51 "decoder_n_levels": 3,
52 "decoder_n_points": 4,
53 "decoder_offset_scale": 0.5,
54 "disable_custom_kernels": true,
55 "dropout": 0.0,
56 "encode_proj_layers": [
57 2
58 ],
59 "encoder_activation_function": "gelu",
60 "encoder_attention_heads": 8,
61 "encoder_ffn_dim": 1024,
62 "encoder_hidden_dim": 256,
63 "encoder_in_channels": [
64 128,
65 256,
66 512
67 ],
68 "encoder_layers": 1,
69 "eos_coefficient": 0.0001,
70 "eval_size": null,
71 "feat_strides": [
72 8,
73 16,
74 32
75 ],
76 "focal_loss_alpha": 0.75,
77 "focal_loss_gamma": 2.0,
78 "freeze_backbone_batch_norms": true,
79 "hidden_expansion": 0.5,
80 "id2label": {
81 "0": "person",
82 "1": "bicycle",
83 "2": "car",
84 "3": "motorbike",
85 "4": "aeroplane",
86 "5": "bus",
87 "6": "train",
88 "7": "truck",
89 "8": "boat",
90 "9": "traffic light",
91 "10": "fire hydrant",
92 "11": "stop sign",
93 "12": "parking meter",
94 "13": "bench",
95 "14": "bird",
96 "15": "cat",
97 "16": "dog",
98 "17": "horse",
99 "18": "sheep",
100 "19": "cow",
101 "20": "elephant",
102 "21": "bear",
103 "22": "zebra",
104 "23": "giraffe",
105 "24": "backpack",
106 "25": "umbrella",
107 "26": "handbag",
108 "27": "tie",
109 "28": "suitcase",
110 "29": "frisbee",
111 "30": "skis",
112 "31": "snowboard",
113 "32": "sports ball",
114 "33": "kite",
115 "34": "baseball bat",
116 "35": "baseball glove",
117 "36": "skateboard",
118 "37": "surfboard",
119 "38": "tennis racket",
120 "39": "bottle",
121 "40": "wine glass",
122 "41": "cup",
123 "42": "fork",
124 "43": "knife",
125 "44": "spoon",
126 "45": "bowl",
127 "46": "banana",
128 "47": "apple",
129 "48": "sandwich",
130 "49": "orange",
131 "50": "broccoli",
132 "51": "carrot",
133 "52": "hot dog",
134 "53": "pizza",
135 "54": "donut",
136 "55": "cake",
137 "56": "chair",
138 "57": "sofa",
139 "58": "pottedplant",
140 "59": "bed",
141 "60": "diningtable",
142 "61": "toilet",
143 "62": "tvmonitor",
144 "63": "laptop",
145 "64": "mouse",
146 "65": "remote",
147 "66": "keyboard",
148 "67": "cell phone",
149 "68": "microwave",
150 "69": "oven",
151 "70": "toaster",
152 "71": "sink",
153 "72": "refrigerator",
154 "73": "book",
155 "74": "clock",
156 "75": "vase",
157 "76": "scissors",
158 "77": "teddy bear",
159 "78": "hair drier",
160 "79": "toothbrush"
161 },
162 "initializer_bias_prior_prob": null,
163 "initializer_range": 0.01,
164 "is_encoder_decoder": true,
165 "label2id": {
166 "aeroplane": 4,
167 "apple": 47,
168 "backpack": 24,
169 "banana": 46,
170 "baseball bat": 34,
171 "baseball glove": 35,
172 "bear": 21,
173 "bed": 59,
174 "bench": 13,
175 "bicycle": 1,
176 "bird": 14,
177 "boat": 8,
178 "book": 73,
179 "bottle": 39,
180 "bowl": 45,
181 "broccoli": 50,
182 "bus": 5,
183 "cake": 55,
184 "car": 2,
185 "carrot": 51,
186 "cat": 15,
187 "cell phone": 67,
188 "chair": 56,
189 "clock": 74,
190 "cow": 19,
191 "cup": 41,
192 "diningtable": 60,
193 "dog": 16,
194 "donut": 54,
195 "elephant": 20,
196 "fire hydrant": 10,
197 "fork": 42,
198 "frisbee": 29,
199 "giraffe": 23,
200 "hair drier": 78,
201 "handbag": 26,
202 "horse": 17,
203 "hot dog": 52,
204 "keyboard": 66,
205 "kite": 33,
206 "knife": 43,
207 "laptop": 63,
208 "microwave": 68,
209 "motorbike": 3,
210 "mouse": 64,
211 "orange": 49,
212 "oven": 69,
213 "parking meter": 12,
214 "person": 0,
215 "pizza": 53,
216 "pottedplant": 58,
217 "refrigerator": 72,
218 "remote": 65,
219 "sandwich": 48,
220 "scissors": 76,
221 "sheep": 18,
222 "sink": 71,
223 "skateboard": 36,
224 "skis": 30,
225 "snowboard": 31,
226 "sofa": 57,
227 "spoon": 44,
228 "sports ball": 32,
229 "stop sign": 11,
230 "suitcase": 28,
231 "surfboard": 37,
232 "teddy bear": 77,
233 "tennis racket": 38,
234 "tie": 27,
235 "toaster": 70,
236 "toilet": 61,
237 "toothbrush": 79,
238 "traffic light": 9,
239 "train": 6,
240 "truck": 7,
241 "tvmonitor": 62,
242 "umbrella": 25,
243 "vase": 75,
244 "wine glass": 40,
245 "zebra": 22
246 },
247 "label_noise_ratio": 0.5,
248 "layer_norm_eps": 1e-05,
249 "learn_initial_query": false,
250 "matcher_alpha": 0.25,
251 "matcher_bbox_cost": 5.0,
252 "matcher_class_cost": 2.0,
253 "matcher_gamma": 2.0,
254 "matcher_giou_cost": 2.0,
255 "model_type": "rt_detr_v2",
256 "normalize_before": false,
257 "num_denoising": 100,
258 "num_feature_levels": 3,
259 "num_queries": 300,
260 "positional_encoding_temperature": 10000,
261 "torch_dtype": "float32",
262 "transformers_version": "4.49.0.dev0",
263 "use_focal_loss": true,
264 "use_pretrained_backbone": false,
265 "use_timm_backbone": false,
266 "weight_loss_bbox": 5.0,
267 "weight_loss_giou": 2.0,
268 "weight_loss_vfl": 1.0,
269 "with_box_refine": true
270 }
271