config.json
5.1 KB · 266 lines · json Raw
1 {
2 "activation_dropout": 0.0,
3 "activation_function": "silu",
4 "anchor_image_size": null,
5 "architectures": [
6 "RTDetrForObjectDetection"
7 ],
8 "attention_dropout": 0.0,
9 "auxiliary_loss": true,
10 "backbone": null,
11 "backbone_config": {
12 "depths": [
13 2,
14 2,
15 2,
16 2
17 ],
18 "hidden_sizes": [
19 64,
20 128,
21 256,
22 512
23 ],
24 "layer_type": "basic",
25 "model_type": "rt_detr_resnet",
26 "out_features": [
27 "stage2",
28 "stage3",
29 "stage4"
30 ],
31 "out_indices": [
32 2,
33 3,
34 4
35 ]
36 },
37 "backbone_kwargs": null,
38 "batch_norm_eps": 1e-05,
39 "box_noise_scale": 1.0,
40 "d_model": 256,
41 "decoder_activation_function": "relu",
42 "decoder_attention_heads": 8,
43 "decoder_ffn_dim": 1024,
44 "decoder_in_channels": [
45 256,
46 256,
47 256
48 ],
49 "decoder_layers": 3,
50 "decoder_n_points": 4,
51 "disable_custom_kernels": true,
52 "dropout": 0.0,
53 "encode_proj_layers": [
54 2
55 ],
56 "encoder_activation_function": "gelu",
57 "encoder_attention_heads": 8,
58 "encoder_ffn_dim": 1024,
59 "encoder_hidden_dim": 256,
60 "encoder_in_channels": [
61 128,
62 256,
63 512
64 ],
65 "encoder_layers": 1,
66 "eos_coefficient": 0.0001,
67 "eval_size": null,
68 "feat_strides": [
69 8,
70 16,
71 32
72 ],
73 "focal_loss_alpha": 0.75,
74 "focal_loss_gamma": 2.0,
75 "hidden_expansion": 0.5,
76 "id2label": {
77 "0": "person",
78 "1": "bicycle",
79 "2": "car",
80 "3": "motorbike",
81 "4": "aeroplane",
82 "5": "bus",
83 "6": "train",
84 "7": "truck",
85 "8": "boat",
86 "9": "traffic light",
87 "10": "fire hydrant",
88 "11": "stop sign",
89 "12": "parking meter",
90 "13": "bench",
91 "14": "bird",
92 "15": "cat",
93 "16": "dog",
94 "17": "horse",
95 "18": "sheep",
96 "19": "cow",
97 "20": "elephant",
98 "21": "bear",
99 "22": "zebra",
100 "23": "giraffe",
101 "24": "backpack",
102 "25": "umbrella",
103 "26": "handbag",
104 "27": "tie",
105 "28": "suitcase",
106 "29": "frisbee",
107 "30": "skis",
108 "31": "snowboard",
109 "32": "sports ball",
110 "33": "kite",
111 "34": "baseball bat",
112 "35": "baseball glove",
113 "36": "skateboard",
114 "37": "surfboard",
115 "38": "tennis racket",
116 "39": "bottle",
117 "40": "wine glass",
118 "41": "cup",
119 "42": "fork",
120 "43": "knife",
121 "44": "spoon",
122 "45": "bowl",
123 "46": "banana",
124 "47": "apple",
125 "48": "sandwich",
126 "49": "orange",
127 "50": "broccoli",
128 "51": "carrot",
129 "52": "hot dog",
130 "53": "pizza",
131 "54": "donut",
132 "55": "cake",
133 "56": "chair",
134 "57": "sofa",
135 "58": "pottedplant",
136 "59": "bed",
137 "60": "diningtable",
138 "61": "toilet",
139 "62": "tvmonitor",
140 "63": "laptop",
141 "64": "mouse",
142 "65": "remote",
143 "66": "keyboard",
144 "67": "cell phone",
145 "68": "microwave",
146 "69": "oven",
147 "70": "toaster",
148 "71": "sink",
149 "72": "refrigerator",
150 "73": "book",
151 "74": "clock",
152 "75": "vase",
153 "76": "scissors",
154 "77": "teddy bear",
155 "78": "hair drier",
156 "79": "toothbrush"
157 },
158 "initializer_range": 0.01,
159 "is_encoder_decoder": true,
160 "label2id": {
161 "aeroplane": 4,
162 "apple": 47,
163 "backpack": 24,
164 "banana": 46,
165 "baseball bat": 34,
166 "baseball glove": 35,
167 "bear": 21,
168 "bed": 59,
169 "bench": 13,
170 "bicycle": 1,
171 "bird": 14,
172 "boat": 8,
173 "book": 73,
174 "bottle": 39,
175 "bowl": 45,
176 "broccoli": 50,
177 "bus": 5,
178 "cake": 55,
179 "car": 2,
180 "carrot": 51,
181 "cat": 15,
182 "cell phone": 67,
183 "chair": 56,
184 "clock": 74,
185 "cow": 19,
186 "cup": 41,
187 "diningtable": 60,
188 "dog": 16,
189 "donut": 54,
190 "elephant": 20,
191 "fire hydrant": 10,
192 "fork": 42,
193 "frisbee": 29,
194 "giraffe": 23,
195 "hair drier": 78,
196 "handbag": 26,
197 "horse": 17,
198 "hot dog": 52,
199 "keyboard": 66,
200 "kite": 33,
201 "knife": 43,
202 "laptop": 63,
203 "microwave": 68,
204 "motorbike": 3,
205 "mouse": 64,
206 "orange": 49,
207 "oven": 69,
208 "parking meter": 12,
209 "person": 0,
210 "pizza": 53,
211 "pottedplant": 58,
212 "refrigerator": 72,
213 "remote": 65,
214 "sandwich": 48,
215 "scissors": 76,
216 "sheep": 18,
217 "sink": 71,
218 "skateboard": 36,
219 "skis": 30,
220 "snowboard": 31,
221 "sofa": 57,
222 "spoon": 44,
223 "sports ball": 32,
224 "stop sign": 11,
225 "suitcase": 28,
226 "surfboard": 37,
227 "teddy bear": 77,
228 "tennis racket": 38,
229 "tie": 27,
230 "toaster": 70,
231 "toilet": 61,
232 "toothbrush": 79,
233 "traffic light": 9,
234 "train": 6,
235 "truck": 7,
236 "tvmonitor": 62,
237 "umbrella": 25,
238 "vase": 75,
239 "wine glass": 40,
240 "zebra": 22
241 },
242 "label_noise_ratio": 0.5,
243 "layer_norm_eps": 1e-05,
244 "learn_initial_query": false,
245 "matcher_alpha": 0.25,
246 "matcher_bbox_cost": 5.0,
247 "matcher_class_cost": 2.0,
248 "matcher_gamma": 2.0,
249 "matcher_giou_cost": 2.0,
250 "model_type": "rt_detr",
251 "normalize_before": false,
252 "num_denoising": 100,
253 "num_feature_levels": 3,
254 "num_queries": 300,
255 "positional_encoding_temperature": 10000,
256 "torch_dtype": "float32",
257 "transformers_version": "4.42.0.dev0",
258 "use_focal_loss": true,
259 "use_pretrained_backbone": false,
260 "use_timm_backbone": false,
261 "weight_loss_bbox": 5.0,
262 "weight_loss_giou": 2.0,
263 "weight_loss_vfl": 1.0,
264 "with_box_refine": true
265 }
266