config.json
5.1 KB · 259 lines · json Raw
1 {
2 "activation_dropout": 0.0,
3 "activation_function": "silu",
4 "anchor_image_size": null,
5 "architectures": [
6 "RTDetrForObjectDetection"
7 ],
8 "attention_dropout": 0.0,
9 "auxiliary_loss": true,
10 "backbone": null,
11 "backbone_config": {
12 "depths": [
13 3,
14 4,
15 23,
16 3
17 ],
18 "model_type": "rt_detr_resnet",
19 "out_features": [
20 "stage2",
21 "stage3",
22 "stage4"
23 ],
24 "out_indices": [
25 2,
26 3,
27 4
28 ]
29 },
30 "backbone_kwargs": null,
31 "batch_norm_eps": 1e-05,
32 "box_noise_scale": 1.0,
33 "d_model": 256,
34 "decoder_activation_function": "relu",
35 "decoder_attention_heads": 8,
36 "decoder_ffn_dim": 1024,
37 "decoder_in_channels": [
38 384,
39 384,
40 384
41 ],
42 "decoder_layers": 6,
43 "decoder_n_points": 4,
44 "disable_custom_kernels": true,
45 "dropout": 0.0,
46 "encode_proj_layers": [
47 2
48 ],
49 "encoder_activation_function": "gelu",
50 "encoder_attention_heads": 8,
51 "encoder_ffn_dim": 2048,
52 "encoder_hidden_dim": 384,
53 "encoder_in_channels": [
54 512,
55 1024,
56 2048
57 ],
58 "encoder_layers": 1,
59 "eos_coefficient": 0.0001,
60 "eval_size": null,
61 "feat_strides": [
62 8,
63 16,
64 32
65 ],
66 "focal_loss_alpha": 0.75,
67 "focal_loss_gamma": 2.0,
68 "hidden_expansion": 1.0,
69 "id2label": {
70 "0": "person",
71 "1": "bicycle",
72 "2": "car",
73 "3": "motorbike",
74 "4": "aeroplane",
75 "5": "bus",
76 "6": "train",
77 "7": "truck",
78 "8": "boat",
79 "9": "traffic light",
80 "10": "fire hydrant",
81 "11": "stop sign",
82 "12": "parking meter",
83 "13": "bench",
84 "14": "bird",
85 "15": "cat",
86 "16": "dog",
87 "17": "horse",
88 "18": "sheep",
89 "19": "cow",
90 "20": "elephant",
91 "21": "bear",
92 "22": "zebra",
93 "23": "giraffe",
94 "24": "backpack",
95 "25": "umbrella",
96 "26": "handbag",
97 "27": "tie",
98 "28": "suitcase",
99 "29": "frisbee",
100 "30": "skis",
101 "31": "snowboard",
102 "32": "sports ball",
103 "33": "kite",
104 "34": "baseball bat",
105 "35": "baseball glove",
106 "36": "skateboard",
107 "37": "surfboard",
108 "38": "tennis racket",
109 "39": "bottle",
110 "40": "wine glass",
111 "41": "cup",
112 "42": "fork",
113 "43": "knife",
114 "44": "spoon",
115 "45": "bowl",
116 "46": "banana",
117 "47": "apple",
118 "48": "sandwich",
119 "49": "orange",
120 "50": "broccoli",
121 "51": "carrot",
122 "52": "hot dog",
123 "53": "pizza",
124 "54": "donut",
125 "55": "cake",
126 "56": "chair",
127 "57": "sofa",
128 "58": "pottedplant",
129 "59": "bed",
130 "60": "diningtable",
131 "61": "toilet",
132 "62": "tvmonitor",
133 "63": "laptop",
134 "64": "mouse",
135 "65": "remote",
136 "66": "keyboard",
137 "67": "cell phone",
138 "68": "microwave",
139 "69": "oven",
140 "70": "toaster",
141 "71": "sink",
142 "72": "refrigerator",
143 "73": "book",
144 "74": "clock",
145 "75": "vase",
146 "76": "scissors",
147 "77": "teddy bear",
148 "78": "hair drier",
149 "79": "toothbrush"
150 },
151 "initializer_range": 0.01,
152 "is_encoder_decoder": true,
153 "label2id": {
154 "aeroplane": 4,
155 "apple": 47,
156 "backpack": 24,
157 "banana": 46,
158 "baseball bat": 34,
159 "baseball glove": 35,
160 "bear": 21,
161 "bed": 59,
162 "bench": 13,
163 "bicycle": 1,
164 "bird": 14,
165 "boat": 8,
166 "book": 73,
167 "bottle": 39,
168 "bowl": 45,
169 "broccoli": 50,
170 "bus": 5,
171 "cake": 55,
172 "car": 2,
173 "carrot": 51,
174 "cat": 15,
175 "cell phone": 67,
176 "chair": 56,
177 "clock": 74,
178 "cow": 19,
179 "cup": 41,
180 "diningtable": 60,
181 "dog": 16,
182 "donut": 54,
183 "elephant": 20,
184 "fire hydrant": 10,
185 "fork": 42,
186 "frisbee": 29,
187 "giraffe": 23,
188 "hair drier": 78,
189 "handbag": 26,
190 "horse": 17,
191 "hot dog": 52,
192 "keyboard": 66,
193 "kite": 33,
194 "knife": 43,
195 "laptop": 63,
196 "microwave": 68,
197 "motorbike": 3,
198 "mouse": 64,
199 "orange": 49,
200 "oven": 69,
201 "parking meter": 12,
202 "person": 0,
203 "pizza": 53,
204 "pottedplant": 58,
205 "refrigerator": 72,
206 "remote": 65,
207 "sandwich": 48,
208 "scissors": 76,
209 "sheep": 18,
210 "sink": 71,
211 "skateboard": 36,
212 "skis": 30,
213 "snowboard": 31,
214 "sofa": 57,
215 "spoon": 44,
216 "sports ball": 32,
217 "stop sign": 11,
218 "suitcase": 28,
219 "surfboard": 37,
220 "teddy bear": 77,
221 "tennis racket": 38,
222 "tie": 27,
223 "toaster": 70,
224 "toilet": 61,
225 "toothbrush": 79,
226 "traffic light": 9,
227 "train": 6,
228 "truck": 7,
229 "tvmonitor": 62,
230 "umbrella": 25,
231 "vase": 75,
232 "wine glass": 40,
233 "zebra": 22
234 },
235 "label_noise_ratio": 0.5,
236 "layer_norm_eps": 1e-05,
237 "learn_initial_query": false,
238 "matcher_alpha": 0.25,
239 "matcher_bbox_cost": 5.0,
240 "matcher_class_cost": 2.0,
241 "matcher_gamma": 2.0,
242 "matcher_giou_cost": 2.0,
243 "model_type": "rt_detr",
244 "normalize_before": false,
245 "num_denoising": 100,
246 "num_feature_levels": 3,
247 "num_queries": 300,
248 "positional_encoding_temperature": 10000,
249 "torch_dtype": "float32",
250 "transformers_version": "4.42.0.dev0",
251 "use_focal_loss": true,
252 "use_pretrained_backbone": false,
253 "use_timm_backbone": false,
254 "weight_loss_bbox": 5.0,
255 "weight_loss_giou": 2.0,
256 "weight_loss_vfl": 1.0,
257 "with_box_refine": true
258 }
259