config.json
5.0 KB · 253 lines · json Raw
1 {
2 "activation_dropout": 0.0,
3 "activation_function": "silu",
4 "anchor_image_size": null,
5 "architectures": [
6 "RTDetrForObjectDetection"
7 ],
8 "attention_dropout": 0.0,
9 "auxiliary_loss": true,
10 "backbone": null,
11 "backbone_config": {
12 "model_type": "rt_detr_resnet",
13 "out_features": [
14 "stage2",
15 "stage3",
16 "stage4"
17 ],
18 "out_indices": [
19 2,
20 3,
21 4
22 ]
23 },
24 "backbone_kwargs": null,
25 "batch_norm_eps": 1e-05,
26 "box_noise_scale": 1.0,
27 "d_model": 256,
28 "decoder_activation_function": "relu",
29 "decoder_attention_heads": 8,
30 "decoder_ffn_dim": 1024,
31 "decoder_in_channels": [
32 256,
33 256,
34 256
35 ],
36 "decoder_layers": 6,
37 "decoder_n_points": 4,
38 "disable_custom_kernels": true,
39 "dropout": 0.0,
40 "encode_proj_layers": [
41 2
42 ],
43 "encoder_activation_function": "gelu",
44 "encoder_attention_heads": 8,
45 "encoder_ffn_dim": 1024,
46 "encoder_hidden_dim": 256,
47 "encoder_in_channels": [
48 512,
49 1024,
50 2048
51 ],
52 "encoder_layers": 1,
53 "eos_coefficient": 0.0001,
54 "eval_size": null,
55 "feat_strides": [
56 8,
57 16,
58 32
59 ],
60 "focal_loss_alpha": 0.75,
61 "focal_loss_gamma": 2.0,
62 "hidden_expansion": 1.0,
63 "id2label": {
64 "0": "person",
65 "1": "bicycle",
66 "2": "car",
67 "3": "motorbike",
68 "4": "aeroplane",
69 "5": "bus",
70 "6": "train",
71 "7": "truck",
72 "8": "boat",
73 "9": "traffic light",
74 "10": "fire hydrant",
75 "11": "stop sign",
76 "12": "parking meter",
77 "13": "bench",
78 "14": "bird",
79 "15": "cat",
80 "16": "dog",
81 "17": "horse",
82 "18": "sheep",
83 "19": "cow",
84 "20": "elephant",
85 "21": "bear",
86 "22": "zebra",
87 "23": "giraffe",
88 "24": "backpack",
89 "25": "umbrella",
90 "26": "handbag",
91 "27": "tie",
92 "28": "suitcase",
93 "29": "frisbee",
94 "30": "skis",
95 "31": "snowboard",
96 "32": "sports ball",
97 "33": "kite",
98 "34": "baseball bat",
99 "35": "baseball glove",
100 "36": "skateboard",
101 "37": "surfboard",
102 "38": "tennis racket",
103 "39": "bottle",
104 "40": "wine glass",
105 "41": "cup",
106 "42": "fork",
107 "43": "knife",
108 "44": "spoon",
109 "45": "bowl",
110 "46": "banana",
111 "47": "apple",
112 "48": "sandwich",
113 "49": "orange",
114 "50": "broccoli",
115 "51": "carrot",
116 "52": "hot dog",
117 "53": "pizza",
118 "54": "donut",
119 "55": "cake",
120 "56": "chair",
121 "57": "sofa",
122 "58": "pottedplant",
123 "59": "bed",
124 "60": "diningtable",
125 "61": "toilet",
126 "62": "tvmonitor",
127 "63": "laptop",
128 "64": "mouse",
129 "65": "remote",
130 "66": "keyboard",
131 "67": "cell phone",
132 "68": "microwave",
133 "69": "oven",
134 "70": "toaster",
135 "71": "sink",
136 "72": "refrigerator",
137 "73": "book",
138 "74": "clock",
139 "75": "vase",
140 "76": "scissors",
141 "77": "teddy bear",
142 "78": "hair drier",
143 "79": "toothbrush"
144 },
145 "initializer_range": 0.01,
146 "is_encoder_decoder": true,
147 "label2id": {
148 "aeroplane": 4,
149 "apple": 47,
150 "backpack": 24,
151 "banana": 46,
152 "baseball bat": 34,
153 "baseball glove": 35,
154 "bear": 21,
155 "bed": 59,
156 "bench": 13,
157 "bicycle": 1,
158 "bird": 14,
159 "boat": 8,
160 "book": 73,
161 "bottle": 39,
162 "bowl": 45,
163 "broccoli": 50,
164 "bus": 5,
165 "cake": 55,
166 "car": 2,
167 "carrot": 51,
168 "cat": 15,
169 "cell phone": 67,
170 "chair": 56,
171 "clock": 74,
172 "cow": 19,
173 "cup": 41,
174 "diningtable": 60,
175 "dog": 16,
176 "donut": 54,
177 "elephant": 20,
178 "fire hydrant": 10,
179 "fork": 42,
180 "frisbee": 29,
181 "giraffe": 23,
182 "hair drier": 78,
183 "handbag": 26,
184 "horse": 17,
185 "hot dog": 52,
186 "keyboard": 66,
187 "kite": 33,
188 "knife": 43,
189 "laptop": 63,
190 "microwave": 68,
191 "motorbike": 3,
192 "mouse": 64,
193 "orange": 49,
194 "oven": 69,
195 "parking meter": 12,
196 "person": 0,
197 "pizza": 53,
198 "pottedplant": 58,
199 "refrigerator": 72,
200 "remote": 65,
201 "sandwich": 48,
202 "scissors": 76,
203 "sheep": 18,
204 "sink": 71,
205 "skateboard": 36,
206 "skis": 30,
207 "snowboard": 31,
208 "sofa": 57,
209 "spoon": 44,
210 "sports ball": 32,
211 "stop sign": 11,
212 "suitcase": 28,
213 "surfboard": 37,
214 "teddy bear": 77,
215 "tennis racket": 38,
216 "tie": 27,
217 "toaster": 70,
218 "toilet": 61,
219 "toothbrush": 79,
220 "traffic light": 9,
221 "train": 6,
222 "truck": 7,
223 "tvmonitor": 62,
224 "umbrella": 25,
225 "vase": 75,
226 "wine glass": 40,
227 "zebra": 22
228 },
229 "label_noise_ratio": 0.5,
230 "layer_norm_eps": 1e-05,
231 "learn_initial_query": false,
232 "matcher_alpha": 0.25,
233 "matcher_bbox_cost": 5.0,
234 "matcher_class_cost": 2.0,
235 "matcher_gamma": 2.0,
236 "matcher_giou_cost": 2.0,
237 "model_type": "rt_detr",
238 "normalize_before": false,
239 "num_denoising": 100,
240 "num_feature_levels": 3,
241 "num_queries": 300,
242 "positional_encoding_temperature": 10000,
243 "torch_dtype": "float32",
244 "transformers_version": "4.42.0.dev0",
245 "use_focal_loss": true,
246 "use_pretrained_backbone": false,
247 "use_timm_backbone": false,
248 "weight_loss_bbox": 5.0,
249 "weight_loss_giou": 2.0,
250 "weight_loss_vfl": 1.0,
251 "with_box_refine": true
252 }
253