config.json
| 1 | { |
| 2 | "activation_dropout": 0.0, |
| 3 | "activation_function": "silu", |
| 4 | "anchor_image_size": null, |
| 5 | "architectures": [ |
| 6 | "RTDetrForObjectDetection" |
| 7 | ], |
| 8 | "attention_dropout": 0.0, |
| 9 | "auxiliary_loss": true, |
| 10 | "backbone": null, |
| 11 | "backbone_config": { |
| 12 | "depths": [ |
| 13 | 3, |
| 14 | 4, |
| 15 | 23, |
| 16 | 3 |
| 17 | ], |
| 18 | "model_type": "rt_detr_resnet", |
| 19 | "out_features": [ |
| 20 | "stage2", |
| 21 | "stage3", |
| 22 | "stage4" |
| 23 | ], |
| 24 | "out_indices": [ |
| 25 | 2, |
| 26 | 3, |
| 27 | 4 |
| 28 | ] |
| 29 | }, |
| 30 | "backbone_kwargs": null, |
| 31 | "batch_norm_eps": 1e-05, |
| 32 | "box_noise_scale": 1.0, |
| 33 | "d_model": 256, |
| 34 | "decoder_activation_function": "relu", |
| 35 | "decoder_attention_heads": 8, |
| 36 | "decoder_ffn_dim": 1024, |
| 37 | "decoder_in_channels": [ |
| 38 | 384, |
| 39 | 384, |
| 40 | 384 |
| 41 | ], |
| 42 | "decoder_layers": 6, |
| 43 | "decoder_n_points": 4, |
| 44 | "disable_custom_kernels": true, |
| 45 | "dropout": 0.0, |
| 46 | "encode_proj_layers": [ |
| 47 | 2 |
| 48 | ], |
| 49 | "encoder_activation_function": "gelu", |
| 50 | "encoder_attention_heads": 8, |
| 51 | "encoder_ffn_dim": 2048, |
| 52 | "encoder_hidden_dim": 384, |
| 53 | "encoder_in_channels": [ |
| 54 | 512, |
| 55 | 1024, |
| 56 | 2048 |
| 57 | ], |
| 58 | "encoder_layers": 1, |
| 59 | "eos_coefficient": 0.0001, |
| 60 | "eval_size": null, |
| 61 | "feat_strides": [ |
| 62 | 8, |
| 63 | 16, |
| 64 | 32 |
| 65 | ], |
| 66 | "focal_loss_alpha": 0.75, |
| 67 | "focal_loss_gamma": 2.0, |
| 68 | "hidden_expansion": 1.0, |
| 69 | "id2label": { |
| 70 | "0": "person", |
| 71 | "1": "bicycle", |
| 72 | "2": "car", |
| 73 | "3": "motorbike", |
| 74 | "4": "aeroplane", |
| 75 | "5": "bus", |
| 76 | "6": "train", |
| 77 | "7": "truck", |
| 78 | "8": "boat", |
| 79 | "9": "traffic light", |
| 80 | "10": "fire hydrant", |
| 81 | "11": "stop sign", |
| 82 | "12": "parking meter", |
| 83 | "13": "bench", |
| 84 | "14": "bird", |
| 85 | "15": "cat", |
| 86 | "16": "dog", |
| 87 | "17": "horse", |
| 88 | "18": "sheep", |
| 89 | "19": "cow", |
| 90 | "20": "elephant", |
| 91 | "21": "bear", |
| 92 | "22": "zebra", |
| 93 | "23": "giraffe", |
| 94 | "24": "backpack", |
| 95 | "25": "umbrella", |
| 96 | "26": "handbag", |
| 97 | "27": "tie", |
| 98 | "28": "suitcase", |
| 99 | "29": "frisbee", |
| 100 | "30": "skis", |
| 101 | "31": "snowboard", |
| 102 | "32": "sports ball", |
| 103 | "33": "kite", |
| 104 | "34": "baseball bat", |
| 105 | "35": "baseball glove", |
| 106 | "36": "skateboard", |
| 107 | "37": "surfboard", |
| 108 | "38": "tennis racket", |
| 109 | "39": "bottle", |
| 110 | "40": "wine glass", |
| 111 | "41": "cup", |
| 112 | "42": "fork", |
| 113 | "43": "knife", |
| 114 | "44": "spoon", |
| 115 | "45": "bowl", |
| 116 | "46": "banana", |
| 117 | "47": "apple", |
| 118 | "48": "sandwich", |
| 119 | "49": "orange", |
| 120 | "50": "broccoli", |
| 121 | "51": "carrot", |
| 122 | "52": "hot dog", |
| 123 | "53": "pizza", |
| 124 | "54": "donut", |
| 125 | "55": "cake", |
| 126 | "56": "chair", |
| 127 | "57": "sofa", |
| 128 | "58": "pottedplant", |
| 129 | "59": "bed", |
| 130 | "60": "diningtable", |
| 131 | "61": "toilet", |
| 132 | "62": "tvmonitor", |
| 133 | "63": "laptop", |
| 134 | "64": "mouse", |
| 135 | "65": "remote", |
| 136 | "66": "keyboard", |
| 137 | "67": "cell phone", |
| 138 | "68": "microwave", |
| 139 | "69": "oven", |
| 140 | "70": "toaster", |
| 141 | "71": "sink", |
| 142 | "72": "refrigerator", |
| 143 | "73": "book", |
| 144 | "74": "clock", |
| 145 | "75": "vase", |
| 146 | "76": "scissors", |
| 147 | "77": "teddy bear", |
| 148 | "78": "hair drier", |
| 149 | "79": "toothbrush" |
| 150 | }, |
| 151 | "initializer_range": 0.01, |
| 152 | "is_encoder_decoder": true, |
| 153 | "label2id": { |
| 154 | "aeroplane": 4, |
| 155 | "apple": 47, |
| 156 | "backpack": 24, |
| 157 | "banana": 46, |
| 158 | "baseball bat": 34, |
| 159 | "baseball glove": 35, |
| 160 | "bear": 21, |
| 161 | "bed": 59, |
| 162 | "bench": 13, |
| 163 | "bicycle": 1, |
| 164 | "bird": 14, |
| 165 | "boat": 8, |
| 166 | "book": 73, |
| 167 | "bottle": 39, |
| 168 | "bowl": 45, |
| 169 | "broccoli": 50, |
| 170 | "bus": 5, |
| 171 | "cake": 55, |
| 172 | "car": 2, |
| 173 | "carrot": 51, |
| 174 | "cat": 15, |
| 175 | "cell phone": 67, |
| 176 | "chair": 56, |
| 177 | "clock": 74, |
| 178 | "cow": 19, |
| 179 | "cup": 41, |
| 180 | "diningtable": 60, |
| 181 | "dog": 16, |
| 182 | "donut": 54, |
| 183 | "elephant": 20, |
| 184 | "fire hydrant": 10, |
| 185 | "fork": 42, |
| 186 | "frisbee": 29, |
| 187 | "giraffe": 23, |
| 188 | "hair drier": 78, |
| 189 | "handbag": 26, |
| 190 | "horse": 17, |
| 191 | "hot dog": 52, |
| 192 | "keyboard": 66, |
| 193 | "kite": 33, |
| 194 | "knife": 43, |
| 195 | "laptop": 63, |
| 196 | "microwave": 68, |
| 197 | "motorbike": 3, |
| 198 | "mouse": 64, |
| 199 | "orange": 49, |
| 200 | "oven": 69, |
| 201 | "parking meter": 12, |
| 202 | "person": 0, |
| 203 | "pizza": 53, |
| 204 | "pottedplant": 58, |
| 205 | "refrigerator": 72, |
| 206 | "remote": 65, |
| 207 | "sandwich": 48, |
| 208 | "scissors": 76, |
| 209 | "sheep": 18, |
| 210 | "sink": 71, |
| 211 | "skateboard": 36, |
| 212 | "skis": 30, |
| 213 | "snowboard": 31, |
| 214 | "sofa": 57, |
| 215 | "spoon": 44, |
| 216 | "sports ball": 32, |
| 217 | "stop sign": 11, |
| 218 | "suitcase": 28, |
| 219 | "surfboard": 37, |
| 220 | "teddy bear": 77, |
| 221 | "tennis racket": 38, |
| 222 | "tie": 27, |
| 223 | "toaster": 70, |
| 224 | "toilet": 61, |
| 225 | "toothbrush": 79, |
| 226 | "traffic light": 9, |
| 227 | "train": 6, |
| 228 | "truck": 7, |
| 229 | "tvmonitor": 62, |
| 230 | "umbrella": 25, |
| 231 | "vase": 75, |
| 232 | "wine glass": 40, |
| 233 | "zebra": 22 |
| 234 | }, |
| 235 | "label_noise_ratio": 0.5, |
| 236 | "layer_norm_eps": 1e-05, |
| 237 | "learn_initial_query": false, |
| 238 | "matcher_alpha": 0.25, |
| 239 | "matcher_bbox_cost": 5.0, |
| 240 | "matcher_class_cost": 2.0, |
| 241 | "matcher_gamma": 2.0, |
| 242 | "matcher_giou_cost": 2.0, |
| 243 | "model_type": "rt_detr", |
| 244 | "normalize_before": false, |
| 245 | "num_denoising": 100, |
| 246 | "num_feature_levels": 3, |
| 247 | "num_queries": 300, |
| 248 | "positional_encoding_temperature": 10000, |
| 249 | "torch_dtype": "float32", |
| 250 | "transformers_version": "4.42.0.dev0", |
| 251 | "use_focal_loss": true, |
| 252 | "use_pretrained_backbone": false, |
| 253 | "use_timm_backbone": false, |
| 254 | "weight_loss_bbox": 5.0, |
| 255 | "weight_loss_giou": 2.0, |
| 256 | "weight_loss_vfl": 1.0, |
| 257 | "with_box_refine": true |
| 258 | } |
| 259 | |