config.json
26.1 KB · 1082 lines · json Raw
1 {
2 "architectures": [
3 "ASTForAudioClassification"
4 ],
5 "attention_probs_dropout_prob": 0.0,
6 "frequency_stride": 10,
7 "hidden_act": "gelu",
8 "hidden_dropout_prob": 0.0,
9 "hidden_size": 768,
10 "id2label": {
11 "0": "Speech",
12 "1": "Male speech, man speaking",
13 "2": "Female speech, woman speaking",
14 "3": "Child speech, kid speaking",
15 "4": "Conversation",
16 "5": "Narration, monologue",
17 "6": "Babbling",
18 "7": "Speech synthesizer",
19 "8": "Shout",
20 "9": "Bellow",
21 "10": "Whoop",
22 "11": "Yell",
23 "12": "Battle cry",
24 "13": "Children shouting",
25 "14": "Screaming",
26 "15": "Whispering",
27 "16": "Laughter",
28 "17": "Baby laughter",
29 "18": "Giggle",
30 "19": "Snicker",
31 "20": "Belly laugh",
32 "21": "Chuckle, chortle",
33 "22": "Crying, sobbing",
34 "23": "Baby cry, infant cry",
35 "24": "Whimper",
36 "25": "Wail, moan",
37 "26": "Sigh",
38 "27": "Singing",
39 "28": "Choir",
40 "29": "Yodeling",
41 "30": "Chant",
42 "31": "Mantra",
43 "32": "Male singing",
44 "33": "Female singing",
45 "34": "Child singing",
46 "35": "Synthetic singing",
47 "36": "Rapping",
48 "37": "Humming",
49 "38": "Groan",
50 "39": "Grunt",
51 "40": "Whistling",
52 "41": "Breathing",
53 "42": "Wheeze",
54 "43": "Snoring",
55 "44": "Gasp",
56 "45": "Pant",
57 "46": "Snort",
58 "47": "Cough",
59 "48": "Throat clearing",
60 "49": "Sneeze",
61 "50": "Sniff",
62 "51": "Run",
63 "52": "Shuffle",
64 "53": "Walk, footsteps",
65 "54": "Chewing, mastication",
66 "55": "Biting",
67 "56": "Gargling",
68 "57": "Stomach rumble",
69 "58": "Burping, eructation",
70 "59": "Hiccup",
71 "60": "Fart",
72 "61": "Hands",
73 "62": "Finger snapping",
74 "63": "Clapping",
75 "64": "Heart sounds, heartbeat",
76 "65": "Heart murmur",
77 "66": "Cheering",
78 "67": "Applause",
79 "68": "Chatter",
80 "69": "Crowd",
81 "70": "Hubbub, speech noise, speech babble",
82 "71": "Children playing",
83 "72": "Animal",
84 "73": "Domestic animals, pets",
85 "74": "Dog",
86 "75": "Bark",
87 "76": "Yip",
88 "77": "Howl",
89 "78": "Bow-wow",
90 "79": "Growling",
91 "80": "Whimper (dog)",
92 "81": "Cat",
93 "82": "Purr",
94 "83": "Meow",
95 "84": "Hiss",
96 "85": "Caterwaul",
97 "86": "Livestock, farm animals, working animals",
98 "87": "Horse",
99 "88": "Clip-clop",
100 "89": "Neigh, whinny",
101 "90": "Cattle, bovinae",
102 "91": "Moo",
103 "92": "Cowbell",
104 "93": "Pig",
105 "94": "Oink",
106 "95": "Goat",
107 "96": "Bleat",
108 "97": "Sheep",
109 "98": "Fowl",
110 "99": "Chicken, rooster",
111 "100": "Cluck",
112 "101": "Crowing, cock-a-doodle-doo",
113 "102": "Turkey",
114 "103": "Gobble",
115 "104": "Duck",
116 "105": "Quack",
117 "106": "Goose",
118 "107": "Honk",
119 "108": "Wild animals",
120 "109": "Roaring cats (lions, tigers)",
121 "110": "Roar",
122 "111": "Bird",
123 "112": "Bird vocalization, bird call, bird song",
124 "113": "Chirp, tweet",
125 "114": "Squawk",
126 "115": "Pigeon, dove",
127 "116": "Coo",
128 "117": "Crow",
129 "118": "Caw",
130 "119": "Owl",
131 "120": "Hoot",
132 "121": "Bird flight, flapping wings",
133 "122": "Canidae, dogs, wolves",
134 "123": "Rodents, rats, mice",
135 "124": "Mouse",
136 "125": "Patter",
137 "126": "Insect",
138 "127": "Cricket",
139 "128": "Mosquito",
140 "129": "Fly, housefly",
141 "130": "Buzz",
142 "131": "Bee, wasp, etc.",
143 "132": "Frog",
144 "133": "Croak",
145 "134": "Snake",
146 "135": "Rattle",
147 "136": "Whale vocalization",
148 "137": "Music",
149 "138": "Musical instrument",
150 "139": "Plucked string instrument",
151 "140": "Guitar",
152 "141": "Electric guitar",
153 "142": "Bass guitar",
154 "143": "Acoustic guitar",
155 "144": "Steel guitar, slide guitar",
156 "145": "Tapping (guitar technique)",
157 "146": "Strum",
158 "147": "Banjo",
159 "148": "Sitar",
160 "149": "Mandolin",
161 "150": "Zither",
162 "151": "Ukulele",
163 "152": "Keyboard (musical)",
164 "153": "Piano",
165 "154": "Electric piano",
166 "155": "Organ",
167 "156": "Electronic organ",
168 "157": "Hammond organ",
169 "158": "Synthesizer",
170 "159": "Sampler",
171 "160": "Harpsichord",
172 "161": "Percussion",
173 "162": "Drum kit",
174 "163": "Drum machine",
175 "164": "Drum",
176 "165": "Snare drum",
177 "166": "Rimshot",
178 "167": "Drum roll",
179 "168": "Bass drum",
180 "169": "Timpani",
181 "170": "Tabla",
182 "171": "Cymbal",
183 "172": "Hi-hat",
184 "173": "Wood block",
185 "174": "Tambourine",
186 "175": "Rattle (instrument)",
187 "176": "Maraca",
188 "177": "Gong",
189 "178": "Tubular bells",
190 "179": "Mallet percussion",
191 "180": "Marimba, xylophone",
192 "181": "Glockenspiel",
193 "182": "Vibraphone",
194 "183": "Steelpan",
195 "184": "Orchestra",
196 "185": "Brass instrument",
197 "186": "French horn",
198 "187": "Trumpet",
199 "188": "Trombone",
200 "189": "Bowed string instrument",
201 "190": "String section",
202 "191": "Violin, fiddle",
203 "192": "Pizzicato",
204 "193": "Cello",
205 "194": "Double bass",
206 "195": "Wind instrument, woodwind instrument",
207 "196": "Flute",
208 "197": "Saxophone",
209 "198": "Clarinet",
210 "199": "Harp",
211 "200": "Bell",
212 "201": "Church bell",
213 "202": "Jingle bell",
214 "203": "Bicycle bell",
215 "204": "Tuning fork",
216 "205": "Chime",
217 "206": "Wind chime",
218 "207": "Change ringing (campanology)",
219 "208": "Harmonica",
220 "209": "Accordion",
221 "210": "Bagpipes",
222 "211": "Didgeridoo",
223 "212": "Shofar",
224 "213": "Theremin",
225 "214": "Singing bowl",
226 "215": "Scratching (performance technique)",
227 "216": "Pop music",
228 "217": "Hip hop music",
229 "218": "Beatboxing",
230 "219": "Rock music",
231 "220": "Heavy metal",
232 "221": "Punk rock",
233 "222": "Grunge",
234 "223": "Progressive rock",
235 "224": "Rock and roll",
236 "225": "Psychedelic rock",
237 "226": "Rhythm and blues",
238 "227": "Soul music",
239 "228": "Reggae",
240 "229": "Country",
241 "230": "Swing music",
242 "231": "Bluegrass",
243 "232": "Funk",
244 "233": "Folk music",
245 "234": "Middle Eastern music",
246 "235": "Jazz",
247 "236": "Disco",
248 "237": "Classical music",
249 "238": "Opera",
250 "239": "Electronic music",
251 "240": "House music",
252 "241": "Techno",
253 "242": "Dubstep",
254 "243": "Drum and bass",
255 "244": "Electronica",
256 "245": "Electronic dance music",
257 "246": "Ambient music",
258 "247": "Trance music",
259 "248": "Music of Latin America",
260 "249": "Salsa music",
261 "250": "Flamenco",
262 "251": "Blues",
263 "252": "Music for children",
264 "253": "New-age music",
265 "254": "Vocal music",
266 "255": "A capella",
267 "256": "Music of Africa",
268 "257": "Afrobeat",
269 "258": "Christian music",
270 "259": "Gospel music",
271 "260": "Music of Asia",
272 "261": "Carnatic music",
273 "262": "Music of Bollywood",
274 "263": "Ska",
275 "264": "Traditional music",
276 "265": "Independent music",
277 "266": "Song",
278 "267": "Background music",
279 "268": "Theme music",
280 "269": "Jingle (music)",
281 "270": "Soundtrack music",
282 "271": "Lullaby",
283 "272": "Video game music",
284 "273": "Christmas music",
285 "274": "Dance music",
286 "275": "Wedding music",
287 "276": "Happy music",
288 "277": "Funny music",
289 "278": "Sad music",
290 "279": "Tender music",
291 "280": "Exciting music",
292 "281": "Angry music",
293 "282": "Scary music",
294 "283": "Wind",
295 "284": "Rustling leaves",
296 "285": "Wind noise (microphone)",
297 "286": "Thunderstorm",
298 "287": "Thunder",
299 "288": "Water",
300 "289": "Rain",
301 "290": "Raindrop",
302 "291": "Rain on surface",
303 "292": "Stream",
304 "293": "Waterfall",
305 "294": "Ocean",
306 "295": "Waves, surf",
307 "296": "Steam",
308 "297": "Gurgling",
309 "298": "Fire",
310 "299": "Crackle",
311 "300": "Vehicle",
312 "301": "Boat, Water vehicle",
313 "302": "Sailboat, sailing ship",
314 "303": "Rowboat, canoe, kayak",
315 "304": "Motorboat, speedboat",
316 "305": "Ship",
317 "306": "Motor vehicle (road)",
318 "307": "Car",
319 "308": "Vehicle horn, car horn, honking",
320 "309": "Toot",
321 "310": "Car alarm",
322 "311": "Power windows, electric windows",
323 "312": "Skidding",
324 "313": "Tire squeal",
325 "314": "Car passing by",
326 "315": "Race car, auto racing",
327 "316": "Truck",
328 "317": "Air brake",
329 "318": "Air horn, truck horn",
330 "319": "Reversing beeps",
331 "320": "Ice cream truck, ice cream van",
332 "321": "Bus",
333 "322": "Emergency vehicle",
334 "323": "Police car (siren)",
335 "324": "Ambulance (siren)",
336 "325": "Fire engine, fire truck (siren)",
337 "326": "Motorcycle",
338 "327": "Traffic noise, roadway noise",
339 "328": "Rail transport",
340 "329": "Train",
341 "330": "Train whistle",
342 "331": "Train horn",
343 "332": "Railroad car, train wagon",
344 "333": "Train wheels squealing",
345 "334": "Subway, metro, underground",
346 "335": "Aircraft",
347 "336": "Aircraft engine",
348 "337": "Jet engine",
349 "338": "Propeller, airscrew",
350 "339": "Helicopter",
351 "340": "Fixed-wing aircraft, airplane",
352 "341": "Bicycle",
353 "342": "Skateboard",
354 "343": "Engine",
355 "344": "Light engine (high frequency)",
356 "345": "Dental drill, dentist's drill",
357 "346": "Lawn mower",
358 "347": "Chainsaw",
359 "348": "Medium engine (mid frequency)",
360 "349": "Heavy engine (low frequency)",
361 "350": "Engine knocking",
362 "351": "Engine starting",
363 "352": "Idling",
364 "353": "Accelerating, revving, vroom",
365 "354": "Door",
366 "355": "Doorbell",
367 "356": "Ding-dong",
368 "357": "Sliding door",
369 "358": "Slam",
370 "359": "Knock",
371 "360": "Tap",
372 "361": "Squeak",
373 "362": "Cupboard open or close",
374 "363": "Drawer open or close",
375 "364": "Dishes, pots, and pans",
376 "365": "Cutlery, silverware",
377 "366": "Chopping (food)",
378 "367": "Frying (food)",
379 "368": "Microwave oven",
380 "369": "Blender",
381 "370": "Water tap, faucet",
382 "371": "Sink (filling or washing)",
383 "372": "Bathtub (filling or washing)",
384 "373": "Hair dryer",
385 "374": "Toilet flush",
386 "375": "Toothbrush",
387 "376": "Electric toothbrush",
388 "377": "Vacuum cleaner",
389 "378": "Zipper (clothing)",
390 "379": "Keys jangling",
391 "380": "Coin (dropping)",
392 "381": "Scissors",
393 "382": "Electric shaver, electric razor",
394 "383": "Shuffling cards",
395 "384": "Typing",
396 "385": "Typewriter",
397 "386": "Computer keyboard",
398 "387": "Writing",
399 "388": "Alarm",
400 "389": "Telephone",
401 "390": "Telephone bell ringing",
402 "391": "Ringtone",
403 "392": "Telephone dialing, DTMF",
404 "393": "Dial tone",
405 "394": "Busy signal",
406 "395": "Alarm clock",
407 "396": "Siren",
408 "397": "Civil defense siren",
409 "398": "Buzzer",
410 "399": "Smoke detector, smoke alarm",
411 "400": "Fire alarm",
412 "401": "Foghorn",
413 "402": "Whistle",
414 "403": "Steam whistle",
415 "404": "Mechanisms",
416 "405": "Ratchet, pawl",
417 "406": "Clock",
418 "407": "Tick",
419 "408": "Tick-tock",
420 "409": "Gears",
421 "410": "Pulleys",
422 "411": "Sewing machine",
423 "412": "Mechanical fan",
424 "413": "Air conditioning",
425 "414": "Cash register",
426 "415": "Printer",
427 "416": "Camera",
428 "417": "Single-lens reflex camera",
429 "418": "Tools",
430 "419": "Hammer",
431 "420": "Jackhammer",
432 "421": "Sawing",
433 "422": "Filing (rasp)",
434 "423": "Sanding",
435 "424": "Power tool",
436 "425": "Drill",
437 "426": "Explosion",
438 "427": "Gunshot, gunfire",
439 "428": "Machine gun",
440 "429": "Fusillade",
441 "430": "Artillery fire",
442 "431": "Cap gun",
443 "432": "Fireworks",
444 "433": "Firecracker",
445 "434": "Burst, pop",
446 "435": "Eruption",
447 "436": "Boom",
448 "437": "Wood",
449 "438": "Chop",
450 "439": "Splinter",
451 "440": "Crack",
452 "441": "Glass",
453 "442": "Chink, clink",
454 "443": "Shatter",
455 "444": "Liquid",
456 "445": "Splash, splatter",
457 "446": "Slosh",
458 "447": "Squish",
459 "448": "Drip",
460 "449": "Pour",
461 "450": "Trickle, dribble",
462 "451": "Gush",
463 "452": "Fill (with liquid)",
464 "453": "Spray",
465 "454": "Pump (liquid)",
466 "455": "Stir",
467 "456": "Boiling",
468 "457": "Sonar",
469 "458": "Arrow",
470 "459": "Whoosh, swoosh, swish",
471 "460": "Thump, thud",
472 "461": "Thunk",
473 "462": "Electronic tuner",
474 "463": "Effects unit",
475 "464": "Chorus effect",
476 "465": "Basketball bounce",
477 "466": "Bang",
478 "467": "Slap, smack",
479 "468": "Whack, thwack",
480 "469": "Smash, crash",
481 "470": "Breaking",
482 "471": "Bouncing",
483 "472": "Whip",
484 "473": "Flap",
485 "474": "Scratch",
486 "475": "Scrape",
487 "476": "Rub",
488 "477": "Roll",
489 "478": "Crushing",
490 "479": "Crumpling, crinkling",
491 "480": "Tearing",
492 "481": "Beep, bleep",
493 "482": "Ping",
494 "483": "Ding",
495 "484": "Clang",
496 "485": "Squeal",
497 "486": "Creak",
498 "487": "Rustle",
499 "488": "Whir",
500 "489": "Clatter",
501 "490": "Sizzle",
502 "491": "Clicking",
503 "492": "Clickety-clack",
504 "493": "Rumble",
505 "494": "Plop",
506 "495": "Jingle, tinkle",
507 "496": "Hum",
508 "497": "Zing",
509 "498": "Boing",
510 "499": "Crunch",
511 "500": "Silence",
512 "501": "Sine wave",
513 "502": "Harmonic",
514 "503": "Chirp tone",
515 "504": "Sound effect",
516 "505": "Pulse",
517 "506": "Inside, small room",
518 "507": "Inside, large room or hall",
519 "508": "Inside, public space",
520 "509": "Outside, urban or manmade",
521 "510": "Outside, rural or natural",
522 "511": "Reverberation",
523 "512": "Echo",
524 "513": "Noise",
525 "514": "Environmental noise",
526 "515": "Static",
527 "516": "Mains hum",
528 "517": "Distortion",
529 "518": "Sidetone",
530 "519": "Cacophony",
531 "520": "White noise",
532 "521": "Pink noise",
533 "522": "Throbbing",
534 "523": "Vibration",
535 "524": "Television",
536 "525": "Radio",
537 "526": "Field recording"
538 },
539 "initializer_range": 0.02,
540 "intermediate_size": 3072,
541 "label2id": {
542 "A capella": 255,
543 "Accelerating, revving, vroom": 353,
544 "Accordion": 209,
545 "Acoustic guitar": 143,
546 "Afrobeat": 257,
547 "Air brake": 317,
548 "Air conditioning": 413,
549 "Air horn, truck horn": 318,
550 "Aircraft": 335,
551 "Aircraft engine": 336,
552 "Alarm": 388,
553 "Alarm clock": 395,
554 "Ambient music": 246,
555 "Ambulance (siren)": 324,
556 "Angry music": 281,
557 "Animal": 72,
558 "Applause": 67,
559 "Arrow": 458,
560 "Artillery fire": 430,
561 "Babbling": 6,
562 "Baby cry, infant cry": 23,
563 "Baby laughter": 17,
564 "Background music": 267,
565 "Bagpipes": 210,
566 "Bang": 466,
567 "Banjo": 147,
568 "Bark": 75,
569 "Basketball bounce": 465,
570 "Bass drum": 168,
571 "Bass guitar": 142,
572 "Bathtub (filling or washing)": 372,
573 "Battle cry": 12,
574 "Beatboxing": 218,
575 "Bee, wasp, etc.": 131,
576 "Beep, bleep": 481,
577 "Bell": 200,
578 "Bellow": 9,
579 "Belly laugh": 20,
580 "Bicycle": 341,
581 "Bicycle bell": 203,
582 "Bird": 111,
583 "Bird flight, flapping wings": 121,
584 "Bird vocalization, bird call, bird song": 112,
585 "Biting": 55,
586 "Bleat": 96,
587 "Blender": 369,
588 "Bluegrass": 231,
589 "Blues": 251,
590 "Boat, Water vehicle": 301,
591 "Boiling": 456,
592 "Boing": 498,
593 "Boom": 436,
594 "Bouncing": 471,
595 "Bow-wow": 78,
596 "Bowed string instrument": 189,
597 "Brass instrument": 185,
598 "Breaking": 470,
599 "Breathing": 41,
600 "Burping, eructation": 58,
601 "Burst, pop": 434,
602 "Bus": 321,
603 "Busy signal": 394,
604 "Buzz": 130,
605 "Buzzer": 398,
606 "Cacophony": 519,
607 "Camera": 416,
608 "Canidae, dogs, wolves": 122,
609 "Cap gun": 431,
610 "Car": 307,
611 "Car alarm": 310,
612 "Car passing by": 314,
613 "Carnatic music": 261,
614 "Cash register": 414,
615 "Cat": 81,
616 "Caterwaul": 85,
617 "Cattle, bovinae": 90,
618 "Caw": 118,
619 "Cello": 193,
620 "Chainsaw": 347,
621 "Change ringing (campanology)": 207,
622 "Chant": 30,
623 "Chatter": 68,
624 "Cheering": 66,
625 "Chewing, mastication": 54,
626 "Chicken, rooster": 99,
627 "Child singing": 34,
628 "Child speech, kid speaking": 3,
629 "Children playing": 71,
630 "Children shouting": 13,
631 "Chime": 205,
632 "Chink, clink": 442,
633 "Chirp tone": 503,
634 "Chirp, tweet": 113,
635 "Choir": 28,
636 "Chop": 438,
637 "Chopping (food)": 366,
638 "Chorus effect": 464,
639 "Christian music": 258,
640 "Christmas music": 273,
641 "Chuckle, chortle": 21,
642 "Church bell": 201,
643 "Civil defense siren": 397,
644 "Clang": 484,
645 "Clapping": 63,
646 "Clarinet": 198,
647 "Classical music": 237,
648 "Clatter": 489,
649 "Clickety-clack": 492,
650 "Clicking": 491,
651 "Clip-clop": 88,
652 "Clock": 406,
653 "Cluck": 100,
654 "Coin (dropping)": 380,
655 "Computer keyboard": 386,
656 "Conversation": 4,
657 "Coo": 116,
658 "Cough": 47,
659 "Country": 229,
660 "Cowbell": 92,
661 "Crack": 440,
662 "Crackle": 299,
663 "Creak": 486,
664 "Cricket": 127,
665 "Croak": 133,
666 "Crow": 117,
667 "Crowd": 69,
668 "Crowing, cock-a-doodle-doo": 101,
669 "Crumpling, crinkling": 479,
670 "Crunch": 499,
671 "Crushing": 478,
672 "Crying, sobbing": 22,
673 "Cupboard open or close": 362,
674 "Cutlery, silverware": 365,
675 "Cymbal": 171,
676 "Dance music": 274,
677 "Dental drill, dentist's drill": 345,
678 "Dial tone": 393,
679 "Didgeridoo": 211,
680 "Ding": 483,
681 "Ding-dong": 356,
682 "Disco": 236,
683 "Dishes, pots, and pans": 364,
684 "Distortion": 517,
685 "Dog": 74,
686 "Domestic animals, pets": 73,
687 "Door": 354,
688 "Doorbell": 355,
689 "Double bass": 194,
690 "Drawer open or close": 363,
691 "Drill": 425,
692 "Drip": 448,
693 "Drum": 164,
694 "Drum and bass": 243,
695 "Drum kit": 162,
696 "Drum machine": 163,
697 "Drum roll": 167,
698 "Dubstep": 242,
699 "Duck": 104,
700 "Echo": 512,
701 "Effects unit": 463,
702 "Electric guitar": 141,
703 "Electric piano": 154,
704 "Electric shaver, electric razor": 382,
705 "Electric toothbrush": 376,
706 "Electronic dance music": 245,
707 "Electronic music": 239,
708 "Electronic organ": 156,
709 "Electronic tuner": 462,
710 "Electronica": 244,
711 "Emergency vehicle": 322,
712 "Engine": 343,
713 "Engine knocking": 350,
714 "Engine starting": 351,
715 "Environmental noise": 514,
716 "Eruption": 435,
717 "Exciting music": 280,
718 "Explosion": 426,
719 "Fart": 60,
720 "Female singing": 33,
721 "Female speech, woman speaking": 2,
722 "Field recording": 526,
723 "Filing (rasp)": 422,
724 "Fill (with liquid)": 452,
725 "Finger snapping": 62,
726 "Fire": 298,
727 "Fire alarm": 400,
728 "Fire engine, fire truck (siren)": 325,
729 "Firecracker": 433,
730 "Fireworks": 432,
731 "Fixed-wing aircraft, airplane": 340,
732 "Flamenco": 250,
733 "Flap": 473,
734 "Flute": 196,
735 "Fly, housefly": 129,
736 "Foghorn": 401,
737 "Folk music": 233,
738 "Fowl": 98,
739 "French horn": 186,
740 "Frog": 132,
741 "Frying (food)": 367,
742 "Funk": 232,
743 "Funny music": 277,
744 "Fusillade": 429,
745 "Gargling": 56,
746 "Gasp": 44,
747 "Gears": 409,
748 "Giggle": 18,
749 "Glass": 441,
750 "Glockenspiel": 181,
751 "Goat": 95,
752 "Gobble": 103,
753 "Gong": 177,
754 "Goose": 106,
755 "Gospel music": 259,
756 "Groan": 38,
757 "Growling": 79,
758 "Grunge": 222,
759 "Grunt": 39,
760 "Guitar": 140,
761 "Gunshot, gunfire": 427,
762 "Gurgling": 297,
763 "Gush": 451,
764 "Hair dryer": 373,
765 "Hammer": 419,
766 "Hammond organ": 157,
767 "Hands": 61,
768 "Happy music": 276,
769 "Harmonic": 502,
770 "Harmonica": 208,
771 "Harp": 199,
772 "Harpsichord": 160,
773 "Heart murmur": 65,
774 "Heart sounds, heartbeat": 64,
775 "Heavy engine (low frequency)": 349,
776 "Heavy metal": 220,
777 "Helicopter": 339,
778 "Hi-hat": 172,
779 "Hiccup": 59,
780 "Hip hop music": 217,
781 "Hiss": 84,
782 "Honk": 107,
783 "Hoot": 120,
784 "Horse": 87,
785 "House music": 240,
786 "Howl": 77,
787 "Hubbub, speech noise, speech babble": 70,
788 "Hum": 496,
789 "Humming": 37,
790 "Ice cream truck, ice cream van": 320,
791 "Idling": 352,
792 "Independent music": 265,
793 "Insect": 126,
794 "Inside, large room or hall": 507,
795 "Inside, public space": 508,
796 "Inside, small room": 506,
797 "Jackhammer": 420,
798 "Jazz": 235,
799 "Jet engine": 337,
800 "Jingle (music)": 269,
801 "Jingle bell": 202,
802 "Jingle, tinkle": 495,
803 "Keyboard (musical)": 152,
804 "Keys jangling": 379,
805 "Knock": 359,
806 "Laughter": 16,
807 "Lawn mower": 346,
808 "Light engine (high frequency)": 344,
809 "Liquid": 444,
810 "Livestock, farm animals, working animals": 86,
811 "Lullaby": 271,
812 "Machine gun": 428,
813 "Mains hum": 516,
814 "Male singing": 32,
815 "Male speech, man speaking": 1,
816 "Mallet percussion": 179,
817 "Mandolin": 149,
818 "Mantra": 31,
819 "Maraca": 176,
820 "Marimba, xylophone": 180,
821 "Mechanical fan": 412,
822 "Mechanisms": 404,
823 "Medium engine (mid frequency)": 348,
824 "Meow": 83,
825 "Microwave oven": 368,
826 "Middle Eastern music": 234,
827 "Moo": 91,
828 "Mosquito": 128,
829 "Motor vehicle (road)": 306,
830 "Motorboat, speedboat": 304,
831 "Motorcycle": 326,
832 "Mouse": 124,
833 "Music": 137,
834 "Music for children": 252,
835 "Music of Africa": 256,
836 "Music of Asia": 260,
837 "Music of Bollywood": 262,
838 "Music of Latin America": 248,
839 "Musical instrument": 138,
840 "Narration, monologue": 5,
841 "Neigh, whinny": 89,
842 "New-age music": 253,
843 "Noise": 513,
844 "Ocean": 294,
845 "Oink": 94,
846 "Opera": 238,
847 "Orchestra": 184,
848 "Organ": 155,
849 "Outside, rural or natural": 510,
850 "Outside, urban or manmade": 509,
851 "Owl": 119,
852 "Pant": 45,
853 "Patter": 125,
854 "Percussion": 161,
855 "Piano": 153,
856 "Pig": 93,
857 "Pigeon, dove": 115,
858 "Ping": 482,
859 "Pink noise": 521,
860 "Pizzicato": 192,
861 "Plop": 494,
862 "Plucked string instrument": 139,
863 "Police car (siren)": 323,
864 "Pop music": 216,
865 "Pour": 449,
866 "Power tool": 424,
867 "Power windows, electric windows": 311,
868 "Printer": 415,
869 "Progressive rock": 223,
870 "Propeller, airscrew": 338,
871 "Psychedelic rock": 225,
872 "Pulleys": 410,
873 "Pulse": 505,
874 "Pump (liquid)": 454,
875 "Punk rock": 221,
876 "Purr": 82,
877 "Quack": 105,
878 "Race car, auto racing": 315,
879 "Radio": 525,
880 "Rail transport": 328,
881 "Railroad car, train wagon": 332,
882 "Rain": 289,
883 "Rain on surface": 291,
884 "Raindrop": 290,
885 "Rapping": 36,
886 "Ratchet, pawl": 405,
887 "Rattle": 135,
888 "Rattle (instrument)": 175,
889 "Reggae": 228,
890 "Reverberation": 511,
891 "Reversing beeps": 319,
892 "Rhythm and blues": 226,
893 "Rimshot": 166,
894 "Ringtone": 391,
895 "Roar": 110,
896 "Roaring cats (lions, tigers)": 109,
897 "Rock and roll": 224,
898 "Rock music": 219,
899 "Rodents, rats, mice": 123,
900 "Roll": 477,
901 "Rowboat, canoe, kayak": 303,
902 "Rub": 476,
903 "Rumble": 493,
904 "Run": 51,
905 "Rustle": 487,
906 "Rustling leaves": 284,
907 "Sad music": 278,
908 "Sailboat, sailing ship": 302,
909 "Salsa music": 249,
910 "Sampler": 159,
911 "Sanding": 423,
912 "Sawing": 421,
913 "Saxophone": 197,
914 "Scary music": 282,
915 "Scissors": 381,
916 "Scrape": 475,
917 "Scratch": 474,
918 "Scratching (performance technique)": 215,
919 "Screaming": 14,
920 "Sewing machine": 411,
921 "Shatter": 443,
922 "Sheep": 97,
923 "Ship": 305,
924 "Shofar": 212,
925 "Shout": 8,
926 "Shuffle": 52,
927 "Shuffling cards": 383,
928 "Sidetone": 518,
929 "Sigh": 26,
930 "Silence": 500,
931 "Sine wave": 501,
932 "Singing": 27,
933 "Singing bowl": 214,
934 "Single-lens reflex camera": 417,
935 "Sink (filling or washing)": 371,
936 "Siren": 396,
937 "Sitar": 148,
938 "Sizzle": 490,
939 "Ska": 263,
940 "Skateboard": 342,
941 "Skidding": 312,
942 "Slam": 358,
943 "Slap, smack": 467,
944 "Sliding door": 357,
945 "Slosh": 446,
946 "Smash, crash": 469,
947 "Smoke detector, smoke alarm": 399,
948 "Snake": 134,
949 "Snare drum": 165,
950 "Sneeze": 49,
951 "Snicker": 19,
952 "Sniff": 50,
953 "Snoring": 43,
954 "Snort": 46,
955 "Sonar": 457,
956 "Song": 266,
957 "Soul music": 227,
958 "Sound effect": 504,
959 "Soundtrack music": 270,
960 "Speech": 0,
961 "Speech synthesizer": 7,
962 "Splash, splatter": 445,
963 "Splinter": 439,
964 "Spray": 453,
965 "Squawk": 114,
966 "Squeak": 361,
967 "Squeal": 485,
968 "Squish": 447,
969 "Static": 515,
970 "Steam": 296,
971 "Steam whistle": 403,
972 "Steel guitar, slide guitar": 144,
973 "Steelpan": 183,
974 "Stir": 455,
975 "Stomach rumble": 57,
976 "Stream": 292,
977 "String section": 190,
978 "Strum": 146,
979 "Subway, metro, underground": 334,
980 "Swing music": 230,
981 "Synthesizer": 158,
982 "Synthetic singing": 35,
983 "Tabla": 170,
984 "Tambourine": 174,
985 "Tap": 360,
986 "Tapping (guitar technique)": 145,
987 "Tearing": 480,
988 "Techno": 241,
989 "Telephone": 389,
990 "Telephone bell ringing": 390,
991 "Telephone dialing, DTMF": 392,
992 "Television": 524,
993 "Tender music": 279,
994 "Theme music": 268,
995 "Theremin": 213,
996 "Throat clearing": 48,
997 "Throbbing": 522,
998 "Thump, thud": 460,
999 "Thunder": 287,
1000 "Thunderstorm": 286,
1001 "Thunk": 461,
1002 "Tick": 407,
1003 "Tick-tock": 408,
1004 "Timpani": 169,
1005 "Tire squeal": 313,
1006 "Toilet flush": 374,
1007 "Tools": 418,
1008 "Toot": 309,
1009 "Toothbrush": 375,
1010 "Traditional music": 264,
1011 "Traffic noise, roadway noise": 327,
1012 "Train": 329,
1013 "Train horn": 331,
1014 "Train wheels squealing": 333,
1015 "Train whistle": 330,
1016 "Trance music": 247,
1017 "Trickle, dribble": 450,
1018 "Trombone": 188,
1019 "Truck": 316,
1020 "Trumpet": 187,
1021 "Tubular bells": 178,
1022 "Tuning fork": 204,
1023 "Turkey": 102,
1024 "Typewriter": 385,
1025 "Typing": 384,
1026 "Ukulele": 151,
1027 "Vacuum cleaner": 377,
1028 "Vehicle": 300,
1029 "Vehicle horn, car horn, honking": 308,
1030 "Vibraphone": 182,
1031 "Vibration": 523,
1032 "Video game music": 272,
1033 "Violin, fiddle": 191,
1034 "Vocal music": 254,
1035 "Wail, moan": 25,
1036 "Walk, footsteps": 53,
1037 "Water": 288,
1038 "Water tap, faucet": 370,
1039 "Waterfall": 293,
1040 "Waves, surf": 295,
1041 "Wedding music": 275,
1042 "Whack, thwack": 468,
1043 "Whale vocalization": 136,
1044 "Wheeze": 42,
1045 "Whimper": 24,
1046 "Whimper (dog)": 80,
1047 "Whip": 472,
1048 "Whir": 488,
1049 "Whispering": 15,
1050 "Whistle": 402,
1051 "Whistling": 40,
1052 "White noise": 520,
1053 "Whoop": 10,
1054 "Whoosh, swoosh, swish": 459,
1055 "Wild animals": 108,
1056 "Wind": 283,
1057 "Wind chime": 206,
1058 "Wind instrument, woodwind instrument": 195,
1059 "Wind noise (microphone)": 285,
1060 "Wood": 437,
1061 "Wood block": 173,
1062 "Writing": 387,
1063 "Yell": 11,
1064 "Yip": 76,
1065 "Yodeling": 29,
1066 "Zing": 497,
1067 "Zipper (clothing)": 378,
1068 "Zither": 150
1069 },
1070 "layer_norm_eps": 1e-12,
1071 "max_length": 1024,
1072 "model_type": "audio-spectrogram-transformer",
1073 "num_attention_heads": 12,
1074 "num_hidden_layers": 12,
1075 "num_mel_bins": 128,
1076 "patch_size": 16,
1077 "qkv_bias": true,
1078 "time_stride": 10,
1079 "torch_dtype": "float32",
1080 "transformers_version": "4.25.0.dev0"
1081 }
1082