tokenizer.json
24.9 KB · 1435 lines · json Raw
1 {
2 "version": "1.0",
3 "truncation": null,
4 "padding": null,
5 "added_tokens": [
6 {
7 "id": 0,
8 "special": true,
9 "content": "[STOP]",
10 "single_word": false,
11 "lstrip": false,
12 "rstrip": false,
13 "normalized": false
14 },
15 {
16 "id": 1,
17 "special": true,
18 "content": "[UNK]",
19 "single_word": false,
20 "lstrip": false,
21 "rstrip": false,
22 "normalized": false
23 },
24 {
25 "id": 2,
26 "special": true,
27 "content": "[SPACE]",
28 "single_word": false,
29 "lstrip": false,
30 "rstrip": false,
31 "normalized": false
32 },
33 {
34 "id": 255,
35 "special": true,
36 "content": "[START]",
37 "single_word": false,
38 "lstrip": false,
39 "rstrip": false,
40 "normalized": false
41 },
42 {
43 "id": 604,
44 "content": "[UH]",
45 "single_word": false,
46 "lstrip": false,
47 "rstrip": false,
48 "normalized": false,
49 "special": true
50 },
51 {
52 "id": 605,
53 "content": "[UM]",
54 "single_word": false,
55 "lstrip": false,
56 "rstrip": false,
57 "normalized": false,
58 "special": true
59 },
60 {
61 "id": 606,
62 "content": "[giggle]",
63 "single_word": false,
64 "lstrip": false,
65 "rstrip": false,
66 "normalized": false,
67 "special": true
68 },
69 {
70 "id": 607,
71 "content": "[laughter]",
72 "single_word": false,
73 "lstrip": false,
74 "rstrip": false,
75 "normalized": false,
76 "special": true
77 },
78 {
79 "id": 608,
80 "content": "[guffaw]",
81 "single_word": false,
82 "lstrip": false,
83 "rstrip": false,
84 "normalized": false,
85 "special": true
86 },
87 {
88 "id": 609,
89 "content": "[inhale]",
90 "single_word": false,
91 "lstrip": false,
92 "rstrip": false,
93 "normalized": false,
94 "special": true
95 },
96 {
97 "id": 610,
98 "content": "[exhale]",
99 "single_word": false,
100 "lstrip": false,
101 "rstrip": false,
102 "normalized": false,
103 "special": true
104 },
105 {
106 "id": 611,
107 "content": "[sigh]",
108 "single_word": false,
109 "lstrip": false,
110 "rstrip": false,
111 "normalized": false,
112 "special": true
113 },
114 {
115 "id": 612,
116 "content": "[cry]",
117 "single_word": false,
118 "lstrip": false,
119 "rstrip": false,
120 "normalized": false,
121 "special": true
122 },
123 {
124 "id": 613,
125 "content": "[bark]",
126 "single_word": false,
127 "lstrip": false,
128 "rstrip": false,
129 "normalized": false,
130 "special": true
131 },
132 {
133 "id": 614,
134 "content": "[howl]",
135 "single_word": false,
136 "lstrip": false,
137 "rstrip": false,
138 "normalized": false,
139 "special": true
140 },
141 {
142 "id": 615,
143 "content": "[meow]",
144 "single_word": false,
145 "lstrip": false,
146 "rstrip": false,
147 "normalized": false,
148 "special": true
149 },
150 {
151 "id": 616,
152 "content": "[singing]",
153 "single_word": false,
154 "lstrip": false,
155 "rstrip": false,
156 "normalized": false,
157 "special": true
158 },
159 {
160 "id": 617,
161 "content": "[music]",
162 "single_word": false,
163 "lstrip": false,
164 "rstrip": false,
165 "normalized": false,
166 "special": true
167 },
168 {
169 "id": 618,
170 "content": "[whistle]",
171 "single_word": false,
172 "lstrip": false,
173 "rstrip": false,
174 "normalized": false,
175 "special": true
176 },
177 {
178 "id": 619,
179 "content": "[humming]",
180 "single_word": false,
181 "lstrip": false,
182 "rstrip": false,
183 "normalized": false,
184 "special": true
185 },
186 {
187 "id": 620,
188 "content": "[gasp]",
189 "single_word": false,
190 "lstrip": false,
191 "rstrip": false,
192 "normalized": false,
193 "special": true
194 },
195 {
196 "id": 621,
197 "content": "[groan]",
198 "single_word": false,
199 "lstrip": false,
200 "rstrip": false,
201 "normalized": false,
202 "special": true
203 },
204 {
205 "id": 622,
206 "content": "[whisper]",
207 "single_word": false,
208 "lstrip": false,
209 "rstrip": false,
210 "normalized": false,
211 "special": true
212 },
213 {
214 "id": 623,
215 "content": "[mumble]",
216 "single_word": false,
217 "lstrip": false,
218 "rstrip": false,
219 "normalized": false,
220 "special": true
221 },
222 {
223 "id": 624,
224 "content": "[sniff]",
225 "single_word": false,
226 "lstrip": false,
227 "rstrip": false,
228 "normalized": false,
229 "special": true
230 },
231 {
232 "id": 625,
233 "content": "[sneeze]",
234 "single_word": false,
235 "lstrip": false,
236 "rstrip": false,
237 "normalized": false,
238 "special": true
239 },
240 {
241 "id": 626,
242 "content": "[cough]",
243 "single_word": false,
244 "lstrip": false,
245 "rstrip": false,
246 "normalized": false,
247 "special": true
248 },
249 {
250 "id": 627,
251 "content": "[snore]",
252 "single_word": false,
253 "lstrip": false,
254 "rstrip": false,
255 "normalized": false,
256 "special": true
257 },
258 {
259 "id": 628,
260 "content": "[chew]",
261 "single_word": false,
262 "lstrip": false,
263 "rstrip": false,
264 "normalized": false,
265 "special": true
266 },
267 {
268 "id": 629,
269 "content": "[sip]",
270 "single_word": false,
271 "lstrip": false,
272 "rstrip": false,
273 "normalized": false,
274 "special": true
275 },
276 {
277 "id": 630,
278 "content": "[clear_throat]",
279 "single_word": false,
280 "lstrip": false,
281 "rstrip": false,
282 "normalized": false,
283 "special": true
284 },
285 {
286 "id": 631,
287 "content": "[kiss]",
288 "single_word": false,
289 "lstrip": false,
290 "rstrip": false,
291 "normalized": false,
292 "special": true
293 },
294 {
295 "id": 632,
296 "content": "[shhh]",
297 "single_word": false,
298 "lstrip": false,
299 "rstrip": false,
300 "normalized": false,
301 "special": true
302 },
303 {
304 "id": 633,
305 "content": "[gibberish]",
306 "single_word": false,
307 "lstrip": false,
308 "rstrip": false,
309 "normalized": false,
310 "special": true
311 },
312 {
313 "id": 634,
314 "content": "[fr]",
315 "single_word": false,
316 "lstrip": false,
317 "rstrip": false,
318 "normalized": false,
319 "special": true
320 },
321 {
322 "id": 635,
323 "content": "[es]",
324 "single_word": false,
325 "lstrip": false,
326 "rstrip": false,
327 "normalized": false,
328 "special": true
329 },
330 {
331 "id": 636,
332 "content": "[de]",
333 "single_word": false,
334 "lstrip": false,
335 "rstrip": false,
336 "normalized": false,
337 "special": true
338 },
339 {
340 "id": 637,
341 "content": "[it]",
342 "single_word": false,
343 "lstrip": false,
344 "rstrip": false,
345 "normalized": false,
346 "special": true
347 },
348 {
349 "id": 638,
350 "content": "[ipa]",
351 "single_word": false,
352 "lstrip": false,
353 "rstrip": false,
354 "normalized": false,
355 "special": true
356 },
357 {
358 "id": 639,
359 "content": "[end_of_label]",
360 "single_word": false,
361 "lstrip": false,
362 "rstrip": false,
363 "normalized": false,
364 "special": true
365 },
366 {
367 "id": 695,
368 "content": "[PLACEHOLDER55]",
369 "single_word": false,
370 "lstrip": false,
371 "rstrip": false,
372 "normalized": false,
373 "special": true
374 },
375 {
376 "id": 696,
377 "content": "[PLACEHOLDER56]",
378 "single_word": false,
379 "lstrip": false,
380 "rstrip": false,
381 "normalized": false,
382 "special": true
383 },
384 {
385 "id": 697,
386 "content": "[PLACEHOLDER57]",
387 "single_word": false,
388 "lstrip": false,
389 "rstrip": false,
390 "normalized": false,
391 "special": true
392 },
393 {
394 "id": 698,
395 "content": "[PLACEHOLDER58]",
396 "single_word": false,
397 "lstrip": false,
398 "rstrip": false,
399 "normalized": false,
400 "special": true
401 },
402 {
403 "id": 699,
404 "content": "[PLACEHOLDER59]",
405 "single_word": false,
406 "lstrip": false,
407 "rstrip": false,
408 "normalized": false,
409 "special": true
410 },
411 {
412 "id": 700,
413 "content": "[PLACEHOLDER60]",
414 "single_word": false,
415 "lstrip": false,
416 "rstrip": false,
417 "normalized": false,
418 "special": true
419 },
420 {
421 "id": 701,
422 "content": "[PLACEHOLDER61]",
423 "single_word": false,
424 "lstrip": false,
425 "rstrip": false,
426 "normalized": false,
427 "special": true
428 },
429 {
430 "id": 702,
431 "content": "[PLACEHOLDER62]",
432 "single_word": false,
433 "lstrip": false,
434 "rstrip": false,
435 "normalized": false,
436 "special": true
437 },
438 {
439 "id": 703,
440 "content": "[PLACEHOLDER63]",
441 "single_word": false,
442 "lstrip": false,
443 "rstrip": false,
444 "normalized": false,
445 "special": true
446 }
447 ],
448 "normalizer": null,
449 "pre_tokenizer": {
450 "type": "Whitespace"
451 },
452 "post_processor": null,
453 "decoder": null,
454 "model": {
455 "type": "BPE",
456 "dropout": null,
457 "unk_token": "[UNK]",
458 "continuing_subword_prefix": null,
459 "end_of_word_suffix": null,
460 "fuse_unk": false,
461 "vocab": {
462 "[STOP]": 0,
463 "[UNK]": 1,
464 "[SPACE]": 2,
465 "!": 3,
466 "'": 4,
467 "(": 5,
468 ")": 6,
469 ",": 7,
470 "-": 8,
471 ".": 9,
472 "/": 10,
473 ":": 11,
474 ";": 12,
475 "?": 13,
476 "a": 14,
477 "b": 15,
478 "c": 16,
479 "d": 17,
480 "e": 18,
481 "f": 19,
482 "g": 20,
483 "h": 21,
484 "i": 22,
485 "j": 23,
486 "k": 24,
487 "l": 25,
488 "m": 26,
489 "n": 27,
490 "o": 28,
491 "p": 29,
492 "q": 30,
493 "r": 31,
494 "s": 32,
495 "t": 33,
496 "u": 34,
497 "v": 35,
498 "w": 36,
499 "x": 37,
500 "y": 38,
501 "z": 39,
502 "th": 40,
503 "in": 41,
504 "the": 42,
505 "an": 43,
506 "er": 44,
507 "ou": 45,
508 "re": 46,
509 "on": 47,
510 "at": 48,
511 "ed": 49,
512 "en": 50,
513 "to": 51,
514 "ing": 52,
515 "and": 53,
516 "is": 54,
517 "as": 55,
518 "al": 56,
519 "or": 57,
520 "of": 58,
521 "ar": 59,
522 "it": 60,
523 "es": 61,
524 "he": 62,
525 "st": 63,
526 "le": 64,
527 "om": 65,
528 "se": 66,
529 "be": 67,
530 "ad": 68,
531 "ow": 69,
532 "ly": 70,
533 "ch": 71,
534 "wh": 72,
535 "that": 73,
536 "you": 74,
537 "li": 75,
538 "ve": 76,
539 "ac": 77,
540 "ti": 78,
541 "ld": 79,
542 "me": 80,
543 "was": 81,
544 "gh": 82,
545 "id": 83,
546 "ll": 84,
547 "wi": 85,
548 "ent": 86,
549 "for": 87,
550 "ay": 88,
551 "ro": 89,
552 "ver": 90,
553 "ic": 91,
554 "her": 92,
555 "ke": 93,
556 "his": 94,
557 "no": 95,
558 "ut": 96,
559 "un": 97,
560 "ir": 98,
561 "lo": 99,
562 "we": 100,
563 "ri": 101,
564 "ha": 102,
565 "with": 103,
566 "ght": 104,
567 "out": 105,
568 "im": 106,
569 "ion": 107,
570 "all": 108,
571 "ab": 109,
572 "one": 110,
573 "ne": 111,
574 "ge": 112,
575 "ould": 113,
576 "ter": 114,
577 "mo": 115,
578 "had": 116,
579 "ce": 117,
580 "she": 118,
581 "go": 119,
582 "sh": 120,
583 "ur": 121,
584 "am": 122,
585 "so": 123,
586 "pe": 124,
587 "my": 125,
588 "de": 126,
589 "are": 127,
590 "but": 128,
591 "ome": 129,
592 "fr": 130,
593 "ther": 131,
594 "fe": 132,
595 "su": 133,
596 "do": 134,
597 "con": 135,
598 "te": 136,
599 "ain": 137,
600 "ere": 138,
601 "po": 139,
602 "if": 140,
603 "they": 141,
604 "us": 142,
605 "ag": 143,
606 "tr": 144,
607 "now": 145,
608 "oun": 146,
609 "this": 147,
610 "have": 148,
611 "not": 149,
612 "sa": 150,
613 "il": 151,
614 "up": 152,
615 "thing": 153,
616 "from": 154,
617 "ap": 155,
618 "him": 156,
619 "ack": 157,
620 "ation": 158,
621 "ant": 159,
622 "our": 160,
623 "op": 161,
624 "like": 162,
625 "ust": 163,
626 "ess": 164,
627 "bo": 165,
628 "ok": 166,
629 "ul": 167,
630 "ind": 168,
631 "ex": 169,
632 "com": 170,
633 "some": 171,
634 "there": 172,
635 "ers": 173,
636 "co": 174,
637 "res": 175,
638 "man": 176,
639 "ard": 177,
640 "pl": 178,
641 "wor": 179,
642 "way": 180,
643 "tion": 181,
644 "fo": 182,
645 "ca": 183,
646 "were": 184,
647 "by": 185,
648 "ate": 186,
649 "pro": 187,
650 "ted": 188,
651 "ound": 189,
652 "own": 190,
653 "would": 191,
654 "ts": 192,
655 "what": 193,
656 "qu": 194,
657 "ally": 195,
658 "ight": 196,
659 "ck": 197,
660 "gr": 198,
661 "when": 199,
662 "ven": 200,
663 "can": 201,
664 "ough": 202,
665 "ine": 203,
666 "end": 204,
667 "per": 205,
668 "ous": 206,
669 "od": 207,
670 "ide": 208,
671 "know": 209,
672 "ty": 210,
673 "very": 211,
674 "si": 212,
675 "ak": 213,
676 "who": 214,
677 "about": 215,
678 "ill": 216,
679 "them": 217,
680 "est": 218,
681 "red": 219,
682 "ye": 220,
683 "could": 221,
684 "ong": 222,
685 "your": 223,
686 "their": 224,
687 "em": 225,
688 "just": 226,
689 "other": 227,
690 "into": 228,
691 "any": 229,
692 "whi": 230,
693 "um": 231,
694 "tw": 232,
695 "ast": 233,
696 "der": 234,
697 "did": 235,
698 "ie": 236,
699 "been": 237,
700 "ace": 238,
701 "ink": 239,
702 "ity": 240,
703 "back": 241,
704 "ting": 242,
705 "br": 243,
706 "more": 244,
707 "ake": 245,
708 "pp": 246,
709 "then": 247,
710 "sp": 248,
711 "el": 249,
712 "use": 250,
713 "bl": 251,
714 "said": 252,
715 "over": 253,
716 "get": 254,
717 "[START]": 255,
718 "\"": 256,
719 "#": 257,
720 "$": 258,
721 "%": 259,
722 "&": 260,
723 "*": 261,
724 "+": 262,
725 "0": 263,
726 "1": 264,
727 "2": 265,
728 "3": 266,
729 "4": 267,
730 "5": 268,
731 "6": 269,
732 "7": 270,
733 "8": 271,
734 "9": 272,
735 "<": 273,
736 "=": 274,
737 ">": 275,
738 "@": 276,
739 "A": 277,
740 "B": 278,
741 "C": 279,
742 "D": 280,
743 "E": 281,
744 "F": 282,
745 "G": 283,
746 "H": 284,
747 "I": 285,
748 "J": 286,
749 "K": 287,
750 "L": 288,
751 "M": 289,
752 "N": 290,
753 "O": 291,
754 "P": 292,
755 "Q": 293,
756 "R": 294,
757 "S": 295,
758 "T": 296,
759 "U": 297,
760 "V": 298,
761 "W": 299,
762 "X": 300,
763 "Y": 301,
764 "Z": 302,
765 "[": 303,
766 "\\": 304,
767 "]": 305,
768 "^": 306,
769 "_": 307,
770 "`": 308,
771 "{": 309,
772 "|": 310,
773 "}": 311,
774 "~": 312,
775 "‐": 313,
776 "‑": 314,
777 "‒": 315,
778 "–": 316,
779 "—": 317,
780 "―": 318,
781 "‖": 319,
782 "‗": 320,
783 "‘": 321,
784 "’": 322,
785 "‚": 323,
786 "‛": 324,
787 "“": 325,
788 "”": 326,
789 "„": 327,
790 "‟": 328,
791 " ": 329,
792 "¡": 330,
793 "¢": 331,
794 "£": 332,
795 "¤": 333,
796 "¥": 334,
797 "¦": 335,
798 "§": 336,
799 "¨": 337,
800 "©": 338,
801 "ª": 339,
802 "«": 340,
803 "¬": 341,
804 "­": 342,
805 "®": 343,
806 "¯": 344,
807 "°": 345,
808 "±": 346,
809 "²": 347,
810 "³": 348,
811 "´": 349,
812 "µ": 350,
813 "¶": 351,
814 "·": 352,
815 "¸": 353,
816 "¹": 354,
817 "º": 355,
818 "»": 356,
819 "¼": 357,
820 "½": 358,
821 "¾": 359,
822 "¿": 360,
823 "À": 361,
824 "Á": 362,
825 "Â": 363,
826 "Ã": 364,
827 "Ä": 365,
828 "Å": 366,
829 "Æ": 367,
830 "Ç": 368,
831 "È": 369,
832 "É": 370,
833 "Ê": 371,
834 "Ë": 372,
835 "Ì": 373,
836 "Í": 374,
837 "Î": 375,
838 "Ï": 376,
839 "Ð": 377,
840 "Ñ": 378,
841 "Ò": 379,
842 "Ó": 380,
843 "Ô": 381,
844 "Õ": 382,
845 "Ö": 383,
846 "×": 384,
847 "Ø": 385,
848 "Ù": 386,
849 "Ú": 387,
850 "Û": 388,
851 "Ü": 389,
852 "Ý": 390,
853 "Þ": 391,
854 "ß": 392,
855 "à": 393,
856 "á": 394,
857 "â": 395,
858 "ã": 396,
859 "ä": 397,
860 "å": 398,
861 "æ": 399,
862 "ç": 400,
863 "è": 401,
864 "é": 402,
865 "ê": 403,
866 "ë": 404,
867 "ì": 405,
868 "í": 406,
869 "î": 407,
870 "ï": 408,
871 "ð": 409,
872 "ñ": 410,
873 "ò": 411,
874 "ó": 412,
875 "ô": 413,
876 "õ": 414,
877 "ö": 415,
878 "÷": 416,
879 "ø": 417,
880 "ù": 418,
881 "ú": 419,
882 "û": 420,
883 "ü": 421,
884 "ý": 422,
885 "þ": 423,
886 "ÿ": 424,
887 "ɐ": 425,
888 "ɑ": 426,
889 "ɒ": 427,
890 "ɓ": 428,
891 "ɔ": 429,
892 "ɕ": 430,
893 "ɖ": 431,
894 "ɗ": 432,
895 "ɘ": 433,
896 "ə": 434,
897 "ɚ": 435,
898 "ɛ": 436,
899 "ɜ": 437,
900 "ɝ": 438,
901 "ɞ": 439,
902 "ɟ": 440,
903 "ɠ": 441,
904 "ɡ": 442,
905 "ɢ": 443,
906 "ɣ": 444,
907 "ɤ": 445,
908 "ɥ": 446,
909 "ɦ": 447,
910 "ɧ": 448,
911 "ɨ": 449,
912 "ɩ": 450,
913 "ɪ": 451,
914 "ɫ": 452,
915 "ɬ": 453,
916 "ɭ": 454,
917 "ɮ": 455,
918 "ɯ": 456,
919 "ɰ": 457,
920 "ɱ": 458,
921 "ɲ": 459,
922 "ɳ": 460,
923 "ɴ": 461,
924 "ɵ": 462,
925 "ɶ": 463,
926 "ɷ": 464,
927 "ɸ": 465,
928 "ɹ": 466,
929 "ɺ": 467,
930 "ɻ": 468,
931 "ɼ": 469,
932 "ɽ": 470,
933 "ɾ": 471,
934 "ɿ": 472,
935 "ʀ": 473,
936 "ʁ": 474,
937 "ʂ": 475,
938 "ʃ": 476,
939 "ʄ": 477,
940 "ʅ": 478,
941 "ʆ": 479,
942 "ʇ": 480,
943 "ʈ": 481,
944 "ʉ": 482,
945 "ʊ": 483,
946 "ʋ": 484,
947 "ʌ": 485,
948 "ʍ": 486,
949 "ʎ": 487,
950 "ʏ": 488,
951 "ʐ": 489,
952 "ʑ": 490,
953 "ʒ": 491,
954 "ʓ": 492,
955 "ʔ": 493,
956 "ʕ": 494,
957 "ʖ": 495,
958 "ʗ": 496,
959 "ʘ": 497,
960 "ʙ": 498,
961 "ʚ": 499,
962 "ʛ": 500,
963 "ʜ": 501,
964 "ʝ": 502,
965 "ʞ": 503,
966 "ʟ": 504,
967 "ʠ": 505,
968 "ʡ": 506,
969 "ʢ": 507,
970 "ʣ": 508,
971 "ʤ": 509,
972 "ʥ": 510,
973 "ʦ": 511,
974 "ʧ": 512,
975 "ʨ": 513,
976 "ʩ": 514,
977 "ʪ": 515,
978 "ʫ": 516,
979 "ʬ": 517,
980 "ʭ": 518,
981 "ʮ": 519,
982 "ʯ": 520,
983 "ʰ": 521,
984 "ʱ": 522,
985 "ʲ": 523,
986 "ʳ": 524,
987 "ʴ": 525,
988 "ʵ": 526,
989 "ʶ": 527,
990 "ʷ": 528,
991 "ʸ": 529,
992 "ʹ": 530,
993 "ʺ": 531,
994 "ʻ": 532,
995 "ʼ": 533,
996 "ʽ": 534,
997 "ʾ": 535,
998 "ʿ": 536,
999 "ˀ": 537,
1000 "ˁ": 538,
1001 "˂": 539,
1002 "˃": 540,
1003 "˄": 541,
1004 "˅": 542,
1005 "ˆ": 543,
1006 "ˇ": 544,
1007 "ˈ": 545,
1008 "ˉ": 546,
1009 "ˊ": 547,
1010 "ˋ": 548,
1011 "ˌ": 549,
1012 "ˍ": 550,
1013 "ˎ": 551,
1014 "ˏ": 552,
1015 "ː": 553,
1016 "ˑ": 554,
1017 "˒": 555,
1018 "˓": 556,
1019 "˔": 557,
1020 "˕": 558,
1021 "˖": 559,
1022 "˗": 560,
1023 "˘": 561,
1024 "˙": 562,
1025 "˚": 563,
1026 "˛": 564,
1027 "˜": 565,
1028 "˝": 566,
1029 "˞": 567,
1030 "˟": 568,
1031 "ˠ": 569,
1032 "ˡ": 570,
1033 "ˢ": 571,
1034 "ˣ": 572,
1035 "ˤ": 573,
1036 "˥": 574,
1037 "˦": 575,
1038 "˧": 576,
1039 "˨": 577,
1040 "˩": 578,
1041 "˪": 579,
1042 "˫": 580,
1043 "ˬ": 581,
1044 "˭": 582,
1045 "ˮ": 583,
1046 "˯": 584,
1047 "˰": 585,
1048 "˱": 586,
1049 "˲": 587,
1050 "˳": 588,
1051 "˴": 589,
1052 "˵": 590,
1053 "˶": 591,
1054 "˷": 592,
1055 "˸": 593,
1056 "˹": 594,
1057 "˺": 595,
1058 "˻": 596,
1059 "˼": 597,
1060 "˽": 598,
1061 "˾": 599,
1062 "˿": 600,
1063 "ā": 601,
1064 "ō": 602,
1065 "…": 603,
1066 "[UH]": 604,
1067 "[UM]": 605,
1068 "[giggle]": 606,
1069 "[laughter]": 607,
1070 "[guffaw]": 608,
1071 "[inhale]": 609,
1072 "[exhale]": 610,
1073 "[sigh]": 611,
1074 "[cry]": 612,
1075 "[bark]": 613,
1076 "[howl]": 614,
1077 "[meow]": 615,
1078 "[singing]": 616,
1079 "[music]": 617,
1080 "[whistle]": 618,
1081 "[humming]": 619,
1082 "[gasp]": 620,
1083 "[groan]": 621,
1084 "[whisper]": 622,
1085 "[mumble]": 623,
1086 "[sniff]": 624,
1087 "[sneeze]": 625,
1088 "[cough]": 626,
1089 "[snore]": 627,
1090 "[chew]": 628,
1091 "[sip]": 629,
1092 "[clear_throat]": 630,
1093 "[kiss]": 631,
1094 "[shhh]": 632,
1095 "[gibberish]": 633,
1096 "[fr]": 634,
1097 "[es]": 635,
1098 "[de]": 636,
1099 "[it]": 637,
1100 "[ipa]": 638,
1101 "[end_of_label]": 639,
1102 "ŋ": 640,
1103 "ᵻ": 641,
1104 "θ": 642,
1105 "̩": 643,
1106 "\u0303": 644,
1107 "ɑː": 645,
1108 "iː": 646,
1109 "uː": 647,
1110 "ɜː": 648,
1111 "ɔː": 649,
1112 "oː": 650,
1113 "eɪ": 651,
1114 "oʊ": 652,
1115 "aɪ": 653,
1116 "aʊ": 654,
1117 "ɔɪ": 655,
1118 "dʒ": 656,
1119 "tʃ": 657,
1120 "ɪŋ": 658,
1121 "ᵻd": 659,
1122 "ˈiː": 660,
1123 "ˌiː": 661,
1124 "ˈɪ": 662,
1125 "ˌɪ": 663,
1126 "ˈeɪ": 664,
1127 "ˌeɪ": 665,
1128 "ˈɛ": 666,
1129 "ˌɛ": 667,
1130 "ˈæ": 668,
1131 "ˌæ": 669,
1132 "ˈɑː": 670,
1133 "ˌɑː": 671,
1134 "ˈɔː": 672,
1135 "ˌɔː": 673,
1136 "oːɹ": 674,
1137 "ˈoːɹ": 675,
1138 "ˌoːɹ": 676,
1139 "ˈoʊ": 677,
1140 "ˌoʊ": 678,
1141 "ˈʊ": 679,
1142 "ˌʊ": 680,
1143 "ˈuː": 681,
1144 "ˌuː": 682,
1145 "ˈɜː": 683,
1146 "ˌɜː": 684,
1147 "ˈʌ": 685,
1148 "ˌʌ": 686,
1149 "ˈaɪ": 687,
1150 "ˌaɪ": 688,
1151 "ˈaʊ": 689,
1152 "ˌaʊ": 690,
1153 "ˈɔɪ": 691,
1154 "ˌɔɪ": 692,
1155 "ˈɚ": 693,
1156 "ˌɐ": 694,
1157 "[PLACEHOLDER55]": 695,
1158 "[PLACEHOLDER56]": 696,
1159 "[PLACEHOLDER57]": 697,
1160 "[PLACEHOLDER58]": 698,
1161 "[PLACEHOLDER59]": 699,
1162 "[PLACEHOLDER60]": 700,
1163 "[PLACEHOLDER61]": 701,
1164 "[PLACEHOLDER62]": 702,
1165 "[PLACEHOLDER63]": 703
1166 },
1167 "merges": [
1168 "t h",
1169 "i n",
1170 "th e",
1171 "a n",
1172 "e r",
1173 "o u",
1174 "r e",
1175 "o n",
1176 "a t",
1177 "e d",
1178 "e n",
1179 "t o",
1180 "in g",
1181 "an d",
1182 "i s",
1183 "a s",
1184 "a l",
1185 "o r",
1186 "o f",
1187 "a r",
1188 "i t",
1189 "e s",
1190 "h e",
1191 "s t",
1192 "l e",
1193 "o m",
1194 "s e",
1195 "b e",
1196 "a d",
1197 "o w",
1198 "l y",
1199 "c h",
1200 "w h",
1201 "th at",
1202 "y ou",
1203 "l i",
1204 "v e",
1205 "a c",
1206 "t i",
1207 "l d",
1208 "m e",
1209 "w as",
1210 "g h",
1211 "i d",
1212 "l l",
1213 "w i",
1214 "en t",
1215 "f or",
1216 "a y",
1217 "r o",
1218 "v er",
1219 "i c",
1220 "h er",
1221 "k e",
1222 "h is",
1223 "n o",
1224 "u t",
1225 "u n",
1226 "i r",
1227 "l o",
1228 "w e",
1229 "r i",
1230 "h a",
1231 "wi th",
1232 "gh t",
1233 "ou t",
1234 "i m",
1235 "i on",
1236 "al l",
1237 "a b",
1238 "on e",
1239 "n e",
1240 "g e",
1241 "ou ld",
1242 "t er",
1243 "m o",
1244 "h ad",
1245 "c e",
1246 "s he",
1247 "g o",
1248 "s h",
1249 "u r",
1250 "a m",
1251 "s o",
1252 "p e",
1253 "m y",
1254 "d e",
1255 "a re",
1256 "b ut",
1257 "om e",
1258 "f r",
1259 "the r",
1260 "f e",
1261 "s u",
1262 "d o",
1263 "c on",
1264 "t e",
1265 "a in",
1266 "er e",
1267 "p o",
1268 "i f",
1269 "the y",
1270 "u s",
1271 "a g",
1272 "t r",
1273 "n ow",
1274 "ou n",
1275 "th is",
1276 "ha ve",
1277 "no t",
1278 "s a",
1279 "i l",
1280 "u p",
1281 "th ing",
1282 "fr om",
1283 "a p",
1284 "h im",
1285 "ac k",
1286 "at ion",
1287 "an t",
1288 "ou r",
1289 "o p",
1290 "li ke",
1291 "u st",
1292 "es s",
1293 "b o",
1294 "o k",
1295 "u l",
1296 "in d",
1297 "e x",
1298 "c om",
1299 "s ome",
1300 "the re",
1301 "er s",
1302 "c o",
1303 "re s",
1304 "m an",
1305 "ar d",
1306 "p l",
1307 "w or",
1308 "w ay",
1309 "ti on",
1310 "f o",
1311 "c a",
1312 "w ere",
1313 "b y",
1314 "at e",
1315 "p ro",
1316 "t ed",
1317 "oun d",
1318 "ow n",
1319 "w ould",
1320 "t s",
1321 "wh at",
1322 "q u",
1323 "al ly",
1324 "i ght",
1325 "c k",
1326 "g r",
1327 "wh en",
1328 "v en",
1329 "c an",
1330 "ou gh",
1331 "in e",
1332 "en d",
1333 "p er",
1334 "ou s",
1335 "o d",
1336 "id e",
1337 "k now",
1338 "t y",
1339 "ver y",
1340 "s i",
1341 "a k",
1342 "wh o",
1343 "ab out",
1344 "i ll",
1345 "the m",
1346 "es t",
1347 "re d",
1348 "y e",
1349 "c ould",
1350 "on g",
1351 "you r",
1352 "the ir",
1353 "e m",
1354 "j ust",
1355 "o ther",
1356 "in to",
1357 "an y",
1358 "wh i",
1359 "u m",
1360 "t w",
1361 "as t",
1362 "d er",
1363 "d id",
1364 "i e",
1365 "be en",
1366 "ac e",
1367 "in k",
1368 "it y",
1369 "b ack",
1370 "t ing",
1371 "b r",
1372 "mo re",
1373 "a ke",
1374 "p p",
1375 "the n",
1376 "s p",
1377 "e l",
1378 "u se",
1379 "b l",
1380 "sa id",
1381 "o ver",
1382 "ge t",
1383 "ɑ ː",
1384 "i ː",
1385 "u ː",
1386 "ɜ ː",
1387 "ɔ ː",
1388 "o ː",
1389 "e ɪ",
1390 "o ʊ",
1391 "a ɪ",
1392 "a ʊ",
1393 "ɔ ɪ",
1394 "d ʒ",
1395 "t ʃ",
1396 "ɪ ŋ",
1397 "ᵻ d",
1398 "ˈ iː",
1399 "ˌ iː",
1400 "ˈ ɪ",
1401 "ˌ ɪ",
1402 "ˈ eɪ",
1403 "ˌ eɪ",
1404 "ˈ ɛ",
1405 "ˌ ɛ",
1406 "ˈ æ",
1407 "ˌ æ",
1408 "ˈ ɑː",
1409 "ˌ ɑː",
1410 "ˈ ɔː",
1411 "ˌ ɔː",
1412 "oː ɹ",
1413 "ˈ oːɹ",
1414 "ˌ oːɹ",
1415 "ˈ oʊ",
1416 "ˌ oʊ",
1417 "ˈ ʊ",
1418 "ˌ ʊ",
1419 "ˈ uː",
1420 "ˌ uː",
1421 "ˈ ɜː",
1422 "ˌ ɜː",
1423 "ˈ ʌ",
1424 "ˌ ʌ",
1425 "ˈ aɪ",
1426 "ˌ aɪ",
1427 "ˈ aʊ",
1428 "ˌ aʊ",
1429 "ˈ ɔɪ",
1430 "ˌ ɔɪ",
1431 "ˈ ɚ",
1432 "ˌ ɐ"
1433 ]
1434 }
1435 }