tokenizer.json
| 1 | { |
| 2 | "version": "1.0", |
| 3 | "truncation": null, |
| 4 | "padding": null, |
| 5 | "added_tokens": [ |
| 6 | { |
| 7 | "id": 0, |
| 8 | "special": true, |
| 9 | "content": "[STOP]", |
| 10 | "single_word": false, |
| 11 | "lstrip": false, |
| 12 | "rstrip": false, |
| 13 | "normalized": false |
| 14 | }, |
| 15 | { |
| 16 | "id": 1, |
| 17 | "special": true, |
| 18 | "content": "[UNK]", |
| 19 | "single_word": false, |
| 20 | "lstrip": false, |
| 21 | "rstrip": false, |
| 22 | "normalized": false |
| 23 | }, |
| 24 | { |
| 25 | "id": 2, |
| 26 | "special": true, |
| 27 | "content": "[SPACE]", |
| 28 | "single_word": false, |
| 29 | "lstrip": false, |
| 30 | "rstrip": false, |
| 31 | "normalized": false |
| 32 | }, |
| 33 | { |
| 34 | "id": 255, |
| 35 | "special": true, |
| 36 | "content": "[START]", |
| 37 | "single_word": false, |
| 38 | "lstrip": false, |
| 39 | "rstrip": false, |
| 40 | "normalized": false |
| 41 | }, |
| 42 | { |
| 43 | "id": 604, |
| 44 | "content": "[UH]", |
| 45 | "single_word": false, |
| 46 | "lstrip": false, |
| 47 | "rstrip": false, |
| 48 | "normalized": false, |
| 49 | "special": true |
| 50 | }, |
| 51 | { |
| 52 | "id": 605, |
| 53 | "content": "[UM]", |
| 54 | "single_word": false, |
| 55 | "lstrip": false, |
| 56 | "rstrip": false, |
| 57 | "normalized": false, |
| 58 | "special": true |
| 59 | }, |
| 60 | { |
| 61 | "id": 606, |
| 62 | "content": "[giggle]", |
| 63 | "single_word": false, |
| 64 | "lstrip": false, |
| 65 | "rstrip": false, |
| 66 | "normalized": false, |
| 67 | "special": true |
| 68 | }, |
| 69 | { |
| 70 | "id": 607, |
| 71 | "content": "[laughter]", |
| 72 | "single_word": false, |
| 73 | "lstrip": false, |
| 74 | "rstrip": false, |
| 75 | "normalized": false, |
| 76 | "special": true |
| 77 | }, |
| 78 | { |
| 79 | "id": 608, |
| 80 | "content": "[guffaw]", |
| 81 | "single_word": false, |
| 82 | "lstrip": false, |
| 83 | "rstrip": false, |
| 84 | "normalized": false, |
| 85 | "special": true |
| 86 | }, |
| 87 | { |
| 88 | "id": 609, |
| 89 | "content": "[inhale]", |
| 90 | "single_word": false, |
| 91 | "lstrip": false, |
| 92 | "rstrip": false, |
| 93 | "normalized": false, |
| 94 | "special": true |
| 95 | }, |
| 96 | { |
| 97 | "id": 610, |
| 98 | "content": "[exhale]", |
| 99 | "single_word": false, |
| 100 | "lstrip": false, |
| 101 | "rstrip": false, |
| 102 | "normalized": false, |
| 103 | "special": true |
| 104 | }, |
| 105 | { |
| 106 | "id": 611, |
| 107 | "content": "[sigh]", |
| 108 | "single_word": false, |
| 109 | "lstrip": false, |
| 110 | "rstrip": false, |
| 111 | "normalized": false, |
| 112 | "special": true |
| 113 | }, |
| 114 | { |
| 115 | "id": 612, |
| 116 | "content": "[cry]", |
| 117 | "single_word": false, |
| 118 | "lstrip": false, |
| 119 | "rstrip": false, |
| 120 | "normalized": false, |
| 121 | "special": true |
| 122 | }, |
| 123 | { |
| 124 | "id": 613, |
| 125 | "content": "[bark]", |
| 126 | "single_word": false, |
| 127 | "lstrip": false, |
| 128 | "rstrip": false, |
| 129 | "normalized": false, |
| 130 | "special": true |
| 131 | }, |
| 132 | { |
| 133 | "id": 614, |
| 134 | "content": "[howl]", |
| 135 | "single_word": false, |
| 136 | "lstrip": false, |
| 137 | "rstrip": false, |
| 138 | "normalized": false, |
| 139 | "special": true |
| 140 | }, |
| 141 | { |
| 142 | "id": 615, |
| 143 | "content": "[meow]", |
| 144 | "single_word": false, |
| 145 | "lstrip": false, |
| 146 | "rstrip": false, |
| 147 | "normalized": false, |
| 148 | "special": true |
| 149 | }, |
| 150 | { |
| 151 | "id": 616, |
| 152 | "content": "[singing]", |
| 153 | "single_word": false, |
| 154 | "lstrip": false, |
| 155 | "rstrip": false, |
| 156 | "normalized": false, |
| 157 | "special": true |
| 158 | }, |
| 159 | { |
| 160 | "id": 617, |
| 161 | "content": "[music]", |
| 162 | "single_word": false, |
| 163 | "lstrip": false, |
| 164 | "rstrip": false, |
| 165 | "normalized": false, |
| 166 | "special": true |
| 167 | }, |
| 168 | { |
| 169 | "id": 618, |
| 170 | "content": "[whistle]", |
| 171 | "single_word": false, |
| 172 | "lstrip": false, |
| 173 | "rstrip": false, |
| 174 | "normalized": false, |
| 175 | "special": true |
| 176 | }, |
| 177 | { |
| 178 | "id": 619, |
| 179 | "content": "[humming]", |
| 180 | "single_word": false, |
| 181 | "lstrip": false, |
| 182 | "rstrip": false, |
| 183 | "normalized": false, |
| 184 | "special": true |
| 185 | }, |
| 186 | { |
| 187 | "id": 620, |
| 188 | "content": "[gasp]", |
| 189 | "single_word": false, |
| 190 | "lstrip": false, |
| 191 | "rstrip": false, |
| 192 | "normalized": false, |
| 193 | "special": true |
| 194 | }, |
| 195 | { |
| 196 | "id": 621, |
| 197 | "content": "[groan]", |
| 198 | "single_word": false, |
| 199 | "lstrip": false, |
| 200 | "rstrip": false, |
| 201 | "normalized": false, |
| 202 | "special": true |
| 203 | }, |
| 204 | { |
| 205 | "id": 622, |
| 206 | "content": "[whisper]", |
| 207 | "single_word": false, |
| 208 | "lstrip": false, |
| 209 | "rstrip": false, |
| 210 | "normalized": false, |
| 211 | "special": true |
| 212 | }, |
| 213 | { |
| 214 | "id": 623, |
| 215 | "content": "[mumble]", |
| 216 | "single_word": false, |
| 217 | "lstrip": false, |
| 218 | "rstrip": false, |
| 219 | "normalized": false, |
| 220 | "special": true |
| 221 | }, |
| 222 | { |
| 223 | "id": 624, |
| 224 | "content": "[sniff]", |
| 225 | "single_word": false, |
| 226 | "lstrip": false, |
| 227 | "rstrip": false, |
| 228 | "normalized": false, |
| 229 | "special": true |
| 230 | }, |
| 231 | { |
| 232 | "id": 625, |
| 233 | "content": "[sneeze]", |
| 234 | "single_word": false, |
| 235 | "lstrip": false, |
| 236 | "rstrip": false, |
| 237 | "normalized": false, |
| 238 | "special": true |
| 239 | }, |
| 240 | { |
| 241 | "id": 626, |
| 242 | "content": "[cough]", |
| 243 | "single_word": false, |
| 244 | "lstrip": false, |
| 245 | "rstrip": false, |
| 246 | "normalized": false, |
| 247 | "special": true |
| 248 | }, |
| 249 | { |
| 250 | "id": 627, |
| 251 | "content": "[snore]", |
| 252 | "single_word": false, |
| 253 | "lstrip": false, |
| 254 | "rstrip": false, |
| 255 | "normalized": false, |
| 256 | "special": true |
| 257 | }, |
| 258 | { |
| 259 | "id": 628, |
| 260 | "content": "[chew]", |
| 261 | "single_word": false, |
| 262 | "lstrip": false, |
| 263 | "rstrip": false, |
| 264 | "normalized": false, |
| 265 | "special": true |
| 266 | }, |
| 267 | { |
| 268 | "id": 629, |
| 269 | "content": "[sip]", |
| 270 | "single_word": false, |
| 271 | "lstrip": false, |
| 272 | "rstrip": false, |
| 273 | "normalized": false, |
| 274 | "special": true |
| 275 | }, |
| 276 | { |
| 277 | "id": 630, |
| 278 | "content": "[clear_throat]", |
| 279 | "single_word": false, |
| 280 | "lstrip": false, |
| 281 | "rstrip": false, |
| 282 | "normalized": false, |
| 283 | "special": true |
| 284 | }, |
| 285 | { |
| 286 | "id": 631, |
| 287 | "content": "[kiss]", |
| 288 | "single_word": false, |
| 289 | "lstrip": false, |
| 290 | "rstrip": false, |
| 291 | "normalized": false, |
| 292 | "special": true |
| 293 | }, |
| 294 | { |
| 295 | "id": 632, |
| 296 | "content": "[shhh]", |
| 297 | "single_word": false, |
| 298 | "lstrip": false, |
| 299 | "rstrip": false, |
| 300 | "normalized": false, |
| 301 | "special": true |
| 302 | }, |
| 303 | { |
| 304 | "id": 633, |
| 305 | "content": "[gibberish]", |
| 306 | "single_word": false, |
| 307 | "lstrip": false, |
| 308 | "rstrip": false, |
| 309 | "normalized": false, |
| 310 | "special": true |
| 311 | }, |
| 312 | { |
| 313 | "id": 634, |
| 314 | "content": "[fr]", |
| 315 | "single_word": false, |
| 316 | "lstrip": false, |
| 317 | "rstrip": false, |
| 318 | "normalized": false, |
| 319 | "special": true |
| 320 | }, |
| 321 | { |
| 322 | "id": 635, |
| 323 | "content": "[es]", |
| 324 | "single_word": false, |
| 325 | "lstrip": false, |
| 326 | "rstrip": false, |
| 327 | "normalized": false, |
| 328 | "special": true |
| 329 | }, |
| 330 | { |
| 331 | "id": 636, |
| 332 | "content": "[de]", |
| 333 | "single_word": false, |
| 334 | "lstrip": false, |
| 335 | "rstrip": false, |
| 336 | "normalized": false, |
| 337 | "special": true |
| 338 | }, |
| 339 | { |
| 340 | "id": 637, |
| 341 | "content": "[it]", |
| 342 | "single_word": false, |
| 343 | "lstrip": false, |
| 344 | "rstrip": false, |
| 345 | "normalized": false, |
| 346 | "special": true |
| 347 | }, |
| 348 | { |
| 349 | "id": 638, |
| 350 | "content": "[ipa]", |
| 351 | "single_word": false, |
| 352 | "lstrip": false, |
| 353 | "rstrip": false, |
| 354 | "normalized": false, |
| 355 | "special": true |
| 356 | }, |
| 357 | { |
| 358 | "id": 639, |
| 359 | "content": "[end_of_label]", |
| 360 | "single_word": false, |
| 361 | "lstrip": false, |
| 362 | "rstrip": false, |
| 363 | "normalized": false, |
| 364 | "special": true |
| 365 | }, |
| 366 | { |
| 367 | "id": 695, |
| 368 | "content": "[PLACEHOLDER55]", |
| 369 | "single_word": false, |
| 370 | "lstrip": false, |
| 371 | "rstrip": false, |
| 372 | "normalized": false, |
| 373 | "special": true |
| 374 | }, |
| 375 | { |
| 376 | "id": 696, |
| 377 | "content": "[PLACEHOLDER56]", |
| 378 | "single_word": false, |
| 379 | "lstrip": false, |
| 380 | "rstrip": false, |
| 381 | "normalized": false, |
| 382 | "special": true |
| 383 | }, |
| 384 | { |
| 385 | "id": 697, |
| 386 | "content": "[PLACEHOLDER57]", |
| 387 | "single_word": false, |
| 388 | "lstrip": false, |
| 389 | "rstrip": false, |
| 390 | "normalized": false, |
| 391 | "special": true |
| 392 | }, |
| 393 | { |
| 394 | "id": 698, |
| 395 | "content": "[PLACEHOLDER58]", |
| 396 | "single_word": false, |
| 397 | "lstrip": false, |
| 398 | "rstrip": false, |
| 399 | "normalized": false, |
| 400 | "special": true |
| 401 | }, |
| 402 | { |
| 403 | "id": 699, |
| 404 | "content": "[PLACEHOLDER59]", |
| 405 | "single_word": false, |
| 406 | "lstrip": false, |
| 407 | "rstrip": false, |
| 408 | "normalized": false, |
| 409 | "special": true |
| 410 | }, |
| 411 | { |
| 412 | "id": 700, |
| 413 | "content": "[PLACEHOLDER60]", |
| 414 | "single_word": false, |
| 415 | "lstrip": false, |
| 416 | "rstrip": false, |
| 417 | "normalized": false, |
| 418 | "special": true |
| 419 | }, |
| 420 | { |
| 421 | "id": 701, |
| 422 | "content": "[PLACEHOLDER61]", |
| 423 | "single_word": false, |
| 424 | "lstrip": false, |
| 425 | "rstrip": false, |
| 426 | "normalized": false, |
| 427 | "special": true |
| 428 | }, |
| 429 | { |
| 430 | "id": 702, |
| 431 | "content": "[PLACEHOLDER62]", |
| 432 | "single_word": false, |
| 433 | "lstrip": false, |
| 434 | "rstrip": false, |
| 435 | "normalized": false, |
| 436 | "special": true |
| 437 | }, |
| 438 | { |
| 439 | "id": 703, |
| 440 | "content": "[PLACEHOLDER63]", |
| 441 | "single_word": false, |
| 442 | "lstrip": false, |
| 443 | "rstrip": false, |
| 444 | "normalized": false, |
| 445 | "special": true |
| 446 | } |
| 447 | ], |
| 448 | "normalizer": null, |
| 449 | "pre_tokenizer": { |
| 450 | "type": "Whitespace" |
| 451 | }, |
| 452 | "post_processor": null, |
| 453 | "decoder": null, |
| 454 | "model": { |
| 455 | "type": "BPE", |
| 456 | "dropout": null, |
| 457 | "unk_token": "[UNK]", |
| 458 | "continuing_subword_prefix": null, |
| 459 | "end_of_word_suffix": null, |
| 460 | "fuse_unk": false, |
| 461 | "vocab": { |
| 462 | "[STOP]": 0, |
| 463 | "[UNK]": 1, |
| 464 | "[SPACE]": 2, |
| 465 | "!": 3, |
| 466 | "'": 4, |
| 467 | "(": 5, |
| 468 | ")": 6, |
| 469 | ",": 7, |
| 470 | "-": 8, |
| 471 | ".": 9, |
| 472 | "/": 10, |
| 473 | ":": 11, |
| 474 | ";": 12, |
| 475 | "?": 13, |
| 476 | "a": 14, |
| 477 | "b": 15, |
| 478 | "c": 16, |
| 479 | "d": 17, |
| 480 | "e": 18, |
| 481 | "f": 19, |
| 482 | "g": 20, |
| 483 | "h": 21, |
| 484 | "i": 22, |
| 485 | "j": 23, |
| 486 | "k": 24, |
| 487 | "l": 25, |
| 488 | "m": 26, |
| 489 | "n": 27, |
| 490 | "o": 28, |
| 491 | "p": 29, |
| 492 | "q": 30, |
| 493 | "r": 31, |
| 494 | "s": 32, |
| 495 | "t": 33, |
| 496 | "u": 34, |
| 497 | "v": 35, |
| 498 | "w": 36, |
| 499 | "x": 37, |
| 500 | "y": 38, |
| 501 | "z": 39, |
| 502 | "th": 40, |
| 503 | "in": 41, |
| 504 | "the": 42, |
| 505 | "an": 43, |
| 506 | "er": 44, |
| 507 | "ou": 45, |
| 508 | "re": 46, |
| 509 | "on": 47, |
| 510 | "at": 48, |
| 511 | "ed": 49, |
| 512 | "en": 50, |
| 513 | "to": 51, |
| 514 | "ing": 52, |
| 515 | "and": 53, |
| 516 | "is": 54, |
| 517 | "as": 55, |
| 518 | "al": 56, |
| 519 | "or": 57, |
| 520 | "of": 58, |
| 521 | "ar": 59, |
| 522 | "it": 60, |
| 523 | "es": 61, |
| 524 | "he": 62, |
| 525 | "st": 63, |
| 526 | "le": 64, |
| 527 | "om": 65, |
| 528 | "se": 66, |
| 529 | "be": 67, |
| 530 | "ad": 68, |
| 531 | "ow": 69, |
| 532 | "ly": 70, |
| 533 | "ch": 71, |
| 534 | "wh": 72, |
| 535 | "that": 73, |
| 536 | "you": 74, |
| 537 | "li": 75, |
| 538 | "ve": 76, |
| 539 | "ac": 77, |
| 540 | "ti": 78, |
| 541 | "ld": 79, |
| 542 | "me": 80, |
| 543 | "was": 81, |
| 544 | "gh": 82, |
| 545 | "id": 83, |
| 546 | "ll": 84, |
| 547 | "wi": 85, |
| 548 | "ent": 86, |
| 549 | "for": 87, |
| 550 | "ay": 88, |
| 551 | "ro": 89, |
| 552 | "ver": 90, |
| 553 | "ic": 91, |
| 554 | "her": 92, |
| 555 | "ke": 93, |
| 556 | "his": 94, |
| 557 | "no": 95, |
| 558 | "ut": 96, |
| 559 | "un": 97, |
| 560 | "ir": 98, |
| 561 | "lo": 99, |
| 562 | "we": 100, |
| 563 | "ri": 101, |
| 564 | "ha": 102, |
| 565 | "with": 103, |
| 566 | "ght": 104, |
| 567 | "out": 105, |
| 568 | "im": 106, |
| 569 | "ion": 107, |
| 570 | "all": 108, |
| 571 | "ab": 109, |
| 572 | "one": 110, |
| 573 | "ne": 111, |
| 574 | "ge": 112, |
| 575 | "ould": 113, |
| 576 | "ter": 114, |
| 577 | "mo": 115, |
| 578 | "had": 116, |
| 579 | "ce": 117, |
| 580 | "she": 118, |
| 581 | "go": 119, |
| 582 | "sh": 120, |
| 583 | "ur": 121, |
| 584 | "am": 122, |
| 585 | "so": 123, |
| 586 | "pe": 124, |
| 587 | "my": 125, |
| 588 | "de": 126, |
| 589 | "are": 127, |
| 590 | "but": 128, |
| 591 | "ome": 129, |
| 592 | "fr": 130, |
| 593 | "ther": 131, |
| 594 | "fe": 132, |
| 595 | "su": 133, |
| 596 | "do": 134, |
| 597 | "con": 135, |
| 598 | "te": 136, |
| 599 | "ain": 137, |
| 600 | "ere": 138, |
| 601 | "po": 139, |
| 602 | "if": 140, |
| 603 | "they": 141, |
| 604 | "us": 142, |
| 605 | "ag": 143, |
| 606 | "tr": 144, |
| 607 | "now": 145, |
| 608 | "oun": 146, |
| 609 | "this": 147, |
| 610 | "have": 148, |
| 611 | "not": 149, |
| 612 | "sa": 150, |
| 613 | "il": 151, |
| 614 | "up": 152, |
| 615 | "thing": 153, |
| 616 | "from": 154, |
| 617 | "ap": 155, |
| 618 | "him": 156, |
| 619 | "ack": 157, |
| 620 | "ation": 158, |
| 621 | "ant": 159, |
| 622 | "our": 160, |
| 623 | "op": 161, |
| 624 | "like": 162, |
| 625 | "ust": 163, |
| 626 | "ess": 164, |
| 627 | "bo": 165, |
| 628 | "ok": 166, |
| 629 | "ul": 167, |
| 630 | "ind": 168, |
| 631 | "ex": 169, |
| 632 | "com": 170, |
| 633 | "some": 171, |
| 634 | "there": 172, |
| 635 | "ers": 173, |
| 636 | "co": 174, |
| 637 | "res": 175, |
| 638 | "man": 176, |
| 639 | "ard": 177, |
| 640 | "pl": 178, |
| 641 | "wor": 179, |
| 642 | "way": 180, |
| 643 | "tion": 181, |
| 644 | "fo": 182, |
| 645 | "ca": 183, |
| 646 | "were": 184, |
| 647 | "by": 185, |
| 648 | "ate": 186, |
| 649 | "pro": 187, |
| 650 | "ted": 188, |
| 651 | "ound": 189, |
| 652 | "own": 190, |
| 653 | "would": 191, |
| 654 | "ts": 192, |
| 655 | "what": 193, |
| 656 | "qu": 194, |
| 657 | "ally": 195, |
| 658 | "ight": 196, |
| 659 | "ck": 197, |
| 660 | "gr": 198, |
| 661 | "when": 199, |
| 662 | "ven": 200, |
| 663 | "can": 201, |
| 664 | "ough": 202, |
| 665 | "ine": 203, |
| 666 | "end": 204, |
| 667 | "per": 205, |
| 668 | "ous": 206, |
| 669 | "od": 207, |
| 670 | "ide": 208, |
| 671 | "know": 209, |
| 672 | "ty": 210, |
| 673 | "very": 211, |
| 674 | "si": 212, |
| 675 | "ak": 213, |
| 676 | "who": 214, |
| 677 | "about": 215, |
| 678 | "ill": 216, |
| 679 | "them": 217, |
| 680 | "est": 218, |
| 681 | "red": 219, |
| 682 | "ye": 220, |
| 683 | "could": 221, |
| 684 | "ong": 222, |
| 685 | "your": 223, |
| 686 | "their": 224, |
| 687 | "em": 225, |
| 688 | "just": 226, |
| 689 | "other": 227, |
| 690 | "into": 228, |
| 691 | "any": 229, |
| 692 | "whi": 230, |
| 693 | "um": 231, |
| 694 | "tw": 232, |
| 695 | "ast": 233, |
| 696 | "der": 234, |
| 697 | "did": 235, |
| 698 | "ie": 236, |
| 699 | "been": 237, |
| 700 | "ace": 238, |
| 701 | "ink": 239, |
| 702 | "ity": 240, |
| 703 | "back": 241, |
| 704 | "ting": 242, |
| 705 | "br": 243, |
| 706 | "more": 244, |
| 707 | "ake": 245, |
| 708 | "pp": 246, |
| 709 | "then": 247, |
| 710 | "sp": 248, |
| 711 | "el": 249, |
| 712 | "use": 250, |
| 713 | "bl": 251, |
| 714 | "said": 252, |
| 715 | "over": 253, |
| 716 | "get": 254, |
| 717 | "[START]": 255, |
| 718 | "\"": 256, |
| 719 | "#": 257, |
| 720 | "$": 258, |
| 721 | "%": 259, |
| 722 | "&": 260, |
| 723 | "*": 261, |
| 724 | "+": 262, |
| 725 | "0": 263, |
| 726 | "1": 264, |
| 727 | "2": 265, |
| 728 | "3": 266, |
| 729 | "4": 267, |
| 730 | "5": 268, |
| 731 | "6": 269, |
| 732 | "7": 270, |
| 733 | "8": 271, |
| 734 | "9": 272, |
| 735 | "<": 273, |
| 736 | "=": 274, |
| 737 | ">": 275, |
| 738 | "@": 276, |
| 739 | "A": 277, |
| 740 | "B": 278, |
| 741 | "C": 279, |
| 742 | "D": 280, |
| 743 | "E": 281, |
| 744 | "F": 282, |
| 745 | "G": 283, |
| 746 | "H": 284, |
| 747 | "I": 285, |
| 748 | "J": 286, |
| 749 | "K": 287, |
| 750 | "L": 288, |
| 751 | "M": 289, |
| 752 | "N": 290, |
| 753 | "O": 291, |
| 754 | "P": 292, |
| 755 | "Q": 293, |
| 756 | "R": 294, |
| 757 | "S": 295, |
| 758 | "T": 296, |
| 759 | "U": 297, |
| 760 | "V": 298, |
| 761 | "W": 299, |
| 762 | "X": 300, |
| 763 | "Y": 301, |
| 764 | "Z": 302, |
| 765 | "[": 303, |
| 766 | "\\": 304, |
| 767 | "]": 305, |
| 768 | "^": 306, |
| 769 | "_": 307, |
| 770 | "`": 308, |
| 771 | "{": 309, |
| 772 | "|": 310, |
| 773 | "}": 311, |
| 774 | "~": 312, |
| 775 | "‐": 313, |
| 776 | "‑": 314, |
| 777 | "‒": 315, |
| 778 | "–": 316, |
| 779 | "—": 317, |
| 780 | "―": 318, |
| 781 | "‖": 319, |
| 782 | "‗": 320, |
| 783 | "‘": 321, |
| 784 | "’": 322, |
| 785 | "‚": 323, |
| 786 | "‛": 324, |
| 787 | "“": 325, |
| 788 | "”": 326, |
| 789 | "„": 327, |
| 790 | "‟": 328, |
| 791 | "\u00a0": 329, |
| 792 | "¡": 330, |
| 793 | "¢": 331, |
| 794 | "£": 332, |
| 795 | "¤": 333, |
| 796 | "¥": 334, |
| 797 | "¦": 335, |
| 798 | "§": 336, |
| 799 | "¨": 337, |
| 800 | "©": 338, |
| 801 | "ª": 339, |
| 802 | "«": 340, |
| 803 | "¬": 341, |
| 804 | "\u00ad": 342, |
| 805 | "®": 343, |
| 806 | "¯": 344, |
| 807 | "°": 345, |
| 808 | "±": 346, |
| 809 | "²": 347, |
| 810 | "³": 348, |
| 811 | "´": 349, |
| 812 | "µ": 350, |
| 813 | "¶": 351, |
| 814 | "·": 352, |
| 815 | "¸": 353, |
| 816 | "¹": 354, |
| 817 | "º": 355, |
| 818 | "»": 356, |
| 819 | "¼": 357, |
| 820 | "½": 358, |
| 821 | "¾": 359, |
| 822 | "¿": 360, |
| 823 | "À": 361, |
| 824 | "Á": 362, |
| 825 | "Â": 363, |
| 826 | "Ã": 364, |
| 827 | "Ä": 365, |
| 828 | "Å": 366, |
| 829 | "Æ": 367, |
| 830 | "Ç": 368, |
| 831 | "È": 369, |
| 832 | "É": 370, |
| 833 | "Ê": 371, |
| 834 | "Ë": 372, |
| 835 | "Ì": 373, |
| 836 | "Í": 374, |
| 837 | "Î": 375, |
| 838 | "Ï": 376, |
| 839 | "Ð": 377, |
| 840 | "Ñ": 378, |
| 841 | "Ò": 379, |
| 842 | "Ó": 380, |
| 843 | "Ô": 381, |
| 844 | "Õ": 382, |
| 845 | "Ö": 383, |
| 846 | "×": 384, |
| 847 | "Ø": 385, |
| 848 | "Ù": 386, |
| 849 | "Ú": 387, |
| 850 | "Û": 388, |
| 851 | "Ü": 389, |
| 852 | "Ý": 390, |
| 853 | "Þ": 391, |
| 854 | "ß": 392, |
| 855 | "à": 393, |
| 856 | "á": 394, |
| 857 | "â": 395, |
| 858 | "ã": 396, |
| 859 | "ä": 397, |
| 860 | "å": 398, |
| 861 | "æ": 399, |
| 862 | "ç": 400, |
| 863 | "è": 401, |
| 864 | "é": 402, |
| 865 | "ê": 403, |
| 866 | "ë": 404, |
| 867 | "ì": 405, |
| 868 | "í": 406, |
| 869 | "î": 407, |
| 870 | "ï": 408, |
| 871 | "ð": 409, |
| 872 | "ñ": 410, |
| 873 | "ò": 411, |
| 874 | "ó": 412, |
| 875 | "ô": 413, |
| 876 | "õ": 414, |
| 877 | "ö": 415, |
| 878 | "÷": 416, |
| 879 | "ø": 417, |
| 880 | "ù": 418, |
| 881 | "ú": 419, |
| 882 | "û": 420, |
| 883 | "ü": 421, |
| 884 | "ý": 422, |
| 885 | "þ": 423, |
| 886 | "ÿ": 424, |
| 887 | "ɐ": 425, |
| 888 | "ɑ": 426, |
| 889 | "ɒ": 427, |
| 890 | "ɓ": 428, |
| 891 | "ɔ": 429, |
| 892 | "ɕ": 430, |
| 893 | "ɖ": 431, |
| 894 | "ɗ": 432, |
| 895 | "ɘ": 433, |
| 896 | "ə": 434, |
| 897 | "ɚ": 435, |
| 898 | "ɛ": 436, |
| 899 | "ɜ": 437, |
| 900 | "ɝ": 438, |
| 901 | "ɞ": 439, |
| 902 | "ɟ": 440, |
| 903 | "ɠ": 441, |
| 904 | "ɡ": 442, |
| 905 | "ɢ": 443, |
| 906 | "ɣ": 444, |
| 907 | "ɤ": 445, |
| 908 | "ɥ": 446, |
| 909 | "ɦ": 447, |
| 910 | "ɧ": 448, |
| 911 | "ɨ": 449, |
| 912 | "ɩ": 450, |
| 913 | "ɪ": 451, |
| 914 | "ɫ": 452, |
| 915 | "ɬ": 453, |
| 916 | "ɭ": 454, |
| 917 | "ɮ": 455, |
| 918 | "ɯ": 456, |
| 919 | "ɰ": 457, |
| 920 | "ɱ": 458, |
| 921 | "ɲ": 459, |
| 922 | "ɳ": 460, |
| 923 | "ɴ": 461, |
| 924 | "ɵ": 462, |
| 925 | "ɶ": 463, |
| 926 | "ɷ": 464, |
| 927 | "ɸ": 465, |
| 928 | "ɹ": 466, |
| 929 | "ɺ": 467, |
| 930 | "ɻ": 468, |
| 931 | "ɼ": 469, |
| 932 | "ɽ": 470, |
| 933 | "ɾ": 471, |
| 934 | "ɿ": 472, |
| 935 | "ʀ": 473, |
| 936 | "ʁ": 474, |
| 937 | "ʂ": 475, |
| 938 | "ʃ": 476, |
| 939 | "ʄ": 477, |
| 940 | "ʅ": 478, |
| 941 | "ʆ": 479, |
| 942 | "ʇ": 480, |
| 943 | "ʈ": 481, |
| 944 | "ʉ": 482, |
| 945 | "ʊ": 483, |
| 946 | "ʋ": 484, |
| 947 | "ʌ": 485, |
| 948 | "ʍ": 486, |
| 949 | "ʎ": 487, |
| 950 | "ʏ": 488, |
| 951 | "ʐ": 489, |
| 952 | "ʑ": 490, |
| 953 | "ʒ": 491, |
| 954 | "ʓ": 492, |
| 955 | "ʔ": 493, |
| 956 | "ʕ": 494, |
| 957 | "ʖ": 495, |
| 958 | "ʗ": 496, |
| 959 | "ʘ": 497, |
| 960 | "ʙ": 498, |
| 961 | "ʚ": 499, |
| 962 | "ʛ": 500, |
| 963 | "ʜ": 501, |
| 964 | "ʝ": 502, |
| 965 | "ʞ": 503, |
| 966 | "ʟ": 504, |
| 967 | "ʠ": 505, |
| 968 | "ʡ": 506, |
| 969 | "ʢ": 507, |
| 970 | "ʣ": 508, |
| 971 | "ʤ": 509, |
| 972 | "ʥ": 510, |
| 973 | "ʦ": 511, |
| 974 | "ʧ": 512, |
| 975 | "ʨ": 513, |
| 976 | "ʩ": 514, |
| 977 | "ʪ": 515, |
| 978 | "ʫ": 516, |
| 979 | "ʬ": 517, |
| 980 | "ʭ": 518, |
| 981 | "ʮ": 519, |
| 982 | "ʯ": 520, |
| 983 | "ʰ": 521, |
| 984 | "ʱ": 522, |
| 985 | "ʲ": 523, |
| 986 | "ʳ": 524, |
| 987 | "ʴ": 525, |
| 988 | "ʵ": 526, |
| 989 | "ʶ": 527, |
| 990 | "ʷ": 528, |
| 991 | "ʸ": 529, |
| 992 | "ʹ": 530, |
| 993 | "ʺ": 531, |
| 994 | "ʻ": 532, |
| 995 | "ʼ": 533, |
| 996 | "ʽ": 534, |
| 997 | "ʾ": 535, |
| 998 | "ʿ": 536, |
| 999 | "ˀ": 537, |
| 1000 | "ˁ": 538, |
| 1001 | "˂": 539, |
| 1002 | "˃": 540, |
| 1003 | "˄": 541, |
| 1004 | "˅": 542, |
| 1005 | "ˆ": 543, |
| 1006 | "ˇ": 544, |
| 1007 | "ˈ": 545, |
| 1008 | "ˉ": 546, |
| 1009 | "ˊ": 547, |
| 1010 | "ˋ": 548, |
| 1011 | "ˌ": 549, |
| 1012 | "ˍ": 550, |
| 1013 | "ˎ": 551, |
| 1014 | "ˏ": 552, |
| 1015 | "ː": 553, |
| 1016 | "ˑ": 554, |
| 1017 | "˒": 555, |
| 1018 | "˓": 556, |
| 1019 | "˔": 557, |
| 1020 | "˕": 558, |
| 1021 | "˖": 559, |
| 1022 | "˗": 560, |
| 1023 | "˘": 561, |
| 1024 | "˙": 562, |
| 1025 | "˚": 563, |
| 1026 | "˛": 564, |
| 1027 | "˜": 565, |
| 1028 | "˝": 566, |
| 1029 | "˞": 567, |
| 1030 | "˟": 568, |
| 1031 | "ˠ": 569, |
| 1032 | "ˡ": 570, |
| 1033 | "ˢ": 571, |
| 1034 | "ˣ": 572, |
| 1035 | "ˤ": 573, |
| 1036 | "˥": 574, |
| 1037 | "˦": 575, |
| 1038 | "˧": 576, |
| 1039 | "˨": 577, |
| 1040 | "˩": 578, |
| 1041 | "˪": 579, |
| 1042 | "˫": 580, |
| 1043 | "ˬ": 581, |
| 1044 | "˭": 582, |
| 1045 | "ˮ": 583, |
| 1046 | "˯": 584, |
| 1047 | "˰": 585, |
| 1048 | "˱": 586, |
| 1049 | "˲": 587, |
| 1050 | "˳": 588, |
| 1051 | "˴": 589, |
| 1052 | "˵": 590, |
| 1053 | "˶": 591, |
| 1054 | "˷": 592, |
| 1055 | "˸": 593, |
| 1056 | "˹": 594, |
| 1057 | "˺": 595, |
| 1058 | "˻": 596, |
| 1059 | "˼": 597, |
| 1060 | "˽": 598, |
| 1061 | "˾": 599, |
| 1062 | "˿": 600, |
| 1063 | "ā": 601, |
| 1064 | "ō": 602, |
| 1065 | "…": 603, |
| 1066 | "[UH]": 604, |
| 1067 | "[UM]": 605, |
| 1068 | "[giggle]": 606, |
| 1069 | "[laughter]": 607, |
| 1070 | "[guffaw]": 608, |
| 1071 | "[inhale]": 609, |
| 1072 | "[exhale]": 610, |
| 1073 | "[sigh]": 611, |
| 1074 | "[cry]": 612, |
| 1075 | "[bark]": 613, |
| 1076 | "[howl]": 614, |
| 1077 | "[meow]": 615, |
| 1078 | "[singing]": 616, |
| 1079 | "[music]": 617, |
| 1080 | "[whistle]": 618, |
| 1081 | "[humming]": 619, |
| 1082 | "[gasp]": 620, |
| 1083 | "[groan]": 621, |
| 1084 | "[whisper]": 622, |
| 1085 | "[mumble]": 623, |
| 1086 | "[sniff]": 624, |
| 1087 | "[sneeze]": 625, |
| 1088 | "[cough]": 626, |
| 1089 | "[snore]": 627, |
| 1090 | "[chew]": 628, |
| 1091 | "[sip]": 629, |
| 1092 | "[clear_throat]": 630, |
| 1093 | "[kiss]": 631, |
| 1094 | "[shhh]": 632, |
| 1095 | "[gibberish]": 633, |
| 1096 | "[fr]": 634, |
| 1097 | "[es]": 635, |
| 1098 | "[de]": 636, |
| 1099 | "[it]": 637, |
| 1100 | "[ipa]": 638, |
| 1101 | "[end_of_label]": 639, |
| 1102 | "ŋ": 640, |
| 1103 | "ᵻ": 641, |
| 1104 | "θ": 642, |
| 1105 | "̩": 643, |
| 1106 | "\u0303": 644, |
| 1107 | "ɑː": 645, |
| 1108 | "iː": 646, |
| 1109 | "uː": 647, |
| 1110 | "ɜː": 648, |
| 1111 | "ɔː": 649, |
| 1112 | "oː": 650, |
| 1113 | "eɪ": 651, |
| 1114 | "oʊ": 652, |
| 1115 | "aɪ": 653, |
| 1116 | "aʊ": 654, |
| 1117 | "ɔɪ": 655, |
| 1118 | "dʒ": 656, |
| 1119 | "tʃ": 657, |
| 1120 | "ɪŋ": 658, |
| 1121 | "ᵻd": 659, |
| 1122 | "ˈiː": 660, |
| 1123 | "ˌiː": 661, |
| 1124 | "ˈɪ": 662, |
| 1125 | "ˌɪ": 663, |
| 1126 | "ˈeɪ": 664, |
| 1127 | "ˌeɪ": 665, |
| 1128 | "ˈɛ": 666, |
| 1129 | "ˌɛ": 667, |
| 1130 | "ˈæ": 668, |
| 1131 | "ˌæ": 669, |
| 1132 | "ˈɑː": 670, |
| 1133 | "ˌɑː": 671, |
| 1134 | "ˈɔː": 672, |
| 1135 | "ˌɔː": 673, |
| 1136 | "oːɹ": 674, |
| 1137 | "ˈoːɹ": 675, |
| 1138 | "ˌoːɹ": 676, |
| 1139 | "ˈoʊ": 677, |
| 1140 | "ˌoʊ": 678, |
| 1141 | "ˈʊ": 679, |
| 1142 | "ˌʊ": 680, |
| 1143 | "ˈuː": 681, |
| 1144 | "ˌuː": 682, |
| 1145 | "ˈɜː": 683, |
| 1146 | "ˌɜː": 684, |
| 1147 | "ˈʌ": 685, |
| 1148 | "ˌʌ": 686, |
| 1149 | "ˈaɪ": 687, |
| 1150 | "ˌaɪ": 688, |
| 1151 | "ˈaʊ": 689, |
| 1152 | "ˌaʊ": 690, |
| 1153 | "ˈɔɪ": 691, |
| 1154 | "ˌɔɪ": 692, |
| 1155 | "ˈɚ": 693, |
| 1156 | "ˌɐ": 694, |
| 1157 | "[PLACEHOLDER55]": 695, |
| 1158 | "[PLACEHOLDER56]": 696, |
| 1159 | "[PLACEHOLDER57]": 697, |
| 1160 | "[PLACEHOLDER58]": 698, |
| 1161 | "[PLACEHOLDER59]": 699, |
| 1162 | "[PLACEHOLDER60]": 700, |
| 1163 | "[PLACEHOLDER61]": 701, |
| 1164 | "[PLACEHOLDER62]": 702, |
| 1165 | "[PLACEHOLDER63]": 703 |
| 1166 | }, |
| 1167 | "merges": [ |
| 1168 | "t h", |
| 1169 | "i n", |
| 1170 | "th e", |
| 1171 | "a n", |
| 1172 | "e r", |
| 1173 | "o u", |
| 1174 | "r e", |
| 1175 | "o n", |
| 1176 | "a t", |
| 1177 | "e d", |
| 1178 | "e n", |
| 1179 | "t o", |
| 1180 | "in g", |
| 1181 | "an d", |
| 1182 | "i s", |
| 1183 | "a s", |
| 1184 | "a l", |
| 1185 | "o r", |
| 1186 | "o f", |
| 1187 | "a r", |
| 1188 | "i t", |
| 1189 | "e s", |
| 1190 | "h e", |
| 1191 | "s t", |
| 1192 | "l e", |
| 1193 | "o m", |
| 1194 | "s e", |
| 1195 | "b e", |
| 1196 | "a d", |
| 1197 | "o w", |
| 1198 | "l y", |
| 1199 | "c h", |
| 1200 | "w h", |
| 1201 | "th at", |
| 1202 | "y ou", |
| 1203 | "l i", |
| 1204 | "v e", |
| 1205 | "a c", |
| 1206 | "t i", |
| 1207 | "l d", |
| 1208 | "m e", |
| 1209 | "w as", |
| 1210 | "g h", |
| 1211 | "i d", |
| 1212 | "l l", |
| 1213 | "w i", |
| 1214 | "en t", |
| 1215 | "f or", |
| 1216 | "a y", |
| 1217 | "r o", |
| 1218 | "v er", |
| 1219 | "i c", |
| 1220 | "h er", |
| 1221 | "k e", |
| 1222 | "h is", |
| 1223 | "n o", |
| 1224 | "u t", |
| 1225 | "u n", |
| 1226 | "i r", |
| 1227 | "l o", |
| 1228 | "w e", |
| 1229 | "r i", |
| 1230 | "h a", |
| 1231 | "wi th", |
| 1232 | "gh t", |
| 1233 | "ou t", |
| 1234 | "i m", |
| 1235 | "i on", |
| 1236 | "al l", |
| 1237 | "a b", |
| 1238 | "on e", |
| 1239 | "n e", |
| 1240 | "g e", |
| 1241 | "ou ld", |
| 1242 | "t er", |
| 1243 | "m o", |
| 1244 | "h ad", |
| 1245 | "c e", |
| 1246 | "s he", |
| 1247 | "g o", |
| 1248 | "s h", |
| 1249 | "u r", |
| 1250 | "a m", |
| 1251 | "s o", |
| 1252 | "p e", |
| 1253 | "m y", |
| 1254 | "d e", |
| 1255 | "a re", |
| 1256 | "b ut", |
| 1257 | "om e", |
| 1258 | "f r", |
| 1259 | "the r", |
| 1260 | "f e", |
| 1261 | "s u", |
| 1262 | "d o", |
| 1263 | "c on", |
| 1264 | "t e", |
| 1265 | "a in", |
| 1266 | "er e", |
| 1267 | "p o", |
| 1268 | "i f", |
| 1269 | "the y", |
| 1270 | "u s", |
| 1271 | "a g", |
| 1272 | "t r", |
| 1273 | "n ow", |
| 1274 | "ou n", |
| 1275 | "th is", |
| 1276 | "ha ve", |
| 1277 | "no t", |
| 1278 | "s a", |
| 1279 | "i l", |
| 1280 | "u p", |
| 1281 | "th ing", |
| 1282 | "fr om", |
| 1283 | "a p", |
| 1284 | "h im", |
| 1285 | "ac k", |
| 1286 | "at ion", |
| 1287 | "an t", |
| 1288 | "ou r", |
| 1289 | "o p", |
| 1290 | "li ke", |
| 1291 | "u st", |
| 1292 | "es s", |
| 1293 | "b o", |
| 1294 | "o k", |
| 1295 | "u l", |
| 1296 | "in d", |
| 1297 | "e x", |
| 1298 | "c om", |
| 1299 | "s ome", |
| 1300 | "the re", |
| 1301 | "er s", |
| 1302 | "c o", |
| 1303 | "re s", |
| 1304 | "m an", |
| 1305 | "ar d", |
| 1306 | "p l", |
| 1307 | "w or", |
| 1308 | "w ay", |
| 1309 | "ti on", |
| 1310 | "f o", |
| 1311 | "c a", |
| 1312 | "w ere", |
| 1313 | "b y", |
| 1314 | "at e", |
| 1315 | "p ro", |
| 1316 | "t ed", |
| 1317 | "oun d", |
| 1318 | "ow n", |
| 1319 | "w ould", |
| 1320 | "t s", |
| 1321 | "wh at", |
| 1322 | "q u", |
| 1323 | "al ly", |
| 1324 | "i ght", |
| 1325 | "c k", |
| 1326 | "g r", |
| 1327 | "wh en", |
| 1328 | "v en", |
| 1329 | "c an", |
| 1330 | "ou gh", |
| 1331 | "in e", |
| 1332 | "en d", |
| 1333 | "p er", |
| 1334 | "ou s", |
| 1335 | "o d", |
| 1336 | "id e", |
| 1337 | "k now", |
| 1338 | "t y", |
| 1339 | "ver y", |
| 1340 | "s i", |
| 1341 | "a k", |
| 1342 | "wh o", |
| 1343 | "ab out", |
| 1344 | "i ll", |
| 1345 | "the m", |
| 1346 | "es t", |
| 1347 | "re d", |
| 1348 | "y e", |
| 1349 | "c ould", |
| 1350 | "on g", |
| 1351 | "you r", |
| 1352 | "the ir", |
| 1353 | "e m", |
| 1354 | "j ust", |
| 1355 | "o ther", |
| 1356 | "in to", |
| 1357 | "an y", |
| 1358 | "wh i", |
| 1359 | "u m", |
| 1360 | "t w", |
| 1361 | "as t", |
| 1362 | "d er", |
| 1363 | "d id", |
| 1364 | "i e", |
| 1365 | "be en", |
| 1366 | "ac e", |
| 1367 | "in k", |
| 1368 | "it y", |
| 1369 | "b ack", |
| 1370 | "t ing", |
| 1371 | "b r", |
| 1372 | "mo re", |
| 1373 | "a ke", |
| 1374 | "p p", |
| 1375 | "the n", |
| 1376 | "s p", |
| 1377 | "e l", |
| 1378 | "u se", |
| 1379 | "b l", |
| 1380 | "sa id", |
| 1381 | "o ver", |
| 1382 | "ge t", |
| 1383 | "ɑ ː", |
| 1384 | "i ː", |
| 1385 | "u ː", |
| 1386 | "ɜ ː", |
| 1387 | "ɔ ː", |
| 1388 | "o ː", |
| 1389 | "e ɪ", |
| 1390 | "o ʊ", |
| 1391 | "a ɪ", |
| 1392 | "a ʊ", |
| 1393 | "ɔ ɪ", |
| 1394 | "d ʒ", |
| 1395 | "t ʃ", |
| 1396 | "ɪ ŋ", |
| 1397 | "ᵻ d", |
| 1398 | "ˈ iː", |
| 1399 | "ˌ iː", |
| 1400 | "ˈ ɪ", |
| 1401 | "ˌ ɪ", |
| 1402 | "ˈ eɪ", |
| 1403 | "ˌ eɪ", |
| 1404 | "ˈ ɛ", |
| 1405 | "ˌ ɛ", |
| 1406 | "ˈ æ", |
| 1407 | "ˌ æ", |
| 1408 | "ˈ ɑː", |
| 1409 | "ˌ ɑː", |
| 1410 | "ˈ ɔː", |
| 1411 | "ˌ ɔː", |
| 1412 | "oː ɹ", |
| 1413 | "ˈ oːɹ", |
| 1414 | "ˌ oːɹ", |
| 1415 | "ˈ oʊ", |
| 1416 | "ˌ oʊ", |
| 1417 | "ˈ ʊ", |
| 1418 | "ˌ ʊ", |
| 1419 | "ˈ uː", |
| 1420 | "ˌ uː", |
| 1421 | "ˈ ɜː", |
| 1422 | "ˌ ɜː", |
| 1423 | "ˈ ʌ", |
| 1424 | "ˌ ʌ", |
| 1425 | "ˈ aɪ", |
| 1426 | "ˌ aɪ", |
| 1427 | "ˈ aʊ", |
| 1428 | "ˌ aʊ", |
| 1429 | "ˈ ɔɪ", |
| 1430 | "ˌ ɔɪ", |
| 1431 | "ˈ ɚ", |
| 1432 | "ˌ ɐ" |
| 1433 | ] |
| 1434 | } |
| 1435 | } |