README.md
| 1 | --- |
| 2 | base_model: |
| 3 | - Qwen/Qwen3-0.6B |
| 4 | language: |
| 5 | - aae |
| 6 | - aal |
| 7 | - aao |
| 8 | - ab |
| 9 | - abb |
| 10 | - abn |
| 11 | - abr |
| 12 | - abs |
| 13 | - abv |
| 14 | - acm |
| 15 | - acw |
| 16 | - acx |
| 17 | - adf |
| 18 | - adx |
| 19 | - ady |
| 20 | - aeb |
| 21 | - aec |
| 22 | - af |
| 23 | - afb |
| 24 | - afo |
| 25 | - ahl |
| 26 | - ahs |
| 27 | - ajg |
| 28 | - aju |
| 29 | - ala |
| 30 | - aln |
| 31 | - alo |
| 32 | - am |
| 33 | - amu |
| 34 | - an |
| 35 | - anc |
| 36 | - ank |
| 37 | - anp |
| 38 | - anw |
| 39 | - aom |
| 40 | - apc |
| 41 | - apd |
| 42 | - arb |
| 43 | - arq |
| 44 | - ars |
| 45 | - ary |
| 46 | - arz |
| 47 | - as |
| 48 | - ast |
| 49 | - avl |
| 50 | - awo |
| 51 | - ayl |
| 52 | - ayp |
| 53 | - az |
| 54 | - ba |
| 55 | - bag |
| 56 | - bas |
| 57 | - bax |
| 58 | - bba |
| 59 | - bbj |
| 60 | - bbl |
| 61 | - bbu |
| 62 | - bce |
| 63 | - bci |
| 64 | - bcs |
| 65 | - bcy |
| 66 | - bda |
| 67 | - bde |
| 68 | - bdm |
| 69 | - be |
| 70 | - beb |
| 71 | - bew |
| 72 | - bfd |
| 73 | - bft |
| 74 | - bg |
| 75 | - bgp |
| 76 | - bhb |
| 77 | - bhh |
| 78 | - bho |
| 79 | - bhp |
| 80 | - bhr |
| 81 | - bjj |
| 82 | - bjk |
| 83 | - bjn |
| 84 | - bjt |
| 85 | - bkh |
| 86 | - bkm |
| 87 | - bky |
| 88 | - bmm |
| 89 | - bmq |
| 90 | - bn |
| 91 | - bnm |
| 92 | - bnn |
| 93 | - bns |
| 94 | - bo |
| 95 | - bou |
| 96 | - bqg |
| 97 | - br |
| 98 | - bra |
| 99 | - brh |
| 100 | - bri |
| 101 | - brx |
| 102 | - bs |
| 103 | - bsh |
| 104 | - bsj |
| 105 | - bsk |
| 106 | - btm |
| 107 | - btv |
| 108 | - bug |
| 109 | - bum |
| 110 | - buo |
| 111 | - bux |
| 112 | - bwr |
| 113 | - bxf |
| 114 | - byc |
| 115 | - bys |
| 116 | - byv |
| 117 | - byx |
| 118 | - bzc |
| 119 | - bzw |
| 120 | - ca |
| 121 | - ccg |
| 122 | - ceb |
| 123 | - cen |
| 124 | - cfa |
| 125 | - cgg |
| 126 | - chq |
| 127 | - cjk |
| 128 | - ckb |
| 129 | - ckl |
| 130 | - ckr |
| 131 | - cky |
| 132 | - cnh |
| 133 | - cpy |
| 134 | - cs |
| 135 | - cte |
| 136 | - ctl |
| 137 | - cut |
| 138 | - cux |
| 139 | - cv |
| 140 | - cy |
| 141 | - da |
| 142 | - dag |
| 143 | - dar |
| 144 | - dav |
| 145 | - dbd |
| 146 | - dcc |
| 147 | - de |
| 148 | - deg |
| 149 | - dgh |
| 150 | - dgo |
| 151 | - dje |
| 152 | - dmk |
| 153 | - dml |
| 154 | - dru |
| 155 | - dty |
| 156 | - dua |
| 157 | - dv |
| 158 | - dyu |
| 159 | - dzg |
| 160 | - ebr |
| 161 | - ebu |
| 162 | - ego |
| 163 | - eiv |
| 164 | - eko |
| 165 | - ekr |
| 166 | - el |
| 167 | - elm |
| 168 | - en |
| 169 | - eo |
| 170 | - es |
| 171 | - esu |
| 172 | - et |
| 173 | - eto |
| 174 | - ets |
| 175 | - etu |
| 176 | - eu |
| 177 | - ewo |
| 178 | - ext |
| 179 | - eyo |
| 180 | - fa |
| 181 | - fan |
| 182 | - fat |
| 183 | - ff |
| 184 | - ffm |
| 185 | - fi |
| 186 | - fia |
| 187 | - fil |
| 188 | - fip |
| 189 | - fkk |
| 190 | - fmp |
| 191 | - fr |
| 192 | - fub |
| 193 | - fuc |
| 194 | - fue |
| 195 | - fuf |
| 196 | - fuh |
| 197 | - fui |
| 198 | - fuq |
| 199 | - fuv |
| 200 | - fy |
| 201 | - ga |
| 202 | - gbm |
| 203 | - gbr |
| 204 | - gby |
| 205 | - gcc |
| 206 | - gdf |
| 207 | - gej |
| 208 | - ges |
| 209 | - ggg |
| 210 | - gid |
| 211 | - gig |
| 212 | - giz |
| 213 | - gjk |
| 214 | - gju |
| 215 | - gl |
| 216 | - glw |
| 217 | - gn |
| 218 | - gol |
| 219 | - gom |
| 220 | - gsl |
| 221 | - gu |
| 222 | - gui |
| 223 | - gur |
| 224 | - guz |
| 225 | - gv |
| 226 | - gwc |
| 227 | - gwe |
| 228 | - gwt |
| 229 | - gya |
| 230 | - gyz |
| 231 | - ha |
| 232 | - hah |
| 233 | - hao |
| 234 | - haw |
| 235 | - haz |
| 236 | - hbb |
| 237 | - he |
| 238 | - hem |
| 239 | - hi |
| 240 | - hia |
| 241 | - hkk |
| 242 | - hla |
| 243 | - hno |
| 244 | - hoj |
| 245 | - hr |
| 246 | - hsb |
| 247 | - ht |
| 248 | - hu |
| 249 | - hue |
| 250 | - hul |
| 251 | - hux |
| 252 | - hwo |
| 253 | - hy |
| 254 | - hz |
| 255 | - ia |
| 256 | - ibb |
| 257 | - id |
| 258 | - ida |
| 259 | - idu |
| 260 | - ig |
| 261 | - ijc |
| 262 | - ijn |
| 263 | - ik |
| 264 | - ikw |
| 265 | - is |
| 266 | - ish |
| 267 | - iso |
| 268 | - it |
| 269 | - its |
| 270 | - itw |
| 271 | - itz |
| 272 | - ja |
| 273 | - jal |
| 274 | - jax |
| 275 | - jgo |
| 276 | - jmx |
| 277 | - jns |
| 278 | - jqr |
| 279 | - juk |
| 280 | - juo |
| 281 | - jv |
| 282 | - ka |
| 283 | - kab |
| 284 | - kai |
| 285 | - kaj |
| 286 | - kam |
| 287 | - kbd |
| 288 | - kbl |
| 289 | - kbt |
| 290 | - kcq |
| 291 | - kdh |
| 292 | - kea |
| 293 | - keu |
| 294 | - kfe |
| 295 | - kfk |
| 296 | - kfp |
| 297 | - khg |
| 298 | - khw |
| 299 | - kj |
| 300 | - kjc |
| 301 | - kjk |
| 302 | - kk |
| 303 | - kln |
| 304 | - kls |
| 305 | - km |
| 306 | - kmr |
| 307 | - kmy |
| 308 | - kn |
| 309 | - kna |
| 310 | - knn |
| 311 | - ko |
| 312 | - kol |
| 313 | - koo |
| 314 | - kpo |
| 315 | - kqo |
| 316 | - ks |
| 317 | - ksd |
| 318 | - ksf |
| 319 | - kto |
| 320 | - kuh |
| 321 | - kvx |
| 322 | - kw |
| 323 | - kwm |
| 324 | - kxp |
| 325 | - ky |
| 326 | - kyx |
| 327 | - lag |
| 328 | - lb |
| 329 | - lcm |
| 330 | - ldb |
| 331 | - lg |
| 332 | - lij |
| 333 | - lir |
| 334 | - lkb |
| 335 | - lla |
| 336 | - ln |
| 337 | - lnu |
| 338 | - lo |
| 339 | - loa |
| 340 | - lrk |
| 341 | - lss |
| 342 | - lt |
| 343 | - ltg |
| 344 | - lto |
| 345 | - lua |
| 346 | - luo |
| 347 | - lus |
| 348 | - lv |
| 349 | - lwg |
| 350 | - mab |
| 351 | - maf |
| 352 | - mai |
| 353 | - mau |
| 354 | - max |
| 355 | - mbo |
| 356 | - mcf |
| 357 | - mcn |
| 358 | - mcx |
| 359 | - mdd |
| 360 | - mde |
| 361 | - mdf |
| 362 | - mek |
| 363 | - mer |
| 364 | - meu |
| 365 | - mfm |
| 366 | - mfn |
| 367 | - mfo |
| 368 | - mfv |
| 369 | - mgg |
| 370 | - mgi |
| 371 | - mhk |
| 372 | - mhr |
| 373 | - mi |
| 374 | - mig |
| 375 | - miu |
| 376 | - mk |
| 377 | - mkf |
| 378 | - mki |
| 379 | - ml |
| 380 | - mlq |
| 381 | - mn |
| 382 | - mne |
| 383 | - mni |
| 384 | - mqy |
| 385 | - mr |
| 386 | - mrj |
| 387 | - mrr |
| 388 | - mrt |
| 389 | - ms |
| 390 | - mse |
| 391 | - msh |
| 392 | - msw |
| 393 | - mt |
| 394 | - mtr |
| 395 | - mtu |
| 396 | - mtx |
| 397 | - mua |
| 398 | - mug |
| 399 | - mui |
| 400 | - mve |
| 401 | - mvy |
| 402 | - mxs |
| 403 | - mxu |
| 404 | - mxy |
| 405 | - my |
| 406 | - myv |
| 407 | - mzl |
| 408 | - nal |
| 409 | - nan |
| 410 | - nap |
| 411 | - nb |
| 412 | - nbh |
| 413 | - ncf |
| 414 | - nco |
| 415 | - ncx |
| 416 | - ndi |
| 417 | - ng |
| 418 | - ngi |
| 419 | - nhg |
| 420 | - nhi |
| 421 | - nhn |
| 422 | - nhq |
| 423 | - nja |
| 424 | - nl |
| 425 | - nla |
| 426 | - nlv |
| 427 | - nmg |
| 428 | - nmz |
| 429 | - nn |
| 430 | - nnh |
| 431 | - 'no' |
| 432 | - noe |
| 433 | - npi |
| 434 | - nso |
| 435 | - ny |
| 436 | - nyu |
| 437 | - oc |
| 438 | - odk |
| 439 | - odu |
| 440 | - ogo |
| 441 | - om |
| 442 | - orc |
| 443 | - oru |
| 444 | - ory |
| 445 | - os |
| 446 | - pa |
| 447 | - pbs |
| 448 | - pbt |
| 449 | - pbu |
| 450 | - pcm |
| 451 | - pex |
| 452 | - phl |
| 453 | - phr |
| 454 | - pip |
| 455 | - piy |
| 456 | - pko |
| 457 | - pl |
| 458 | - plk |
| 459 | - plt |
| 460 | - pmq |
| 461 | - pms |
| 462 | - pmy |
| 463 | - pnb |
| 464 | - poc |
| 465 | - poe |
| 466 | - pow |
| 467 | - prq |
| 468 | - ps |
| 469 | - pst |
| 470 | - pt |
| 471 | - pua |
| 472 | - pwn |
| 473 | - qug |
| 474 | - qum |
| 475 | - qup |
| 476 | - qur |
| 477 | - qus |
| 478 | - quv |
| 479 | - qux |
| 480 | - quy |
| 481 | - qva |
| 482 | - qvi |
| 483 | - qvj |
| 484 | - qvl |
| 485 | - qwa |
| 486 | - qws |
| 487 | - qxa |
| 488 | - qxp |
| 489 | - qxt |
| 490 | - qxu |
| 491 | - qxw |
| 492 | - rag |
| 493 | - rm |
| 494 | - ro |
| 495 | - rob |
| 496 | - rof |
| 497 | - roo |
| 498 | - rth |
| 499 | - ru |
| 500 | - rup |
| 501 | - rw |
| 502 | - sa |
| 503 | - sah |
| 504 | - sat |
| 505 | - sau |
| 506 | - say |
| 507 | - sbn |
| 508 | - sc |
| 509 | - scl |
| 510 | - scn |
| 511 | - sd |
| 512 | - sei |
| 513 | - shu |
| 514 | - si |
| 515 | - sip |
| 516 | - siw |
| 517 | - sjr |
| 518 | - sk |
| 519 | - skg |
| 520 | - skr |
| 521 | - sl |
| 522 | - sn |
| 523 | - snc |
| 524 | - snk |
| 525 | - so |
| 526 | - sol |
| 527 | - sps |
| 528 | - sq |
| 529 | - sr |
| 530 | - src |
| 531 | - sro |
| 532 | - ssi |
| 533 | - ste |
| 534 | - sua |
| 535 | - sv |
| 536 | - sva |
| 537 | - sw |
| 538 | - szy |
| 539 | - ta |
| 540 | - tan |
| 541 | - tar |
| 542 | - tay |
| 543 | - tbf |
| 544 | - tcf |
| 545 | - tcy |
| 546 | - tdn |
| 547 | - tdx |
| 548 | - te |
| 549 | - tg |
| 550 | - tgc |
| 551 | - th |
| 552 | - the |
| 553 | - thq |
| 554 | - thr |
| 555 | - thv |
| 556 | - ti |
| 557 | - tig |
| 558 | - tio |
| 559 | - tk |
| 560 | - tkg |
| 561 | - tkt |
| 562 | - tli |
| 563 | - tlp |
| 564 | - tn |
| 565 | - tok |
| 566 | - tpl |
| 567 | - tpz |
| 568 | - tqp |
| 569 | - tr |
| 570 | - trp |
| 571 | - trq |
| 572 | - trv |
| 573 | - trw |
| 574 | - tt |
| 575 | - ttj |
| 576 | - ttr |
| 577 | - ttu |
| 578 | - tui |
| 579 | - tul |
| 580 | - tuq |
| 581 | - tuv |
| 582 | - tuy |
| 583 | - tvo |
| 584 | - tvu |
| 585 | - tw |
| 586 | - twu |
| 587 | - txs |
| 588 | - txy |
| 589 | - udl |
| 590 | - ug |
| 591 | - uk |
| 592 | - uki |
| 593 | - umb |
| 594 | - ur |
| 595 | - ush |
| 596 | - uz |
| 597 | - uzn |
| 598 | - vai |
| 599 | - var |
| 600 | - ver |
| 601 | - vi |
| 602 | - vmc |
| 603 | - vmj |
| 604 | - vmm |
| 605 | - vmp |
| 606 | - vmz |
| 607 | - vot |
| 608 | - vro |
| 609 | - wbl |
| 610 | - wci |
| 611 | - weo |
| 612 | - wes |
| 613 | - wja |
| 614 | - wji |
| 615 | - wo |
| 616 | - wof |
| 617 | - xh |
| 618 | - xhe |
| 619 | - xka |
| 620 | - xmf |
| 621 | - xmv |
| 622 | - xmw |
| 623 | - xpe |
| 624 | - xti |
| 625 | - xtu |
| 626 | - yaq |
| 627 | - yav |
| 628 | - yay |
| 629 | - ydd |
| 630 | - ydg |
| 631 | - yer |
| 632 | - 'yes' |
| 633 | - yi |
| 634 | - yo |
| 635 | - yue |
| 636 | - zga |
| 637 | - zgh |
| 638 | - zh |
| 639 | - zoc |
| 640 | - zoh |
| 641 | - zor |
| 642 | - zpv |
| 643 | - zpy |
| 644 | - ztg |
| 645 | - ztn |
| 646 | - ztp |
| 647 | - zts |
| 648 | - ztu |
| 649 | - zu |
| 650 | - zza |
| 651 | license: apache-2.0 |
| 652 | pipeline_tag: text-to-speech |
| 653 | tags: |
| 654 | - zero-shot |
| 655 | - multilingual |
| 656 | - voice-cloning |
| 657 | - voice-design |
| 658 | library_name: omnivoice |
| 659 | --- |
| 660 | |
| 661 | # OmniVoice 🌍 |
| 662 | |
| 663 | <p align="center"> |
| 664 | <img width="200" height="200" alt="OmniVoice" src="https://zhu-han.github.io/omnivoice/pics/omnivoice.jpg" /> |
| 665 | </p> |
| 666 | |
| 667 | <p align="center"> |
| 668 | <a href="https://huggingface.co/k2-fsa/OmniVoice"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-FFD21E" alt="Hugging Face Model"></a> |
| 669 | |
| 670 | <a href="https://huggingface.co/spaces/k2-fsa/OmniVoice"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Space-blue" alt="Hugging Face Space"></a> |
| 671 | |
| 672 | <a href="https://huggingface.co/papers/2604.00688"><img src="https://img.shields.io/badge/arXiv-Paper-B31B1B.svg" alt="arXiv Paper"></a> |
| 673 | |
| 674 | <a href="https://github.com/k2-fsa/OmniVoice"><img src="https://img.shields.io/badge/GitHub-Code-181717?logo=GitHub" alt="GitHub Code"></a> |
| 675 | |
| 676 | <a href="https://zhu-han.github.io/omnivoice"><img src="https://img.shields.io/badge/GitHub.io-Demo_Page-blue?logo=GitHub&style=flat-square" alt="Demo Page"></a> |
| 677 | |
| 678 | <a href="https://colab.research.google.com/github/k2-fsa/OmniVoice/blob/master/docs/OmniVoice.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a> |
| 679 | </p> |
| 680 | |
| 681 | |
| 682 | OmniVoice is a massively multilingual zero-shot text-to-speech (TTS) model supporting over 600 languages. Built on a novel diffusion language model-style architecture, it delivers high-quality speech with superior inference speed, supporting voice cloning and voice design. |
| 683 | |
| 684 | - **Paper:** [OmniVoice: Towards Omnilingual Zero-Shot Text-to-Speech with Diffusion Language Models](https://huggingface.co/papers/2604.00688) |
| 685 | - **Repository:** [GitHub](https://github.com/k2-fsa/OmniVoice) |
| 686 | - **Demo:** [Hugging Face Space](https://huggingface.co/spaces/k2-fsa/OmniVoice) |
| 687 | - **Colab:** [Google Colab Notebook](https://colab.research.google.com/github/k2-fsa/OmniVoice/blob/master/docs/OmniVoice.ipynb) |
| 688 | |
| 689 | ## Key Features |
| 690 | |
| 691 | - **600+ Languages Supported**: The broadest language coverage among zero-shot TTS models. |
| 692 | - **Voice Cloning**: State-of-the-art voice cloning quality from a short reference audio. |
| 693 | - **Voice Design**: Control voices via assigned speaker attributes (gender, age, pitch, dialect/accent, whisper, etc.). |
| 694 | - **Fine-grained Control**: Non-verbal symbols (e.g., `[laughter]`) and pronunciation correction via pinyin or phonemes. |
| 695 | - **Fast Inference**: Real-time factor (RTF) as low as 0.025 (40x faster than real-time). |
| 696 | - **Diffusion Language Model-style Architecture**: A clean, streamlined, and scalable design that delivers both quality and speed. |
| 697 | |
| 698 | ## Usage |
| 699 | |
| 700 | To get started, install the `omnivoice` library: |
| 701 | |
| 702 | > We recommend using a fresh virtual environment (e.g., `conda`, `venv`, etc.) to avoid conflicts. |
| 703 | |
| 704 | **Step 1**: Install PyTorch |
| 705 | |
| 706 | <details> |
| 707 | <summary>NVIDIA GPU</summary> |
| 708 | |
| 709 | ```bash |
| 710 | # Install the PyTorch build that matches your CUDA version, e.g.: |
| 711 | pip install torch==2.8.0+cu128 torchaudio==2.8.0+cu128 --extra-index-url https://download.pytorch.org/whl/cu128 |
| 712 | ``` |
| 713 | > See the [official PyTorch site](https://pytorch.org/get-started/locally/) for instructions on installing other versions. |
| 714 | |
| 715 | </details> |
| 716 | |
| 717 | <details> |
| 718 | <summary>Apple Silicon</summary> |
| 719 | |
| 720 | ```bash |
| 721 | pip install torch==2.8.0 torchaudio==2.8.0 |
| 722 | ``` |
| 723 | |
| 724 | </details> |
| 725 | |
| 726 | **Step 2**: Install OmniVoice |
| 727 | |
| 728 | ```bash |
| 729 | pip install omnivoice |
| 730 | ``` |
| 731 | |
| 732 | ### Python API |
| 733 | |
| 734 | You can use OmniVoice for zero-shot voice cloning as follows: |
| 735 | |
| 736 | ```python |
| 737 | from omnivoice import OmniVoice |
| 738 | import soundfile as sf |
| 739 | import torch |
| 740 | |
| 741 | # Load the model |
| 742 | model = OmniVoice.from_pretrained( |
| 743 | "k2-fsa/OmniVoice", |
| 744 | device_map="cuda:0", |
| 745 | dtype=torch.float16 |
| 746 | ) |
| 747 | |
| 748 | # Generate audio |
| 749 | audio = model.generate( |
| 750 | text="Hello, this is a test of zero-shot voice cloning.", |
| 751 | ref_audio="ref.wav", |
| 752 | ref_text="Transcription of the reference audio.", |
| 753 | ) # audio is a list of `np.ndarray` with shape (T,) at 24 kHz. |
| 754 | |
| 755 | sf.write("out.wav", audio[0], 24000) |
| 756 | ``` |
| 757 | |
| 758 | For more generation modes (e.g., voice design), additional features (e.g., non-verbal symbols, pronunciation correction), and comprehensive usage instructions, see our [GitHub Repository](https://github.com/k2-fsa/OmniVoice). |
| 759 | |
| 760 | |
| 761 | ## Discussion & Communication |
| 762 | |
| 763 | You can ask questions and discuss the project directly on [GitHub Issues](https://github.com/k2-fsa/OmniVoice/issues). |
| 764 | |
| 765 | You can also scan the QR code to join our WeChat group or follow our WeChat official account. |
| 766 | |
| 767 | | WeChat Group | WeChat Official Account | |
| 768 | | ------------ | ----------------------- | |
| 769 | | | | |
| 770 | |
| 771 | ## Citation |
| 772 | |
| 773 | ```bibtex |
| 774 | @article{zhu2026omnivoice, |
| 775 | title={OmniVoice: Towards Omnilingual Zero-Shot Text-to-Speech with Diffusion Language Models}, |
| 776 | author={Zhu, Han and Ye, Lingxuan and Kang, Wei and Yao, Zengwei and Guo, Liyong and Kuang, Fangjun and Han, Zhifeng and Zhuang, Weiji and Lin, Long and Povey, Daniel}, |
| 777 | journal={arXiv preprint arXiv:2604.00688}, |
| 778 | year={2026} |
| 779 | } |
| 780 | ``` |
| 781 | |
| 782 | |
| 783 | ## Disclaimer |
| 784 | |
| 785 | Users are strictly prohibited from using this model for unauthorized voice cloning, voice impersonation, fraud, scams, or any other illegal or unethical activities. All users shall ensure full compliance with applicable local laws, regulations, and ethical standards. The developers assume no liability for any misuse of this model and advocate for responsible AI development and use, encouraging the community to uphold safety and ethical principles in AI research and applications. |