{"id":170,"slug":"mlfoundations--mint-1t-pdf-cc-2024-18","name":"MINT-1T-PDF-CC-2024-18","author":"mlfoundations","description":"\n  🍃 MINT-1T:Scaling Open-Source Multimodal Data by 10x: A Multimodal Dataset with One Trillion Tokens\n\n\n🍃 MINT-1T is an open-source Multimodal INTerleaved dataset with 1 trillion text tokens and 3.4 billion images, a 10x scale-up from existing open-source datasets. Additionally, we include previously untapped sources such as PDFs and ArXiv papers. 🍃 MINT-1T is designed to facilitate research in multimodal pretraining. 🍃 MINT-1T is created by a team from the University of Washington in… See the full description on the dataset page: https://huggingface.co/datasets/mlfoundations/MINT-1T-PDF-CC-2024-18.","tags":"[\"Task_categories:image-To-Text\",\"Task_categories:text-Generation\",\"Language:en\",\"Size_categories:100B<n<1T\",\"Multimodal\"]","license":null,"framework":null,"parameters":null,"downloads":165744,"likes":30,"verified":1,"created_at":"2026-04-20 14:59:19","updated_at":"2026-05-02 00:07:02","source_url":"https://huggingface.co/datasets/mlfoundations/MINT-1T-PDF-CC-2024-18","source_platform":"huggingface","hf_repo_id":"mlfoundations/MINT-1T-PDF-CC-2024-18","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":2,"risk_level":null,"risk_score":null,"versions":[{"id":169,"model_id":170,"version":"v1.0.0","manifest_hash":"3d939d1b2d54ce76fdcc9ce74e61f8db587085b65ef99ca2b2c7b50102342cbc","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/mlfoundations--mint-1t-pdf-cc-2024-18/v1.0.0.json","created_at":"2026-04-20 14:59:19"}],"files":[],"signatures":[{"id":531,"version_id":169,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"92e8bf9019a8f5757d1c8fb6dc716bf9cd602f17464aefafcccf561a33e72df7","attestation_type":"registry","signed_at":"2026-04-20 14:59:19"},{"id":688,"version_id":169,"signer_did":"did:web:quantamrkt.com:chain:authority","algorithm":"ML-DSA-87","signature_hex":"d016696e6da0324f1676148e84b696f947289a14c222cc1bb6379552b7ad1beae8cd04411ba18f4c149766ccb357f12f4d7f3a9a4102fb1803ebf0cfdee33d8b663ae6bf7b93d772e52913d2c2d3333266ff427d73f4206aa20ea67500ee93f3a121d2d79c904852aee5eaaae824c6be635f9b75c38f89e133e7b89d9436942dbf2247bcb95bcf618e9169f22b574a348dd705784d693c18e053802b7a5422df52851b0c4c465fda28ab413b4ccea5a9930282b689f0b3cb29dbe6ff5b94dbe4036d115593f1649e893ad87c89f6fac6e5cce07cfbf0361b4354d470b595702ac783844c70b551048af765ef57a22d4281ffb6ea19db7ccaf46e07b1b3926f13219d9050ca92b5ced75be981671873109272f1e9311f8f33d327a0b4ca58933d7dbf0de3d2d2e40a1b78d79faee1fab32a6add10e6ee6d0cda122909572effa3b4426017a66bc7874c9e931a4a0fd9711f91cb24222a6f6a9bd5caea232503610e9ba45cb0b4f869584fdde63e5577b03bbca3f9382a729916e455ef09aa292ccff7e396d780af65ec6d5713f3e2abd6616e6d95683da0c706d6b9e00d886ba83bedaec8098791924d439d50aa539681e5513405bfd60757b4241ff9e00bcb2ec00b92ad0c11ad9fe64d5f4ab9b6a044063bcfd1c26561af15019feadfffb2875a948640bb71f717111880be11af246dcd59548e52294dd0309c67fc13a73fbd2ec50ede8fd0306eb6119a2c40105b2bb4bb837c1f18a28bf172af67d3901bd823a90d1c084a76d8414930cffa8f3e7388c580e056999717fa77c96f47d62ea335929754329ef0f1b5933c7eacb660871c7e1b52820fd6d30fec0783a44175f009a460fb0fe3f26eb3b8f82de5d9128c3423a76e80a34a7ad52fdbc316d2bb5e27d4fe1c603c748b1ee0a94ba07077e90e0ee991fa7e6e8c227a2e162ca9788484dc9e3701542a5ff77d85eed0eed7496b75e015ba6bab5a017e7f12d418a89a2270d7e90b3843e8fc9c8558b20b5008104e8cb5ba125379cbe3e6497c027b706daff6f330e0a8f1dc1d7071b6bf881c511d5ff63fb981db2b54e8c0bb63c8bb85b321825b8cb9405dd3c9aa736f0f2e3a382053e57514890a3c9296ac86ff05b7586ddc6d06815178c0234ab12dfb11180c545e3674c01d64135582b4e99bc8db249168ae07f89226f1e0525b7756c02a5333904623cc72489eeb23dc29183b8932fde982bece9f54a31399d392ec5d1c5e41f5f1bf2617d79d59a2e6c4118ad3c33867105f3af086cc5444ecdd4d4f57017def57547745ec2bda5970f9868a9617b9aa8fc0e65a2844c665fef435fd3910087548fbf2bd8df9a53e4b6374ba9a219008cb3c0bcad26525461b0f4228222828ebb8b09863a9897096a4543a9ae80d3bdf5b6fdf2c08106e21c5be5156a09de0ebe41d439050566a4d22300a00047b062cbef4934558eef063b3816b0c55f4247ceee1e9084b5ccd16f39a0b1a956185dee046542679bbc4658680c013e5927d3a2654d6865b2e52dcaab7fdc48ab88fce5b0c0b2ab6b9c7eaace2d0c85af991f06c582b0274671db2e213dca2084f51b4d07efe0bcab9b35eaf47169ff10adc1dc972814d80ea243b9b9d9cd52aa943ab1eaab40459ec2c1ea73ab7511494de2dada68b0a04176ef673616b82d8962aebc35f7d283950ae154be76742994a95e08e74a2b8d5bf2d97fa1b8c2d6cf690d37382646329be01da53fa9cbfe9f87f77e92c9de9ab4d0f4d1d39027e86b1142a4f994ce39cf58c65648e95ce8f15fa634016bfed579233cea3f1d4cb4af9686cd4aaa74f1497bd9bd829076047240e4bc845ae5737afc4c42f691f637cf383c9ab74b86528dba506759e331a843a1c19809ff16437d365a458b1b1edddc39f59ca4f1eb93c8eb7e680b3a633f0c3836bd5a7b3ba55bbefd743b6abc73c463b7ec6db56b563e66265ec233efa819aa04275c80161dfb0d959d427b53b0fb72952fa61011514c6233048f4c27c8698f72eb1b39849ef529819ea22cd26fa871fc7b391e6fc9bcba697737f3cc078df9cec90660c07e72c2007faeecf16145229094c83c49ba35cc77f34ec05f830014285a7b2d597a2601c8015c04515adcaeb3e64232efca364e8d903506790a4c8bb1631eab3b8a400957bd170137922c502952e5e500a3948f6ac961e9295876637da6c6f9d6330b37dd317f59bcfa1ab50a18e8cd638c8c83c1dbe19469088ff78dfa3733be8f89dc16bdb6f32a7c2cca6d6a33ae6b760077bfbf4b47489892b4d381c8c073be7f392eaf2ad3ad82f3994b089e9254148c0413456a4da2a20de7120399b50b2acc2586d011d6dee997ec3566573c200caac8ebb6b96a4ddd7d8666de665008dd852fe16cf5fed62be4d5650dfa56dfad1e5b4d61bb02b135c59b1e20fa329e12b8afd493c7aec48d936e95ac42dce821e2b88cfda0ea11eb2ff61001eb26e006e29f18259a20f76d99b29e42588fda715ba1834f6f17bea8da72e11cfa408e142b8e09c4de9ad1808d2db4edb5e31efb6311e9622966692bbfef148e995fe654148c024e77c8606976ec057cf0a35d8613f2f45bfe2ffbef86d9c011f486105f22b0656fb5627b49d870630a6d3355696a907e68a866bba71b92b3712d606d6c729b4dfa0d60d7f2ddf5c8fc2f8356374cb38c619a7f411536aa5d68cef266d9c00bc797574400e25217ed5682186ebac70a83877d1ccf9415e56e72a682f5a6911176911030a782a673c9dc7de0ec8c99e417564017de6d753e7afefd8115a54a79eb78338e5b3caeeaa69f68f77b30a93c87e9fcfb69a57db418915512f658612595864e2ff7c464355a4fac9ca3189264f88e290b400b1d0c3c3ce30d1300340431da9b941467e51d7d024b149f83826947e89ad769db3da0e2d49ddbb3b529b8283cc72ff950fd95ff986b340f92ff48d220fe38ed420eb14edebb5b8a12f82e8ca8fc04dde31e4eae4b2432f77a8b43c9c5736f5c5e41f0a6d121c5b4a68e8a9f869552ab9bd3870747fef6719866afa7758c2b5779e555a5ec1ece14e35e7699dbeeb04f4d444156d7109f7f9990e703c3178dd66a2dccbf9f7807711d50a6516ef22d9c1ceac0878b45a8917856f2069706f8760dea55d194b9091dc2edbcca9f5c0725be83147fbd9f362cef02fc58855947415fb2a82e048011d8f91d2ac8ea0db01a6eea34401cc5e1ad46aee1fae1d67406ba78a948b10211fb36510f29f4e83ac7fabe3a4f33db28302d73ddc6cee7c8978d16a2f0d3690d94b762b0d26add6e75975b50338450f67f1f40f7ca38517c7dcc33e836d2e913ce40f6d07a8ac2c1ea526f124720ef2626add960b6ac03c212794e4c2006679d5778a5371498f95d56fe455276405199c3a7e5bb14a90665e944e44b63e1a44aab15250b7a9677a2a18ad5f34eca5c44bf59f57635a270e6290c3220d8a7f07db4572ab497215b55fa91d9939d0a57c7d65d33102878fe4713c9bb0e0ee0c90e5dcafeca2d355ed4bb50f54246ac7f662d2b87466f50275797a3423f839c7d860d85196acf93ddde090d0c7e421051636d2a571ebb646c5a532b4e5e593ebcc23b4e72b55a968be58002c89ecd3117f179f8dc473caa89bcb8eb3946f433ea117b1aa94a7bc59e70d47748d1ff1f130445ea70c29d3d287202f32cc751ab350bfdd4395cd8106a05c2295e34256f693ea5636c64020bb4eaac52275c63035a6a761de3f19ebeca79dbe655781c4f274ff688774890ff9daf2214746d03464e2554df883cfe095e69bc5ce76c1696a300176e744026004ae7eb3dbe01ef6abcee19e7da6aa3350fdfbdfb469977856875233c717ce7273fd85d848e6206971af3c8afdbb758c4645da4943e8a7537b91d0248086c966f79ca94c88a848f8174323cd50507134bd4feaab775ba2d8b82c81e941aadaa9fc2dad25bbe3bed7f7661ef1c7b259cc741ae255b1bedd180b42b51f0c599b06e1670e7c68aac30e723740b8a2a9a9f22db045777fbacdec3c4566e93521b549d110ef9ee260900f190fdcd4a57d6a36fa3244d6998e27fc1e8f011ef392ae9e0c481955576c2dd17ba329b05ee5a45b199f11fbca08c8432b4e330fbf9a2020184ee51233f102ed543bc9cf54496acc549d3c6e4d404dbb19376b77d1402c4b1a8bc6f6c28c1b3d09c3bddf02aeea8611068110fb529a6d604c6a6b9515fd0cd407f1f0c73cd095c2b8999f0018a5de043e224b6279c11f9839887c7f47e2498d663d1a82df18ddb9db4c422434f79007acadeac14c19385919127839838c932bbfdc45adaa3e26742d0e2c8b58dabd1e1fb8db5a0c02563fa6d0667f320bac0408c5d0727421b2ebd8b8e0fd32abd1d45753d03b961a9556495e698e46649689cf650297d64a8689fdb9195548da03be6b993b3ef2a016d2b1b39e693441d273dde95020f93cc79bbea08a45de2ff7616c66b77282cff03e56b94aebb6984c8285da87aeea8bafd669d13926450d3790a3df71f8743c6911c84dcd0cbd05caeca9a7a2b653749994f1aa16d85d98bde21a9c81245c47d289b729407a491315dc6db494d30ca210f607135697d082100b6014f2aad7e7588dfe86a2ac4069ca9e28647b2cb64f68ef114de0b4635c3d07f6405f1e36a3856b3fa996eaee88c2c2f703c58d16d78d8110d21e00a74e789b6feabdd3ab68208cc9d0e572c7edd0fcdd01c8c0f18248142f51c07826f316ef1e72fd0460dfd7ce7c9263cb4df2ceac88e783636cab77fefb321fe0c5d81ffc26d574fc14b662acd638ebfca768fe3dd9399cbaf40d3d072cfb167edd7a5517b134f7cd65ca2fe9b36018644c927293bc30f459102d4f8f7fafd8b9da309e6ba0c97288592a749281152fa5fb890dffdf9c4cd904a0291d02cebf5f34f2aa0f5a840621f4b94f961d99025fe046357e3f16a92e8e3f0aa62e22c5e8a498298aeb139913524fad1d16bb7fe380f26272b5e20d9679c6779481b62ed5af308ca5aa0f89ba21dd2bec1f2d8d6b87778c779c955612963e8e46889c3d449722ee20a9916c1cfda9b43259f73b42b48f6956eda597ed5cc6a3b22d250718db3e999148d59719e2d2a0fb748121894ec1a68fe01459a35abab7d8d467a595d07b66fc9164cf3f4a1492753256bd6868790433779bef6ed6ffd1399b198c050d0b0f4342b7b174cfd470e0cad66b5a2843785373a67e07d7b7140df0cb7197bc2b8adae87fe0f5de8e1e71636e0e292352b6cda21553c2272143cdbd7b0ae703aac30ceeaf5e9a918b688ba95ead71cb9a00d446810a434194f6b1644b54b7ad30347ef6acda82a9c9d7b8f57147232611b1b949dac712482ce276f00ce8e3fbcf67aad526bf72f6eced4b1b728206fd86b81a0402966c046a8b4c3b7c136efbd46e860cafbb352251c7d29f85f94d225287b8d42f5cd3801cc11a06f232c167c7485aa07bc6bf9c296e5a22274387da367716bd1127c53d3cf23629d0ecc36f096457ed11754cabefcb24c3c41ad80cb7e928df9858f4a574685b00d385b22ac005195611df256909a5c23e033285ea18cd130595dfc9ee2e2e48b2940ec906cee7545a0ecf48eac3c571236aded5bbf475a4c7ccd8e0c4eb7dae84eefd1b314458770206c921e80406cce37c3fa39e22e52355f013afb15ed921bc9080bdfb341c92ab21a5bb36d931560acdfd935bd0d2a8389758fb359052d6920869df7d233cc802ffd7ff4544b49658d871f02499f13812d3ff3569d5f21f27c7eb202095225f86c1130994d3a5d2a588a8a74408a5ff147a1b455d62f96070ba006ca112fdd271ef116c4329199b60d54116b15f2fa44c0f61366a0d9371139e774fd9aef17e4d367b8dfd8dd3f3ab49d589e0123a9bbdd2c0f1f5a5eadf2c9236780eb9248558fa8da3737f600a355b3a6d9c2482f619a7786aee50cd1185c4d1433bcc8dd62c4cddc538b0fd4294221fd28a471334f15d9f636fc52501c38a06d4d2b8054049a0125e8c43eddfcc3874dd239a956dc65cdfcc0b5aa5ba6b4fef381130c4413445f85585e2c88d84b9dd3c4a299da0c29bf4fb76cb6492cf243c373e458d85ef405a4c3d5024295a3d5148c0c06f8565f6327e42e59ea9096ef92e2277d6ec4e52da04bab28585eccd0855db07734432b9be60d73636704580a9ea6cb4bbe40fed5b829e809036d38dc25350494fda785a722cb0a3416e3a71dd5336173434c579f4fa9ddaa3962cf37d4f5ce94ca5bb6082d709c9b00778d2b021cad5e762d65e2ed217fb6310801e9336e2d1640ebd2de44c3972507f35f2fce087d1e2df555cb692d36dfad53cfd4888a854b418cdcd5a82f248e7ead3eb17a9153828b974ee9fb0b17f712989f6eece0af901abb7f972574b2297cd258335bc8ba31558cbd0f216586175a0c6233781d6ec2e3483858ff85a6699a4bde8061219324a6f7e83b6db152c4957ce01206b88a8afc1d40e1e2c517f9ba5b4f90000000000000000000000000000000000000000060b111721262e37","attestation_type":"pqc_registry","signed_at":"2026-04-20 19:44:29"}],"hndl":null}