{"id":229,"slug":"mlfoundations--mint-1t-pdf-cc-2023-06","name":"MINT-1T-PDF-CC-2023-06","author":"mlfoundations","description":"\n  🍃 MINT-1T:Scaling Open-Source Multimodal Data by 10x: A Multimodal Dataset with One Trillion Tokens\n\n\n🍃 MINT-1T is an open-source Multimodal INTerleaved dataset with 1 trillion text tokens and 3.4 billion images, a 10x scale-up from existing open-source datasets. Additionally, we include previously untapped sources such as PDFs and ArXiv papers. 🍃 MINT-1T is designed to facilitate research in multimodal pretraining. 🍃 MINT-1T is created by a team from the University of Washington in… See the full description on the dataset page: https://huggingface.co/datasets/mlfoundations/MINT-1T-PDF-CC-2023-06.","tags":"[\"Task_categories:image-To-Text\",\"Task_categories:text-Generation\",\"Language:en\",\"Size_categories:100B<n<1T\",\"Multimodal\"]","license":null,"framework":null,"parameters":null,"downloads":245208,"likes":10,"verified":1,"created_at":"2026-04-20 18:22:10","updated_at":"2026-04-30 04:05:02","source_url":"https://huggingface.co/datasets/mlfoundations/MINT-1T-PDF-CC-2023-06","source_platform":"huggingface","hf_repo_id":"mlfoundations/MINT-1T-PDF-CC-2023-06","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":2,"risk_level":null,"risk_score":null,"versions":[{"id":228,"model_id":229,"version":"v1.0.0","manifest_hash":"604c2bfc799f21f0f0975819cae3d0f8712eee420c44833ab71c63f1bce9ae68","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/mlfoundations--mint-1t-pdf-cc-2023-06/v1.0.0.json","created_at":"2026-04-20 18:22:10"}],"files":[],"signatures":[{"id":590,"version_id":228,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"0666b87f6c6f27623a8ff702fef6bf38922790884e3407933aa780107372656e","attestation_type":"registry","signed_at":"2026-04-20 18:22:10"},{"id":707,"version_id":228,"signer_did":"did:web:quantamrkt.com:chain:authority","algorithm":"ML-DSA-87","signature_hex":"789ebf3e14cbd5c500fcdae5da8139ecf6d259923e10befc42936ed9d12e63d55af970e33b99a78f1a1d9526ab861fe94e5068744dbead726829546499d490251157ce74726a3df7d6b3729487ecc44cbf2bb83c8101dec9a31aee3208ac4ef5788fb895e6838b0592e4b9190a52591615b87862d7dd48237fd341ec856bc6cd3ab6eb60d7c986ffeaebcf9eff2bcee0de0a0ef33562ec13edbe3c93b7660da14010e1c8a99e5c9d332bb65fecc396bbe59fd1d8dd37557755df7294f4484d9334bf9665ca080b43bc661199dc67a81255c65268cf869e2e93e03c58984453e882324d1979979a3e7229a079d99c2c24121431d9011a6df325036d8b274981192dcb4d4d1725459cba8bf1b08cab2ee206024a9e35f1f73057ed2d3f8798daff186d379ffb7b37ed9ceab9c27e9fda26bb93687e0b9b5457dc767bd0f164a9d06b9d90103ab7dc916c73205468012cdd233bc7d66f2f2c98383ad44002b42ef6f11913314daa9f825b571a534b980d8acb55dadc72c68073f558250beafd1ed8283c01e59615fdee6a5ed05c6a5240c5e2f140f8408ec2af2a5f3b047fa115a02d8fde8f6bee5339742aca74f5e4c6effea66e9664f3005463525af6e21307f7a777b62875ecd1508ab02df524d1c650e13c727803868ec5112c1d791691b85ec1935d05d4c988df677813806f5ac4700dc38de5a255a0d049355d439c551885fc79901c824e6da76d97645dd05295a49f7eef3095601b455f532c444d5d6df90e16c040820c0e0fc961ae5ca94bfbaa4041b745844cd77fd8743b157b6bdd0a570bbdcbcca592d73dd367ec6e6352db2c442992687026b7ff304fbe0c75143aab1c9c2e8aae0a0c3b31a43784b3f5360e7fe3e64e5371f701e2133087290d3481bfb15a163ed2c051a87caa4fe84dc32f00bb17d7c4069efa6ecc813d0c00d6722aeefc5f4cfc4c12da1d3a9a0756197f96d8be337430667b91dcb79bde08c18a409dcd9338a61439afa34498107cc3140fcb59f157a3bb49b6ce19324295f296028eb2db2b21b5b9c254776aaa69390c50d19b84fcf535eda475288f8f47bea243c714a061bf49d9e1586db10b2ff9a7aa0f94c166c86e496ceb513a465b59303967e8d6c5f104b6be8358c4b7580f133eec909f6499e2905ed3093590443faf4bfecea02ad66897929b8d189fb14a5c4acf19bd91325670e9fd8b641056c546c831b05d8a91670615be69756c9c18a331ad6a3e23625ca8604a0bea7d59fcb3a839455164ecf4aff0d2f0c0c08f298f9773aa64a085f3f6042eeb1626c8bbf89fc11de1689c01fc9351776a5576133f79df8932dd20c19e7584cd5c4e371d18d775322d189f776baee62031a5c632455d30dde0f117724ad1ab9934e673ca185e5a4d673c3e2c7fefcbae892b5702a2a1f20156583628a4b5b627d89c4d208d1f42588c31bd0bc4c02c4a2eaf483135c2ecd162cfe59153083716cb6fdbf56cb9263f54b98c8c3c257f7a9565734df9243452a1fd41561b3e7beb44c6444302fa3463c7e2e1ab140959971e67e9b15539668cb7f499d509a6a4a210807c1eeffa1abdb1532b8c62a9048224fed004c4e4a4e39214164f5af36536a6b6c2e93f7176dd148957abbc1f43389a6e43e10450dc09cfba478fec96b5fdb2def2716edb7225a622452b299662c08ae9f3e0ad516256653d9cf6077f8757bde242523d74ebe87e5e7849bebeb61e9b8383b5bf33945928e85fdb1eda36f2ba9199e835db0ce580264eaf19f4bb7d67f62db2790fabc650c4efd3fb86973550777a46801b697001de401d0d34be408b3acf180c4221f172dc9c13713df86935446c690dc50d6c21fbb3ab9c4836066801b2fccbaead9472d4e6ab397bd2666a92fd78e68c7c25de352c9d152b4bed673f5bf77e7aaee9c92ce4e00ba41dde15157f1e29c5e192b7d82e540c6a7fa7dab51fd59b310c6fd6afc46d90ee78c9ca6ba63509879160067318090e1029e0b7dc470e67fd5ce9e0802594a2478d4672ac0da86509faaf46c3929e17272898e4a4025b9fda7cb9d391292149430d5a558fa0ead76ebde8f44570788251cfc778f0c1eb8950de622edc731b02399410f96ea1b7f8615ded1c88b8a677eaf334bfdddc05a5ed14564c1daba94bf07b2ff2554ff983254f0c45a44cc8a8d9c474593cd8ee5d72a0a6799154fb376abd902d41387d6bcccc4b8482e770b6b40892861965e12d8bb18cad68ccc9acd2a864b09bc67a8d2a308eff7090bfca6b84ace2d6743f2beba8045c43395f22ffb8cec57f2915f3be030cd10ee56daa078ab2ee6f5c83ec6daf8e8cea7ab1984552aa977c6305d1eefc8ca4e4a0345865c82b311c9ec573e4dae7014ba2c523515b47ccea230c57717469945c605384de37e00f51e5327a5e47ea352a6ac973ba93741e0f3bc0c9bf9039d0d5597618a0a08c0fc4d435f72fbd5e6b34a417f0dfa16157e0a507f78615d33e0d4460668e78a3fc58789ae89f9d777b163f21a42025283349360e88cda542bc4adc59f60723bb3d64d07316845dcca6ba4712bc16c6d69e4110434faea5cdfee0db8bbf45289838c4423e6d347de261bc871e492a74c5c5e3f79e520eb42e6a12047f05bd6568ad16c41169ac071958c322100905ac1feaf94bb0e46d509d7e8f13231ff8e8db68e0233ce2ae6598a1ff6c5b50ff2ebf51a8af1bbae8afe1e0c399f39eeb94b0487619de9877b503d3ceb625d5f9ebbfd939ae195f599fd5997c2ab57a165eb75f6703f3c392d9c7abe075af33ea31a3689a88313e02291709e77a2798b9a54916cf394f58e5418668e50cd69a378da2abfa2190a3ccdcd6ce2b65973dad530f1e19ef6717436c778bb3571b5ea4744f86d323943512d2c7a16442ba99877fef73f3ab5d6dd77b1fab3f22dbc3bb1a5bf8c1d54f794c840b7be7026fb392e88b3efb34562ba61d44c7ec3ab386d38cf3788ff3e10c885675c8649e04b2d823e7ef848d022402d993feef0e8de0dab8d9fcf9f4207d274838e0bc92487d0fa389b0181dd90a78752dcf7967f87ab0bc003716dca8d4f970ca8baeb05e621f14630e1424a52d413410007cbb1480eebac44f22bf972fe77ac7676b2758bb219e866177889443be01536b1bf39d957f20e33ef5007c17412d755dd045c3c617b6cdadb1aab3f55cb6a6cc97a8a9731b5d9fcdde740cb4bf76bc8a35d9c896a0d1b142a9ed4c028483d227907102755a5cc7e524230a542ba3e7bc823594f035fc3806e07deaeafca15e907bb86bec8f56156b6f7f395f8c7cde3ea5424821eab220c3b013b9e7300b60efc3b6cbecd9861026a3f0ef7d8eac6749207d1ab99d0df41ddc0479848b3d2c02cfce577e5bcbd75f4b63b886fa506b639516a6e658f9212d60ab016c8f8dbdc4f42f9eff61d250c1a13a77f5c5fbf0f13a328d0eb0bbd39da2e2cf832db7ad7a8637bd473e47fd4fe512a397e261fd71f8a8c1b828af96b1da5803a1bc6912dd7a99c3b42c51b7d869412653945dc6f8f03623c4a1623c5e6506448618cb6f4c559180f6a01be4cd1380a404bbcdc462a14f7ed7bfc40a27e87f7bea650cbdea213825d8f941699adc92c70417b83bada7b60470b94db91771c35bcb1e9e5d4cf15e920fdd9322504463451702de026cf60d07d85539c05bb7331401ce2dcdcbaa2e5789c018fb326bb235d726d6d7117c57e96e3f1ee338f0ec93ae832024a59373eff8dd0453bb8b6cdb05d815d101c5bdddb2c35d95fc3834ba0ca9d7bb5d2515a433f8cd1cd74ab13e6bb84b142ba3e5772e56771fcb9cc09c54343bd8a5db85abba7ae08de788f7178a561a872136c71df889e4aacbd6fc616667b29fc258beadf0f94abdfc19a0824b6162c5376574fd2ab0772244b594f2aee912ca7c472c8a25fe0d255bef032ce233e9456b05cd22208d52f5f19eaa1bdd28ecde13f254ee1f70cc16e7b9e8dc0c614ad6af16220f956548862f86835395a79776686d604b00adc8dedbbb15df0cfae532ed20a5986cc705299f2c1c66df3a98487d59702acd64c44678f820cc5c3ffb9a57f5b0136fb53137569ad4fa8fbd01f4c14a31ea484b44ce5ea8c183104b3477cc25afd91f18608443dbb520202bb86a63ca991d121655255ba51dd51f5c9251de15e3d3218bcd9ef223a909aa3c6aebae722d722b1b7675286f9884b921aa4db60e61df9556c2099dd4b80a159033c8a84b34bc3daebf95033d3d53c0109336a3bb7023fb96b0b579246d35254829660394cf66502b0476194a5421e0e85893b06eef5230e866407de462e44429aa8477b6299f98af4ded2d656c9520c9898abd8dcc45f8f382af5c0a8e9dbfbe813159e452428c2dec620ea95032d1ad0c9ee5d6bca28e35544da542ca7a1b856cf834f05b0c51019f19cf13910823fca62ab1b0a8f23bf560dd5c25d19034b0fc94a079f06f699658d777f2360319a55663812506cdc94031d1cd5ab74b3f0323eb96544b5a0e1e8ea98727aa7762c742ecd9ecd8295cd4c6dfb7b86037683b35e05bd332ec69f09e356def0613fad92be32e9ca8e43a4191e01aa5a9b96f250949587d7e5d43e2f426e415b11fe43c0c30b568f09c0615c07aa967f2771ee861cc2a7146b18d34078094e2aaef5717a2c52a364847a79c3359fac65291d469500f38a6e4669fc4882e03b282ecb78c0c75916bb49a0997bf5518096574b3dd1efde24a68d66b65b9a85e58dfdacb50d36c64cecc9568ce1b97c00af3fc6b33f390a431abc64bad430e01b73be4efd15bcebe9ac0a97f1bd27641e21987eeb18f3685e1a8d031a3258736ff9d054354759ef6a70092a76c2c50af15ad782551aaf0b5b3c63fc662a9a7f9fe04196a6339a4ecf2122c7d9f47a31643c1c871848ef56ccc640701bb1e2706eaf69c364a1cd8260f89ec618474e0119aff841eb36e16c509d58c51e060b05f963f772d4d72018cacc06213bd494482eccc6165f18ce83234f43ce259aab502924433c848356b38f032f77113e2c3c6a9bee9bb103dc813de83ab894d594c696b11a2075b4f6443ba2d50a9ff61d5051251d9079188eaa3312f22f484ea18220bcd0a83639aa088e87262d54a8a74366b3b8aa2538ce42e9e6fa36e99146e1ea0155d9076a4aece9a2b2018f37c4f2afd2aed32d66ca239caa9b36ffb789e96172a0d3cd32421350200fecc7aeb13b14b76859495e629e2627128435ccb2f5cb65eef1e61dfab9db13bf4762bbeb22488788792dbef2cd7859b417cbdeb6303b62f059b8d5a37b166c969b8e859770c6f4b310826505f74cb0b8ee82c354dc3123aba43b73b982c98e5593050c94c16605376045ac7ac2cfed72c4ff65f332a98047fac055239e390ad7dcb06017c5953c22e566542d9b1bcbbfc1abe10c86301abb7e1d8a6073d845c45bd6ad5a99c87a1ef7a23f0b13e5b5ceeb7c84172f94371712de60303c8e26c8d984e6986b23b7b303148b6128d62d601d0d421ba442f27d03ccbcc90e5ced6a4bd0f6e7f5ca87fbdbcd8b8d089084b71983760ed4b53fbe4ce17cb6472cd726da7c572322723eee3c644801c35173b8c40404478286f4ae2a4c4c900a59b46915517c0d669426ecaf1a7200e1306cc05ce6c6026225420dafc67b0ca88a2c5e15ee2be5ccb8da14d9440f1328f0eb87f8e83774c9e54dbc78a150c4155afad5c872206fc8510ed9cceac13e2bc336e530975f71c0d4ee115c2ba8bb318676f1ab631270c7e5eee23c79e1624fb4d2adeb2ce77fb9faa50e75d42b738172dd1e0f5981be518404df34ff2e38a1e4f6fbecc0fa9228bea2bccac2a664a8686421e8084358539bdba232ce94729dcad63228fb480aca98390c718c978ae343cbe4f9dc7d4470044d72d2b0bf108466e8387229027bc6a611fe95a458db7c0c1d8b62ddfdf33b69fd836f90b87053afdbbacb873cf348ab3fa65b1ea391ee5646328f0154b0ccc167795657bee4b1f4d70ff126858ccd97bce0d5d13f23f49481d890b791f0fc21466dd6813b7aa249c55b0b97251d5c46654f15c0370ccf3c9700320091e63c2ba5c9e82b29a1d177b14661c890e3933393278c2fd99284be985a23b587b6e6a71c247f813e8c06a0682d3fb3b727b2e666b655cd3aef579033e67b94ffb66c7ce19957dbde34cc24bb346b7d2d8eabb7b569427b9c6630563f69386b667de208a0d81043130766acf715a384a4671d1010fde99bfee9a48627738451104b8880a87e859f8937a2920658b00f9ea01a381aa45b2becf6c9e1a631c41d7eba745449c0b63c2e5e071581df9b5abf47f535b650152e5b23eb8c9f577a2394d2643243db776d62a34809aaa28179fbbef446c392db2992b29f8dced8770d92c5c141a77036e4300e4f8faf5c0e5e28db3c538f5bc26646f8a8c919fa9b90b112e354e6aafeaed091f2f3a70cf090d73a6c6e2e302116ad5e7032e80a7b1626768808183212c4247b7e2000000000000000000000000000000000000000000000912181f24292f35","attestation_type":"pqc_registry","signed_at":"2026-04-20 19:44:45"}],"hndl":null}