{"id":171,"slug":"mlfoundations--mint-1t-pdf-cc-2023-40","name":"MINT-1T-PDF-CC-2023-40","author":"mlfoundations","description":"\n  🍃 MINT-1T:Scaling Open-Source Multimodal Data by 10x: A Multimodal Dataset with One Trillion Tokens\n\n\n🍃 MINT-1T is an open-source Multimodal INTerleaved dataset with 1 trillion text tokens and 3.4 billion images, a 10x scale-up from existing open-source datasets. Additionally, we include previously untapped sources such as PDFs and ArXiv papers. 🍃 MINT-1T is designed to facilitate research in multimodal pretraining. 🍃 MINT-1T is created by a team from the University of Washington in… See the full description on the dataset page: https://huggingface.co/datasets/mlfoundations/MINT-1T-PDF-CC-2023-40.","tags":"[\"Task_categories:image-To-Text\",\"Task_categories:text-Generation\",\"Language:en\",\"Size_categories:100B<n<1T\",\"Multimodal\"]","license":null,"framework":null,"parameters":null,"downloads":392846,"likes":9,"verified":1,"created_at":"2026-04-20 14:59:19","updated_at":"2026-05-01 07:37:36","source_url":"https://huggingface.co/datasets/mlfoundations/MINT-1T-PDF-CC-2023-40","source_platform":"huggingface","hf_repo_id":"mlfoundations/MINT-1T-PDF-CC-2023-40","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":2,"risk_level":null,"risk_score":null,"versions":[{"id":170,"model_id":171,"version":"v1.0.0","manifest_hash":"71bb51ff98f9b9aa965b0908f164f59c546991ffd310032c7eb99b06539630cc","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/mlfoundations--mint-1t-pdf-cc-2023-40/v1.0.0.json","created_at":"2026-04-20 14:59:19"}],"files":[],"signatures":[{"id":532,"version_id":170,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"159312b002c9383ad21eace342c74ef4364c9e9007fb0e4ed160fc2363f309a3","attestation_type":"registry","signed_at":"2026-04-20 14:59:19"},{"id":690,"version_id":170,"signer_did":"did:web:quantamrkt.com:chain:authority","algorithm":"ML-DSA-87","signature_hex":"77528d44bf0322d3a3638e68b81a24a136d93b666e2b9394b472f202df4c1fd95673ed6bd7b8d41f206988bdb8f9804029bef3a434244658f6ae157a8e6666e788cea77848e473e3aedf40b5b08d0cdd39239671aa6a21c280b395c594d2ec480eb94c33ff363501963e851f28165da8c23e7b0fea21de5fb4ebb711de6c8bdb40e2c148e87691ad9150d71175de3f58da6ab24bb16cc7983f1b1578d9431d7692853c9aaf01126558bb6dba42bb4e5153df343b3d40fd3833d7c0f57e95501962381dcac94b102053b88d7f403b96ae4976f42c36015acb5a1a9ed496d9e30ffbe1380851cfbcdb969e3bec7847791896b255e57bf67153c617865399077da92ae2b9e583cda3c74ad21b55f031c4f00c983de6d1e60f0d5cb4cc95267ba5f32426aef3d3b6722efedea7e3f33ae700aa756e4538f3d8c9925efe54d74616229fd7508aa43cae619430e892d5d4de8cfab12d896aca6905699ea546978304f69e82980981185db8e730ed3e92979090658ab17e302978eef3eade10aa199cbcc69c66eeeb9d7699eb29b3d8477f50cded5171a953d09b94b4acf8877212aee299a9f9a426a2fcf39ca9ccc9e7c38cce35282cfde8ad9502080a6ebe5534014aad0fcc87fc99bb1947263cfab1320acbc2ddc6640d095dec23d85cfa1b8546c29a7c7be6f88d193fd0c39e1e40f529e87529b941f869c9d6c6d0608979ad99de0795e1eb765f5c40b7f7268480ba85f82fdde83ae8d9c0936d738ac9d7eeddc04ebcff2b510bb1993016f6dc101565673640f73d37c56df86ad227af88e90baa7aec33561bc2a28e40034e942258df21557e2881ecee7228784096fc3e4fce084240076f997435717777d9c8e195b2922c8d5eb58580218e7f4257dbb36747a354528bea40ada469803201b057d7d22c5d8ac289b8ed8ef889c41d80c903ed6906737648c41b3d24aa270b85adef672c3a3e34e7ac46a7faec9d5fb25b4d11933fe0dc985310151f53170c9c3977c7667a178483617089c6898bb7786a42c619d9d683043367d17265dc28e1b13c58ccc72061c9d47a4bf54f1a503885f8609bd0480a1afc12918637f1524c35b4cd8f8aaa9dd61c49d4451169fe548829e531cdcb2ef02082cb0be1daed4802a4f413096286f408cbcc171d99f9a2be9e95dec92b07c2712de8bfcb4cb8c98a106da59fe31692c884ccb06c2a6764bde5f0fbbee8cc68653a2a067aca11a3740d705b7630e305b9be578a37cb5f1370d76b8255e17b0f7aeea9d015502404977f0f7daae275645dff5a61e57f034bb6ebce037b02616997084a3479db1f7f2b8c362011c8c9728ebc4fa80ed33310faca48996dfe30124deacbe6138860e5771d3d9dedf1e8c312ea75184617af30996b29afe6fd545981c5eb4e6a492940de5157a94e93cf1a4e7e5e0dce7171654cb1acfeb672c0568671d67800aa2f0a19028c09d19a9947d53e051760022da99651fe983f973ada4d986a2842f2626213ac3f1eddd96b3335ddd43136e5f0ca974eaa6eddb98d59cec59e5d52113e5732e442eef97a314273464107f6807591e16fa61e6e069ba520600babd4c7c7ea909f3026530517cdb1f9b3b6639e275d424ccb42f60bf7e07956155460181e3de0cda949c903e0f86276d3bd5e49b572d19665afd67b9daa7f7fe3670f94121d54b86d7f197afc233e46b4a38c0a8f78e3c1239211ca866a298e4dd8048eae52d4fcc81e631a3b44c0e02706c23f8d457dc13a6a8d6f876bc1b34d777f7f0e577feaffd77d6ce0d5d2d4272e212a0a86efd1a58e6cfd4485c32e6574658b134284d5be5935aab71c9528bf1c05fc3aced3c4a6dd7519259d0129e845f96ed2f5bebb2dfa979261ba8937cbd7deb88b38962da67a21734950975e2047c303110dcb71499f574f1f8a637d0eb50e9ae5e7d58a87b468173357106e3cb1694b1e7b7f5dd81ddae6937e924182910a78ab7de8255bd9e39c957870bc00c131e64b2798ceaae659fe29c569ef57c3c9db4e349ac77be41f7eadd0ba94f19f661d0b35366d08a2810c8000232956238641c7e798c7d8803da543f5f9617d149aa00b5c27f608bc0bb1a75392ba508fecfe35c05031682f24ad6271b512d91984c94fd29d7c9c52c34cb7510f2cdc6a8fedbcffe4be7bcc04d0ffebff8d65a476f33c29cb3353ed253a1c674390d470cfebc8be3966fe504865da8b48992b15a3cff6e9ea18d7a35af4ba995424caa6c8e745c844ef2bd48f6f838fbe4b8a7baae9f9fbc56a71b255b8d9439ea2ee387972b2f2282cc4377c8cc6a3bf3c71f9bd028d551a84de29260a0c00969114050b651359c38b4cd06745fe138f870370048c515472d7dec655c9a6b49327b47b4087fa78c38aea43a1905511b8a3266e79acc4650e5076491a4b98f4b39551b1c5232cd59363b649feeea0c01fe85aa73be2484e54f4e9e116e800c24caebd2f8e355b8a8a13be9b8f33e91f08225c2588ebd41ea09bb4f17c9c2f23781c92d73930890c55fa091107e201371ea159b4bcff2a7b5735611bf8f0a7f73d7b5e6fd1b5879fe2511f6130d8c8b4e659435400b1a0a8376c42f4eb655bf298abc6ea09a0484c4b6c75892af696ae9f070a0d39f80b266884f191fa3dff0d95681f0d99b73581bdd5b66bd85b0b5c97ef745a8df6ac100b306954d681861013a972050e51f1102a3f9d598bcf1f24656e6810bf1fdf0d14c27c55b886963d0e252252e550baa76f42ed455e4a74cf8d0bcc312b4abb2c8b9f3da855012404daa5b2fdcfe40c9114e7e8a15a843a8e71865f17fc01889440d867a9b657494eb6b7c8b3d47dfdbba1f34b151d5875788e8cd597d8b83233167700cb7e0d81c003f0d9a8f46de7bc1b5659c7b99a3351d6b804a60e5b9fa0c571587a559f51b7d94451a1167a142a31aa5cbf75c9579f8d354ca59c9e2a6066ddf2110b51c6c67881ee825c72b34eea71599fbb7329a4545057c97c00f5dd599e9bf94df6dea502a9b497d821f01dbca07580fb1467de4feafae77d58146a8dc8680e7e94aa6c9ca1650499b3801ea3bdaf75ca566c595fa3af2c57911fb666b6a85d362b7acbde7077f2a9501a3af55edb46d152cc9c084cd247eb930d0202383f38ff864670174b69c6f05a5adeaec977ae377491e0cdef3d9d45fba4ea11edf9d513ef470b9c9d9d3c49e891d3da5702594e342b12c525b7feb23effbd36d3ce654da365288e01a4844f640c0ee142571d6c7addadc55466dae72edfb8235dc39c7a9fccdbbe86538bf8b9c17707df2e5dd9f41e09b97a4cfa369ed84b560b75a1a610c09bd03b7cf9ecb9b67d90be369c4e65576d4643d26eab015d96de79369cf2345fab13943eca503548892c1ef34a756ba0ef4cc8f551a5343f40da400807616fbbeb2e3450fb892ca04f07d6c787f0495b4bd400bed3a9c0bf64304fffc5024a05b25618c2605e63f3dbe204aea81345b768dc0f5da69eb793352ce917fe10577f1e42fc1e1f93fb6513c80582500b4478933d7fac201a18e4c3f226b32bf02c3c58a1784dd4cf9bc1fe390554ca16ee583499ddb964be821f38473a3c22d03261076c15ac1cc376d51d0a193153f07307ff97c241f3879505b8665bcc0753f26d24ad07e4f330dee735692054ba9976123ee726246b30aace06437699b7a608b7f2958d28a1e961b89abd40aab05dc51a7c9273827e91354a4051be01b3c609f4df71f9592ac8c7a467e5161e8ffeb717d416fa44ba832fd33f21bbbda57c6cbe495a5db7a9bb09aa019039d8232d46c9bffac7a53fdb463bdce4a083f8b2efaac2f87eeed8a9b920ab8ad41bfa868ae619266d0acac50758e796ce5867e5b50b13f2f431abe301af1a2c6035263c42bad1211c044fd075c9b943d08f32b4a2bb421b77da55eb14863e2d413d00d666aa066e61bc5862210ca8d8cb94553c36ebd79fd42074f5206cd8e3cafae0699186e787902ebd1b25fc9c8e36b2d3fbb8471ca9b9a61e17e39a58c960d4b119923e12cdba76290e929f9078a4a32a783bac5313422b3b788b23fddd461e32815fcc2c45ce745c0b8340256aee336dee091c122fab154a729b76154a6645b7b2085d527ce6d0410e8b0f6af50e18b6eb27599f5cbce09c4acca22715350c866045711fe65399bb89440b09423f15a0679ab7fb92ea372f3e82c885d7f401facb6dea80fc0dc804df78c49e302e9b83df5d5fc1a6e8477c6fcb5653a023db72365582127f77f891d7bba338ef81caad1a70738e6301cce35a2a5011718443e65485d184a8eecfac5fc220d16e7f495ece52691b932bebe0e0ca8456c664264aac4207f2114081f9cf8acabf1dc88a859745ddf3984ef01969e1b3718d58c574443dc41baaeb7cd5291549a5afd0d47f0742c5b2395a0a82fa2c4426bae5ac6db51adce2d653016d9416d9478a81d808ec21a4c6a1889df780d1b3e7e9687e77d9aa8299732779ef5d9aaf49d16f7f7ce590053b0c52b3e53a1f198c3d587a709a74c8738cff60c79bdd99a4190edd18c8245cc81dd3ed0a4212143f608a94cd79fd8ae2e4acee83c8b4a1821869c30f2c3f837b8d2796732116f082aba8ee782cc4a89d25e48b97afc4761a5fbbed953b1129cb3ca868ea523843f864a03957e88abebbb6ec7dc5e6fcb5c983f6b73b5062692423e502f57553848bd4ef35344e85d16acf24e8c853c284a1d4d73a023049ec078ee75517c0f5e03ae1c79ade580f34a82e399f3be5acfc4cc17e9c75d1fad0f2a9402b6b61d95ffd5622d9ef08aa49bf93eca800e3a2e16a57bfa7ec6dc23be2cddc2508e0cf125a29f7d632e910190bb2d5332ed50c679b80887668254df861bc3c07f338259f1b95a61b054fcb8ebdc26683ee91afbc952e4722b2c50e002c0831179f29238b7af13075acece5cf8e9b8df58f3afbbec40daec1ad52edf8f9d8a247cebeb581f33340a074159cbe17e4d853782734f3ed17748386442de12d10b7f4c1b321d98f16a65631ebdf7546685c9f1ea0fa290af6824a38fb0458ce1498cd62cd9719b7a9d8ed77ad2f8c603aee2a260a50d451a4e8429a80ded1fae29a3595c4b45a82562f1fd641032f5386e25cfd6ed13ea873a2457692eac181eaab7b5865a49ed97d572cd9aafd682855d22dea1980cdcbfc4c540b6eb325ddc0c15cf475d2e9671de823e6225fa79bfd19e12418b9d6c223043ede4978fc388fdd10a3e752f4499d3ace5265ce723eb62405e4e0554567757aa7259fcda511882addc40fa3eb22d4fab3c07e29101314b7185f8c6467a58de0b63831f15df501b7d4f6577d853f804232eba508d259fb08aa1cb65e3011449ab68dbe301b3f9fe1cd28cbdee306695160086722bd152e842764b1a6734462475bf9fcd1fe8ef3ab7cd65b6cbc7e3c118250235b6afa6fa86947fbff05d320a5d79673027867a487ba0783504534a0e2aa22cacd8537780a9fde2733f7b29a0e4cb6d1c709fdaa33811e12dd69811eb18010395c0b48b9556f0aaf1ceccb492b00c4d59f581c4ac12d6b5f467ffd38ae41f48b0bb1633cc93d4a5abd3845f06b328d3fbdfb34cdc28ccf392703737293c10cd0e98a2c70c85b73a273b69c9e79c4544deb9dbd5084d4051254966dc384bd9e749a7814dd467738f53d6c671625ace37a67b91d0e570168455d778d23fa63e4e4c53fc4fbe3404ce74c8ab83e7b8781129f41ea0efbbe864e29bd275c88a58a1f060795353d01f639ca4a8f896b30fd78dd228898d9b6a632b866f89babe4899a0494830010d0df9394d4e98c82e19e38763f00654bfe594f84ca1b77f6f079b25bae44213e9a025225f14530ffeeffaedaebd5e8b173d43e1c74f6acabbe42cae1e6efc78d21a4b10f3c31138161128d15f8084987ffe2c82e4bdff178f67083efbc1a033a7833ccb1711aa3b6e9a3bfd650aa94a218638a3ecd11065ae1749abd134c453f27d706c2f698a6d528a3f5d9c23fb844caff6f40ba609994631c413c1122b18ef17d9f07a04739d8844137ba0ab75308c7f8dd6a8525a011b34099cf3d13304016f56ac55f25b32a177c07d367f897eb8575254c395b38e6e36f0ebaef1ca952a7432d59bf3397f66607af81fd82342e15074fb4458e03474e64206a7af49ab6c4dac9f05054cc8b2144c5bbf88e99dec0d60cf8261b59b3f15dc6a9bcb09c8361fce6427a69770c7069cc437e1bf88686527fec40ea4d89f75fe60dd5ca4822a6f767c42af82904fece4986d2754ccb4de2ae3dc2912a53a9b88532cc9015822fe705d869309fbf65dc1b445a076066b4abc9d7906aaa9f370f71af7d08c1cbb20e0ec75d2e0d8dd612f661c738f27b8357b3d2bd55f7e407a4daad34567226e6a2e615a817a00c5ff1dbd1bd84ced6aa5cc2e7423cbb9c171323282d6a81bed6dbe2ef0f11415d94d2ff92b9bdf71d41707a869ea6b1cbddf505164189bdd134516475a1c810218597a8eb01166487d2fd000000000000000000000000000000000000000a111520262c3238","attestation_type":"pqc_registry","signed_at":"2026-04-20 19:44:31"}],"hndl":null}