{"id":177,"slug":"mlfoundations--mint-1t-pdf-cc-2023-23","name":"MINT-1T-PDF-CC-2023-23","author":"mlfoundations","description":"\n  🍃 MINT-1T:Scaling Open-Source Multimodal Data by 10x: A Multimodal Dataset with One Trillion Tokens\n\n\n🍃 MINT-1T is an open-source Multimodal INTerleaved dataset with 1 trillion text tokens and 3.4 billion images, a 10x scale-up from existing open-source datasets. Additionally, we include previously untapped sources such as PDFs and ArXiv papers. 🍃 MINT-1T is designed to facilitate research in multimodal pretraining. 🍃 MINT-1T is created by a team from the University of Washington in… See the full description on the dataset page: https://huggingface.co/datasets/mlfoundations/MINT-1T-PDF-CC-2023-23.","tags":"[\"Task_categories:image-To-Text\",\"Task_categories:text-Generation\",\"Language:en\",\"Size_categories:1M<n<10M\",\"Format:webdataset\",\"Modality:image\"]","license":null,"framework":null,"parameters":null,"downloads":155045,"likes":10,"verified":1,"created_at":"2026-04-20 14:59:21","updated_at":"2026-05-01 07:37:41","source_url":"https://huggingface.co/datasets/mlfoundations/MINT-1T-PDF-CC-2023-23","source_platform":"huggingface","hf_repo_id":"mlfoundations/MINT-1T-PDF-CC-2023-23","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":2,"risk_level":null,"risk_score":null,"versions":[{"id":176,"model_id":177,"version":"v1.0.0","manifest_hash":"b04590686c2cbe8d7f3f42f49cdcd5a673b96be8dd40c56e2bdb51d9cb240138","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/mlfoundations--mint-1t-pdf-cc-2023-23/v1.0.0.json","created_at":"2026-04-20 14:59:21"}],"files":[],"signatures":[{"id":538,"version_id":176,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"aef9688d2c99d5af149372955def5fe805fc86c9ee1c983e365b7a9ef38c8492","attestation_type":"registry","signed_at":"2026-04-20 14:59:21"},{"id":703,"version_id":176,"signer_did":"did:web:quantamrkt.com:chain:authority","algorithm":"ML-DSA-87","signature_hex":"3e363ea4f6ce281ae592a78492197726a004bb76f556f3b377726febeb03d2ca1100962e40487b0f2019808ab704c1b6b3d44a71664082a17b20afadd12336f6586b97980c7425cffaa11b6ae66a97e5ae4dd3930add0da5f182c9fb747ff0f517746ad5010b9a17731a34ebe0467a80e51276c3ce87e6cd49244e3a2690dfd14926722bf212ba8092cf883b8b6e54812ffa8de9adf574eb98a434c3c842716444c0259460e225345d8b85c037391cb50cba2c64bff727bb59012ce4d087ac846e8483cfeba32acf2b3f6e4ea82d4cd802dc3c6e3155017208bf03f6d405a9ea6dbd608d4664ed9f412a3a83d9483d053208d910fe132e0862f2ecbd31e84dd728f4aa8a2146f1204ff214ca2c46bbcc230849499a36ae35e1401ff0cb52bf01602177e01536be6f7435b9d65bed28b104e3f7070c8582101c028cffc78c5ec961ced898bcd4f8217848fe3d1f5d9598add904d8a39ccee74c603494e3699f5b9da0c7169f7d9fdc8b33a378f3f8ccf6994676cb4b3412e1df5835e3dae281ee3f575ed5cb6da925ace64ad7c55c2ad8f6457616190870d0ef981475580b4a12690c9dd66096ca467d051017caac05e89e29bed40eafb2195b7ea08277cc970a0aaa245a3f9018f1179b9bd5229025254a3aa92eb58ae950feb02a59f4087bbb74f537f0bae54f605fe6959005e4ae4e8d55f3fc803e5c5d492c0d6c6c395304b3b5eaf78f27bdf1922349334a2ba4a6f91dad7c0cb05220de4bf9ef30d44762f3a1f7f99ece1a58811877e2c7c2f995cc38450a323c4c704df38f13353da9b3591c5922e3efcda9596736a6ff65201ed4424d80e940081e6a82dcbf0987419819e2a39309dfa455a2a41ccda080ed03ba104485a707cff3b545cecb043f7f5ddc7a34b1184bb99ef0dbc211fff7655f525546f475eff50e70869b435e4ab6b3d1e65384922d39b43a2c9a26966ded5c564ae6ef8417d73c42419bcb9a8290db89d28e95ec0ffe08848eca24a067385113d747ce5bfeecd632851740e5c37d99125a8d88e619b97818d452e10db20c384d57f2814f8ccb275daa92d9a9f90995112e5f7cbd2325f4fcee0fd6d425e7e7f48c84d67553d82f628a001f04c9ffff312f791208376018658c169918a799ff854662ba27cd817ebc6d52685472ff411c252278c38b527f585a6159de8bbabab893e54e926c23240b485a57c663a680e22dc65fe61a808f8042caacd56f37f7b3509252585590d2f6875c4358fee7befe3c1470c2aa3a45764edd795699c749ab4105a02314af0422de97331ebb2ac2e93398beee5b1738cc154a8d950d690182e5562b2ff670389706af5303b4c1ac4baf67248cd75fb60e8ad97ad792288f4aef9ba2edf71893ef0ef53a3648105b39227ccbc6942cd3ea9c0e173cd58dc12c7cb1498c47cf61bcd8169f0cced0d81d05c598075f2944abd4554bcdc794240664c1e492466c4fa0693d36076893b7153e463598a27bd782a78238e202befab94f9b22e2e4e6f09ce303a1bb74cafb804e3b0866c75ad81e31d8b8f51eb585fab124503bc17f0c9b0b39d70ac114d7c521b196f2c22eaa4bad8ce8636c9530042fbb93b113f0202f1bf10217daed08fef122f6fe9760bd843c882e0e666c3bf1842cfe0bae7c641692f79facee1bb488da825d0a2132d418d4b118a3d51d94e558988a3994b0f43e72cba42d380c541bd16f045342e079cbc75f482db9cb8eb77ed60fa00508b4df6b8fdded0aa3515d5303668000f04056355bc92aed7a08374b053fdc43a6599bdb23ec6ef2f3cdb25db18dbb74c625346ea85610acf713645f8934c724062d46d6d1287942a7670c3f31e4984927a2eef23e31cdf79b1051f183ad49ee73f05b5110bf4cd4bb32736a8e04604472681b907dd095c2f0e4ea1ec754c566dc44c9268b253572b01465198ed48742cda241af1a41c113a9029a7c1347843a02ca19be97ec99f0a1bee6a74ff356f249f6ce87181c5e1bdb052690f94d38150eefe9ca0805a3f007b9a0e156531687749bc2f45c1ed8733d799abdf6a9f70e9d904ac27cd5ef21dbec5b380188940e85e9729b6c4d757e1eef3139a8b28849d8d12de8e218a621fbf3a0895984a19dc87549e475305736d95d4a747b8c3aefaccd5f864d29888217f65c20c3a0c076a0ce5aa60ef099e56a79c5acb75cd26bde7529dacb6e8db3fea1eeb478ff45aad5a398a64da9b11a3365d5407ff8eac0e8c1d165d68031cbf2df1694141717147d17cfcc00f3a6589e1f4c2a257e2f0e55a0437934033b4402524893bf3fbef9b07170f584954d0aa6597d6ad495e05b8d0562c3d4244990e1e154e6f76cb0eb2480d23c62f14f0d65c7438dcddb0416e5b74397dab85284845ce445b693e00b599fdca585d7747080ef3810c5e0a46a56dc1d065532a6f5f648826b5609fc1ec4419c56c6ccd3eae579bdfbfe62177b121cb056b9940b18ecaf9bed5881ef60b3f6e1e7de854671685be67f29a9469cf3b5b2fb796df2eb41bcf6a808c867e7cf1c340e4f7c99ba8843acee683f1beed3b2f5763c01f8eed628b825b2e320fa9890b2778ab527254767ca5bb27890d1e316383eb00de7eaabfeff0042ec76cbaec5914a864f249abda6d767ca7f3f57037d4e66456a4c82ecc00ad24c6e4fde280bdd47209bd04ae1de25cf0fb542c5df9af74852bdb6dc4d76ec7a0ad2aa71c5cfb6ed77cccce90e6520920fdf12dd8383cd42e75591ee829c32cad51f5dda4b2f72e76f62b8e539cbbd64e7bc082374081f2a9f5b0d86e254271030f393c66aa2a784f547f2e97c7345c481ec8af9ecda5e0d60dcc45d66e596f52390465f2942ef3c74dbd0444bc632d0180dfa59c7677d884fd32f5179c39cdbd4b3b8cfedb5d565b9ff09df98532371b285a8583de8a29ee531f7bd784c3dd0e4deefc50d462b546a9675a3606decaf9b3c36a11219a1d9258ce965eaf55bb3f1ec4427e6907b483f3b210ca070cf627413c6a9f82c221e5b988eabf8c10297f0c8588dd0aa6bcbace32cbc61fe61604138c1329e94fa53ee3a90e9635df06b9c60259828d0db9ebd8a5449a5e81b9f1ae37a8740ea801057e7e92ab8623b39ee3f14ee2081fa86a8f7a7ba600bfe5fb5609c51831f3ad384b62fd903580c01a4fc2c47dbbed5d6d729436bf86d4e53c668551fd1f3d6f8beae063897b89dc9a443471a804be089979bd944bf7d81c0c833a3f4cd1674fe2228723d2d7cf90ada614c07ba59de3802458a6b330338dcad26d4f963d49ae8e82e0a217f7194cbe5a4e31a0b3af54fa672aae3f96dcf6839eb1e85042a82b14a06fe73d3c8936bde78aa0beff1a4e91cd7ecf1ee8978879959c2e745f21840f54ca7a8f43e646b8a44f93eeb06b8fc6ddb73a34492c68d5297aaea0bc72883f1727ceec1f8d68d48fd454bbe9cf7a3da7e5f085e806370dfea2516b5f3b0873e6aea7630a0b087dc566f3428ae0043ccc390146cdca5c6b07fb93ea2a20f6352d23f68376086bdd5e80043fb71fcc17c1900e257d35335c5bcff7b6af55169090e2ed39e053a20b6f5949ab669a12f6feea552d749701a037844f6c23450ce4a19149724fb0cb3ed680fdcfe252330b8a71dc699c6fb32297d956d391379ea4f06326945a59aa32d4837e8fa405aae7cc6c6161f6d112b7e980ce284342c83983c342cae5d15b078676dc3ea1f074775f9b041f4d022b6ac8788b6ace2e35e561690733afe68dac9c711128a650432603a6a64c2db69ed8d4aac7841aaf9031fba738c13ba238f232ecf9ec78bd670fa4b695458ba1fc29e2f9f83d4cd37253928dbebfb1edbd9ab8a13cdd0e63b74f1bc89207f915cc4070bbde204af821d69da48219fb4930ac39957ed9198d460b31ccc6be221b69c3136c8c16a2bd6f8436c75889554130c42eda6b07c28b6487aba1f648569cb2b1d6dc26d088205228c9a26ba1becfe85705381ce381536ed93daf0dfecbda3221f90092087ea925e50683710c62fbe1fa57b7e08700fda56c004699a77f4fcce32e06972c6e9bc788764a0fca5391d4fb3afedaf81dde5bf6bc2616ce4ff243134748aca05e8bbc4c8f2a10497f4dfeacad0de4e6c984e71ebe03c7a4e31b099e1824a95f37eee0b0109192507cc319ebb939893e6cd87e3f7ef6a1f5e312c3971f185592fc8d04737fd4708d967cec80755f146124ddd55936a0e22292deca1a79bab78b26433a48d44e0bd050eefcd55171f40d58f48214c88c42697f0465114332b1e6d81ca46bb933b3a5e3810e5f92ee0061d692730b49c6810229745047764538e855b3432d79fb03954599938f67d3c624c78d3ff943adb6a874583ff46ceb7e1173b493736a3033134162e6a107c1aa60e9840692025de554cd24cf26281bf74c67a18c4a802243c2ccca3c4ff9e5a841560b69aa99ed2290d6decba4d249023b274905af65c1b1b8fcd2c36533dd43406eeafda56865a674d6e6a1ccc437462d3bf70aa027fe37b017bd081e61b918bb6b018b217291b236f7926dbf9e6cc2e4a87eb53a03434613a320a02e4c5c6e9fbeb2aad1d2cc925f7c11a21e0bc1e549673b199179357f8db844945fbbc7bbdb304266203b08076945303a0128d2dbf84637cf04eccf20c16efedb12660122670fb2e90737d0fd7aadf7c0ebdf491e342f45728bdafc042eac29ca3dd8b1d384487f012a8c593c17b7a0c7455aa88a5a0b5ad84645916f094316e64b9bd1df95b34646a3e9804e5785fbd8979b876fbeb492475a93ac8d0c79448f2396a06836ae7ebd2bded97b882745c7467da8c29efb07652fbe1692b517c757e6f8b5a063167b39dcdf40998e95de4fc7107347c2bc79dca943f7f3fdf3ae47da4d1590e717d0ace29fdbe7ca2d2ddbf2b070176e8f68a2ee619b403de1b04006aae3668197d1122c775478e1e9d13c9eba791adf98de642cf3e218c6b8c5b5d366dc972f2558f343340b2369d6bda623e87826b62592713bc71fdf10459a369c81e43926bb7c70a2cab965e62cb97e0e336ef4bf427fd2e58e74683b182eeb463bf130e71b60fbb59a4b1693e6aa0e07e06edad27e848d522f9dbfdbc0fbad7d2eb92816ed8ba04e80347f59bb4c11cd7f48356970d8a3b7bb5da32fd3ca2877b6494964686c97d4f6503b8d32a648a39cf52bece3f6d7c6a1cf3037aa06ead9777ae1f76d14982434347fc3a98a588bf154397a536646a2a19b2c5107ab028d53706d7bc84fafb6ca13126188ed3b8fc8e8fb4b46e9bd91fab657f5867e89fd8f74899ec4d560e53775750eb6f3bcf1e9bbbde082b3216596e86fa2f6463713dc2dfc061f9ce4cb90109ac5eaaa64b8e604a6a0964b87781606b178c292f5f65abb0933c71416d90b25da66800d24b7743469d569b045bc947ee18f7bbe52a4b699b9e4e4286c6727929606172ec276be2a78c909ebee11911407de57412e16b9f650ccc99d9d293877b49fa0c7c6abae368a78a3a59b055479fe6cc539789b78001e12ed3a6f627a048f17869e578797df77c3dc8926632b7f4bd363718dd7e41c73da6605b0203da6fb28634bfd732631e44d486eb4dda59a8f23763dbb81a7b1c95ec73207cfe08281d13d7d5f56d04a36782dc657262026099fee0765354532020b2c9b01850bbbf320f37edcd4f1c3aef617cf897ce7cc1d895411234f2f41a17ea4329666f759f6fbb3f9d27b59a6c968fa340986a03ce9a23bd81aafea1b5d0211d48f6278de847c6e671c1cc50dda51c68e5b23475fff986eabe5e1a6cb169a9ed1927457f69a74624f7887848d98e66ace5bc1c067ed8b17a2af5e4ca01c6656068200003297db4d2a9a9a5f34e1d231261a35e521a171a5b9560e287531d763c737cb0eb44c48aab288e90093d9b9bebbeb40895dd736651951ef8f533c5fbd0f0febe718a363bee8d3ed581625f201a621c62bb4b6522ba9f2799dc6fda9e4026cc84e096563b0b37a4354a17e2e5a3a4732fccae4856341853a09a78243dfe29ac270bda73b90e7699d4ee9cf38b334481012d20fff6a1eb791f27fdf247b1cc579a1995928924db1b41dd0301f5b7b110e91d094ee3341c182672e2d13b94bc7c1a2b6e0349fef915078392e0b4d8c1bcde8b118c3a9911f1efa8b15af27774ffa3c75937a88bb90b5814bc77881a145d5790fdd393cfeb499d6bcd027f30cfe5b89176ef278770f80245197b9f0df373fea95cef484756c51fa049e2799cfeaaa1b2cc3f79074cc8264c136afe9cada5ad87a542cc68f646f1243768e6e71d757d09d6e7c8f78ed4beffd001d6120cf5aea68622362fd688738cfa440b3b71b360629e4b6c989f8a894aa260549eba62291e058edf6c93cfd8b4c69b7102293635032f851160d2655b85a37cd8ce35a536032ef337207bc124438db6d61a595c6599b4c6ecfb37b9064c7bb5bfccd9f525326aa1ad1a282a65687d93bdc5f593a70000000000000000000000000000000000000000000000000000000000000000020710121a1f292b","attestation_type":"pqc_registry","signed_at":"2026-04-20 19:44:42"}],"hndl":null}