{"id":228,"slug":"mlfoundations--mint-1t-pdf-cc-2023-14","name":"MINT-1T-PDF-CC-2023-14","author":"mlfoundations","description":"\n  🍃 MINT-1T:Scaling Open-Source Multimodal Data by 10x: A Multimodal Dataset with One Trillion Tokens\n\n\n🍃 MINT-1T is an open-source Multimodal INTerleaved dataset with 1 trillion text tokens and 3.4 billion images, a 10x scale-up from existing open-source datasets. Additionally, we include previously untapped sources such as PDFs and ArXiv papers. 🍃 MINT-1T is designed to facilitate research in multimodal pretraining. 🍃 MINT-1T is created by a team from the University of Washington in… See the full description on the dataset page: https://huggingface.co/datasets/mlfoundations/MINT-1T-PDF-CC-2023-14.","tags":"[\"Task_categories:image-To-Text\",\"Task_categories:text-Generation\",\"Language:en\",\"Size_categories:1M<n<10M\",\"Format:webdataset\",\"Modality:image\"]","license":null,"framework":null,"parameters":null,"downloads":134963,"likes":6,"verified":1,"created_at":"2026-04-20 18:22:10","updated_at":"2026-04-30 23:10:49","source_url":"https://huggingface.co/datasets/mlfoundations/MINT-1T-PDF-CC-2023-14","source_platform":"huggingface","hf_repo_id":"mlfoundations/MINT-1T-PDF-CC-2023-14","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":2,"risk_level":null,"risk_score":null,"versions":[{"id":227,"model_id":228,"version":"v1.0.0","manifest_hash":"1102ee4b64667ef9bd6e8c98dc4eaf4b761b4f84a6d580f481b389fbf5c0e421","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/mlfoundations--mint-1t-pdf-cc-2023-14/v1.0.0.json","created_at":"2026-04-20 18:22:10"}],"files":[],"signatures":[{"id":589,"version_id":227,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"80365c542f3927abf2ebef118a391cfed5ac6717010839d0d8f9c71fba95c669","attestation_type":"registry","signed_at":"2026-04-20 18:22:10"},{"id":704,"version_id":227,"signer_did":"did:web:quantamrkt.com:chain:authority","algorithm":"ML-DSA-87","signature_hex":"4f21015326069f8c2e90f8a96bcea423f4ccd3ff76fe959b4557d703c913c4e44e4c0617c76015c7210c6840045cb9f236eb0b64d50dbdba7fe61bb9e74e45fcab02093b8ffde6a422fa73893c7af7b998c60e5c1402fb2c37848569e157debc5522705e99e54c9cd80c58b1fc340f57171e1e43065d1214f42db5cdc5fbb493bb4e0713927a92b51e07c344ba3c7505dc64abcba3f12a82083b354078498c2c549562c82f95b8961f4c9484a746f16fea29dbed35938f8d0ef19fff4ef42f66343aa7bf9b8688e1d3d40cdd14f896e37467d092e8e11a888b8cb0e841fdf2d230c9de7a2c598a4eb6699f68a8310782adf551bfbc4c785a22537992d3f65f1873e2490fe189c0e1a36904b7315f947a8467f76cf6738ba798221f04fa5df91ae0333a7e99e62cdbfdf0cb015e598b624534528754f6d275347be732a1257929cf3b676ff6d2830fc797d1f29b167bb9098acdfee67164de3051ba29318630d4075ff0bdfdd5eeedc2c7844488bc23a526a505d88fcd2097091b2c59af8a4d2dd64f1afead4128a7d30741abd33319700b729afe9f699b0491139ac71ff3eae8dae80a96383abc814b9ce64d82249f9a2d76e324f722db44627d9dfdba1adbcfad569b45e29f3dbee1a0776babfe45118114ce362f29aa5b6310be95a8665b94542a00cceaf4414958fd03282e8164afea61ccde570d49a4b188db12d5165581d237c68b8fa369047c1177981c9360d1d7f4de2c7aaf9d201e1ad58fe2a336c8320daac9461dbf757a3b43ed847edf0d5ab808ac2035da219b422fa1e12acfab5f2a8647f055093656768e11b0ff77f96d1ade3e06eb23d595e93693449a542ac568c4c536153f12b14d97042a7211d0e931b1c690f267889dc74e1065265cfb5fbbcf2102f2de115056ead61ecc978953bbeacbe5e1b75a335c3c8d9c9ede66672593aa09ba8a31006dcd92397ebdfe27ec7dcfc93d392abf45cd3cbd3da4998a28f0643f085e0e3bdc5c563c31581f032b5d97aa9485c5afa034936106910a294145c80ba71cec94670a7f8ed60b0a387a53119776e11b9027b32206383f555058d5bb2dc79dab8fff2d8c559c9b2b366be57bd70861c855b7b57f6f7dd389223b30ea10a3340c8cf44f99481f3c8c8d363d20ba1b627638b3ad02b4d3d49b2cf77292cfc1b05968cdea2a69000b40d90902219436cd9413d3d58e88c73b026edf53319b913c6334dc7c9ce605c26ea9896fa037d95fb3d0305cc13f9d7b19b8125366cf90120eab531021fea56ca4a62441544e3410cea1cad9b78be1dd08bedba46ffe952c42b15388bc580c062e8c1d7535f5a7b91e40f110ffd9b86ab24a148dc32b83322bc4dee8096db28b4dd94fa3e06b0a2cab783c091afebd91b1a41e069714573b86e90f4f1aaaf512eebc94a674ee9334f8ce2cfa95f9fafe52ea5fec956a17e475c5b932a91178f06044536f00d7dc9c5a7c6df0e92d43483f5d29bdb64156f5fb82ba306e718b7e2d96e99e79786bd39daa37ca89224ea9a0e2d57df4717fce920f41bae825ea0a6ca797a278d05e8f6c59706aff98fc1e7a3be98d6636de79582c59c86da6ba3678b9ca79d0d92d77e2f0b309f1400d91f3940327d1c1d1538e19379b97af0fbf924a4f1857d4cc4ccd2228c297258c71b6796b74b73c9cb125fc9c1ac999a26674d7db617fe4c43ea8d9138b65a1447256cc3e06396d702293b5cd93eea1e31fbf576c233a144619967d10081f587cf034fc296ade3aa3256849daf2562207d26171786da564e81fc0e120b3c6bf00290da88eb45caa90a317e63a970faf1cfcf2c0fb45a7311ee94fbb6a7d340dea7b64aeb6623a7964af27f5544f98dca1dcb93dfcd2e47c6eb3be4c38cfb9f611db36f2bec7cdda0cbaf73af9076595bb38f11f33cfb9c309b01fb885aba5672bcbc87d07738fb06cae3a814b252ee7f02b2704106ed0222850d6d3df8160c4a08527e1eda92a80b0e866ae6220cc2e70d62d8514c428524e4fdcc498e983e02a3bdea6661ee2a4b04fea25993d883292206996ac8776e46def2389c6a6d81087dadf3fed763e07cba965492d925f615564254bae8c02d1c8625f5325c3b33801fb069a9a8bd225e39abc8d0028c4cd49497aef8084daff39787ffca9f3fca76606e4adc12baacb490db58fdb7c72bd1e7c83c2128b396e585d3dcdaf8c5d57283a78027e1a14c305479cd2c79f736534fb55d37f916985ffff8dfdec0dae7c1320a1ee153eca0caca5383605056f03d2da0fe7e6c640f43be1bf934a8c333d2b62088efa4043b1f5ef7564325f9b839357d969bbb8bb753fa2c81276498c1cb65515ee7af5a1cfb100274b7aacf865e33e36b2ab7079a8d608c141ba0ff87dca771ac70e5177e1cbea2f713ffca9ae941fa78fa3740d82479d6c03e3182f98b3c9bb048db9aab9348d9929e26d203c29ae1843859be8e505f5e9c74850a9ef3690060c1748bc60c54e2699baa75c8582d8b9af1ec80fe8b449c8635e315f3fc9d86e65fc1f5e52ad500278f9b0d6a68a221796ad84063ab95fb501146897e086eef2013ca4350e023cbfff5abeb48505d82f30dccf4b46395a16df81c2eb377545ae32fec5cda29ae6f9139005afa0c5c9d0d7a04b88c5926eb01f089cfb622e61716a2b43e13b186fb3099f0edd8412734e67c2e7a6aad1ebbdddf169a67d77c99c4dfe264601c40f4964c3e66dc0586b46776d9fb6a175e2e1f3b4f384a33bdc43d02b919639590beba26d142927e00664d878f00bc9723246059b038533b3c036fbcf0c57f14c7b050d67863a85e0f034c0ae4885dedd20ae19437e53eaaa0699f12f5b1e666b51121a8769382f0900aaaf8130d6e0c41677b380e63b0d558b7bfc0b00143bc096f4dd2723a0b6f4c41d72ef73c3f53acca81d85cb4095b43eaa75fa8e04b29af0bcb74c200b07bf7a6a4f0bea7624aeabfc2f7fc4031221f5047f3b9a8c8cd210818b812f7def4bea6f09f54fcfe8727bf16d1d50737fd96cceeeb4b31cf138f7a81f64b632b9da2b9774eed49de033c68ff40231211cf972f970074ae341c2b399fd111468cee29e25a31c57053bcaec809a14d266094bb8cd0e4f3749e5bcd3f4d79cdb73b564c611c0d3801897adef806383c9cf2e8639a215659dd5dfd6d08c8fda9963187d188376a40a6c456dd047e2d76937bededc619066d55cb4ad3240cc548f83925408f78d36af1f860357212ca8af4a39fd1bc5740d44105d53cecefce2f222aaf2213730aadabfb96223049b722b15668c7549603e1033034a9a0361931cd75dff1ccc12cda502fd207c4c6ee141f021d53014b6559e45a96b320d66d239f9ad4a20bcdb72e54de53307542a11df276162fe32208e0c7315d503f0a7945816f381c2aa78d8334d84cd3cb32a650dffdb19aab49c5007a9a07863e67c64d6196cf04609af2e15861cb5e49f718789e71f507e51f2d9362bec822c821a57a08501bb3c6a21616dd8f0f803907d1e4faeb739d623c5938d5ce93420349aae2a3f7404cc8ea2eb291a505b68e395af1108ae752ec827b8c8121da8b9d58df7fd91eab85b26383850444aad78a3227a2d51c2611b37d59b925c585dc8f3c041e78ece5173a85b0ed045d05bfa22a3dea61b61fde2b6c47b82aaf424ba881081662c60098f7628ba34c2418181bac18c376a1c42ea094a3b8c3e7b8f607d117eee54e097fb72922794cbcf6f3ab6c59c6cb240e0f10895827653d597f3223ff0e41bcab51dd1b0712a349bd18ef838fdb0f81fd738f08721618e258d8721ab4d43a557fcbf183227f51c0aecf06a0622118843f7930b26f23cec9949a4d382667b286898c84aa0422fcf3aba7f5320ca1b67fcdaadd54fa39c150868076ccaf595b961be149552a88da2b6598c5b281c4a380f87e80ae0e18d4b6630c3951e7190f84c8932abf96af3a6dd2a9f1cf760bba26e3dec5afe226d331de4171a598bcd60b6c3ff4e3ec5751db8fb129ba0e702ef5e0eaf7309e2d8041ec709ef1a2df2361e08f523da3c2bbef0bfcf704d7265359f57676b882a0b6ece1523f254289e02dc3ba572ea4d9f47b656f580c53495aed34397a35c78fe4717cb436385edf5ee4a2a77d24b4dc337986999dd9714152da90db1c8b7a376f67d3ff902473ae514bc86baeee889024d4ce0f79a32647e07c26068f55a289b208ac85402665c09ce36e5a7e0beef7765afcb2c4c42392b2fe9fa07dcfcadfa5b3cc58281c2718bedc77493234bebd578202991156db6efee25d7e3315df9ecdb3973bc393353a6d9fd44609a06c11244af658cf6ae856e8513789ef7b91d58263cfbbc730c2bd80149d7b79f2371ce5485d09631ecacfbae7fe87b5e4290f5765ed291c1cbc3d685c2f7fea470faa57f78c79876761e51fb7884241bd52fd2e067d2a6e755a65dd9accd469ce94df7d457d0b2e45e72d7e67361645885018846762e6e9d2c1e17c470d1c6b1aaa6cc5c05fe447005bb6c6f073c5b0867e804880985f0710bfa3c5cd217ba19102fc2aa52b17992b5965f773a46f30ef7a2064a9c295152eed5f8f0c00d24ff58ba2368ece6a97038f5a4e2e3a43fab8311b190eb0bdf0eecbdff03129a25ad95060d260845042f242aed8b888fe4c0cf9ce6dd751261afe8f736e39974bcf2fa9d2521105a0fec324d1fa86c45fcf2aa198a6fdee926025e88666deb4aadb7783e9965489ec5f14b33deaad97f0eb572d3c22a96999be028665d5e80e1386ec6471036c21cef799fc4f3118d92bb0f49206f3ec1cc42e0ca4e6216e670ab14c65035da0e6ebab7b28aff8bc4c23fd03aa072ac9b411bb05b24f12e3c52ea097e084ac7c330c410b98693f48e45e7aafbb14fb54890e99dcaf3cf4c43ec0469536a924fd712b4c95d89d861ead281b2006baa491c873c8403c664cec06bfcbc185c9c20c7d85709c711e5f676bbbb8740059aef6b2e2ffee09f0a9926224d34d0b04a16f4650396a80156756fad0aa7d5df92006d9f3aa227e6af303481ca61a9a616fbe538707197a5ca870586aa1bf60c93598718dfeb3361d7da5737d3d077501d84605fa60df8cf31319f9bf3f6b2d9691338939a10ebab9039530cf33c94eaca14c5327f2fd0e41e14f3578d2497939a544dd21366726d2403562e5ad9ebdfa1db861018513816578747887902cf88d0f5baf5a26cd06ceb30da9f6da473da9002c8c164e80544d712906863519a54a793dc9ab9ae4462a06668ec94fcdd52fb734df475ec99a8cacba208f6f96c7f7327c3e9bfe3f4864b8425c3f2671d091f03cb8e6646e702200f93958a153dbb4b32a2e658aa1b950fb2a6ae4def77bef1e6785098949adbc1f84879d2f0ac6d47327b06541c99cb388a719784a03eaf53abb38de32ff2eb50fe0f8f1d71823d27b866fdb567ee29c8ab0d5beafc28f47516c269d5a53a5e47eef5017bd3faa0326f5d031fae5cd0509974b56b12b4af0cb27610806fa7308814e8ec2d5a29e5aeb716b16f6677e319ec7a60516d84244308ab87ac4121a18b343f5f157ed1580c0c6e5580b606025aedc337e49c71193b9006c23210068e8b2c23ac5223f400b97f00726c00b513f4ba7f2823ccca12d895f4f536c4a7523e9ccde20e2e8b3ddb516794703615b25529742125a2968ada4281028df967f3b8182ef528357c6a8221c3a0867493f237866b209da86af6b2137366d7ee83bcf9da00f682a890614e4caf41628896def107fb13a9d4724bc0a41c8ce82e3d8b86928a1d9c6738497fca00d3cd3b3ef30cf65641b5bae0708a397cd1d2fcc3e47b656944bb152fdb6fd13e94f0f1db45550c8bef9b33511dd3c423bb39d8c95dd9b732f0cd9825e56fc4886d0a78519ebc10ff5ba7171628aa3933eb45a12c8c82ad4b8846f757d4132f8d6cdee1aac5ea678643218c8c15533d4d62ac02dae1440b1d3a2ea6a4a8e08039d191bbaee60075401073651055592710d105aef162271adc1203fa7843866072a1bbde0a629b05f7813ad84e6b68a41fb3c0e38dd3987a6134d099e487aa1320a35e04ee6f87969cc6401fb0fdf1b4bb854f33636385a79823d00505631937008bc10346e5f3c5ed56fa293a45a8da8ad44316da01d0d52e301eca730815b57b05582e22dff770949c61e3ef3c56c967d02cb16a71d5e3d7b73851ba9c04f5e6500610300c517a33feb6e5949d4cbcdd94e2f66e442af63ba9d4eebf7df371b85576f7134832205877b5fd4afa45abe89e12476783a37db421aeb5d61c9df62aa283fd0a288dcbb4057f2721df8258655f5fba938e14e396f2d986a0d00b5a7858bf4b0f9685a9ef6a394d9d2bdc55f720977b886b64004134ee9ac3730bd27cb767bf90bce7e07262b522b7951f7e2bbe74219f548084daa88f8513313882848d9aa2bdccedff1020348fcfdaf0f70d0e41426387165e67b6b9f21f3c5e7c8ba6e40aaec6e70b1b233495f1fb2c36435558a6b1b8cd000000000000000000000000000000000c141a20272b323b","attestation_type":"pqc_registry","signed_at":"2026-04-20 19:44:42"}],"hndl":null}