{"id":173,"slug":"mlfoundations--mint-1t-pdf-cc-2023-50","name":"MINT-1T-PDF-CC-2023-50","author":"mlfoundations","description":"\n  🍃 MINT-1T:Scaling Open-Source Multimodal Data by 10x: A Multimodal Dataset with One Trillion Tokens\n\n\n🍃 MINT-1T is an open-source Multimodal INTerleaved dataset with 1 trillion text tokens and 3.4 billion images, a 10x scale-up from existing open-source datasets. Additionally, we include previously untapped sources such as PDFs and ArXiv papers. 🍃 MINT-1T is designed to facilitate research in multimodal pretraining. 🍃 MINT-1T is created by a team from the University of Washington in… See the full description on the dataset page: https://huggingface.co/datasets/mlfoundations/MINT-1T-PDF-CC-2023-50.","tags":"[\"Task_categories:image-To-Text\",\"Task_categories:text-Generation\",\"Language:en\",\"Size_categories:1M<n<10M\",\"Format:webdataset\",\"Modality:image\"]","license":null,"framework":null,"parameters":null,"downloads":97564,"likes":13,"verified":1,"created_at":"2026-04-20 14:59:19","updated_at":"2026-05-03 06:42:09","source_url":"https://huggingface.co/datasets/mlfoundations/MINT-1T-PDF-CC-2023-50","source_platform":"huggingface","hf_repo_id":"mlfoundations/MINT-1T-PDF-CC-2023-50","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":2,"risk_level":null,"risk_score":null,"versions":[{"id":172,"model_id":173,"version":"v1.0.0","manifest_hash":"443c4709f3e0a1c411a2b4013c28a31ed83d360b34f29e53db0c1c0eb20a3db9","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/mlfoundations--mint-1t-pdf-cc-2023-50/v1.0.0.json","created_at":"2026-04-20 14:59:19"}],"files":[],"signatures":[{"id":534,"version_id":172,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"d69b8f61c5fd4260abe5e3243efbf5053bd4929022c094e93caed134b4b75ae8","attestation_type":"registry","signed_at":"2026-04-20 14:59:20"},{"id":693,"version_id":172,"signer_did":"did:web:quantamrkt.com:chain:authority","algorithm":"ML-DSA-87","signature_hex":"4573cd53f29bf3e5d337a737cdacc3dffd324e2b7182c4423052af24a3c8fa0977b8985cb7356d63bce089c9869d18ed5212a32a4f99f54280412e07961c1843ccfbc02b3e322bbf6e73987ff924c96f27fc6349243e96f9acb267436ba3a3b31805a1e5c11892e3f7cb6d2e40859194e64c3343a6e0f21fc528f209a67d23afeb6b9d6b6919c9f7e4d0a9dbc7f3ee69153508438f87a103d79d91c3599bfaa555825429579b349fd328d65331ef330a326afa399f14b9834493ae38850ba95397ee903e34ce7b0c45147a4fdbeccb1a4d478d8b619b65d564b125e329d701ff0c8a44026e14579c13e573b0fde3ef72a390c894c0b5d076c9daf3606e957091a702772275ba99da50f0a2614697ff139478294157765abfa34eb2ed8601e9db13d5fdcd76e62045cca891915fd60895aba6402720add1c7f2fa3e9e9fb9c7e2c414540bfd83f645b8bc9b20d5be570dd31410938253ec28ab4180e28d0819d9485b0b399ab693b021c70ced899f8d9a1b5c79cc0605eff5a2d2acccc8f2b1593559b0759bc789a399832d162783d6726f9fc8d98e92f56490ee44bb8e260f6bf3efb341acc1e8ef3a73cc104d647ebc4577aaab781aefbf91aeb28e2e670e197f672c2c681cbf24331cfbef8303a4b58a2c838e130af70f3dac62756a5d498dfc4edf4a5ad56c894ac1f11e240dd3f80945d0266bd396c8106e1e7ac4cb5958741d8110dcc1795952cfed0ce34a191e7a6e07adda6c71eb8542d931fdb2ea046b2d0f8b94a84d911978324907ca7f64eec2ca5c72c3eb50257a13694b92fc15501627cb886c775d8c034ed99160bb875a93b25ae2d7ba69bf4dd63c538120822898caef43651899615e92104f7d47d8a4600027a3d75325c6daa97ace4c8a31a671d569259d9b34d3af8a962e03b4d639371a34be15f2a8e01e47930b8302a81861febf5501c118a97fe5618aa664794e45026ef66f6212681404caaf54aca17b8b217e2d6446c57e01e114c517f45d1abf1b90f4ddd16c62a4aca6a3c01b888f68ef521693f2588508c93f6e7cab9d99d106db8daf00d97f5f8be8a515a873e966678b57934c05af2ea69fbb4c210df910deb7aeaefaf097a3e552426e9a343adb9c46f1deb0e539a3aee813225d2c4c01c6c023eca23d9049d9950dcebf4b18020e0a26491295dc2a165459e498822ac84a7353d997f1f4cd7f85ef107671bb952243cbfe3f4edd6256292810b1725d8d59af3c9eb079c17dba7388ccb637e7f885b51f197a7056c040e1685957d54aa4a471049f02128ae4e34cb4b5dbca085553a1c83ccce39432256e656d73973ba9f50438347c8eff6956f99188b4724c82025db054d73bec86a9f3538e343dee07ab8f41a80539dd36f08afcbe71a9ceec4ac3d1a74ef09877fcfab534994313b477d067f7ee09cb3b9b0e432aa0da13df32e6fc9460e9ff74ef34b8843fbcc5a496ecb3c89aea64eee8fba4e0f25fb2aebd0e8dab3c07411d5f85aafab835eaef0b903995b7479571ca4264cb7fe6a43da93866119b0b79a1c7fb205edd042e623b7d060d4aa7689e305894e1de8381515eba8f85ff0e94d82cae1d6578ea5b62a54d31a7ce455dafbaa7ddfce4d5eab83bc65cc94022c1fd8f7a4e1ea5ffd465d682a265cb92ae7f5795d1b9df4cb1f2393f5b61317e7a37447b87109a86b991f35c1f96a36c3b104a665c0674ab0b8652c606a68dc16b03a4e1f09d77b1c69a8ae78838aff8570f2bb498a5786d988266c7dc19ebcddc2751bb404854ee1917f7ca6fcfb45c2b6eee4c007926596700c77ec6485f15b5357af673b382fc6d0ad125be62e42b9652a8a342bc454b134cdd0348bc895bb249b106a2ab689e92d2385aa8d952631ace571bbaeac0b96862cce880d34b451680cd421792150baecec33d573257725ad661ec1348d93f8770bc15e55f241998812ad11c6e57370340c426289d5964849d94e036a2c2b5702b9295ff79cf443cd174280dfba6e791e556a5373cc004ec251055f608caedde1728d2d581b2103aad097b253908a5f5fbc5a7a9433c6c36ca9db29e63f985c5da888522f0b375de364a28eb4954be0bce4b4f210690396d7832a9cea3e55ded7bb4c83c486faf975325db425b57d940d261d3419f8bc0d68a1b81d78c21794c890c339748fa8b47c31c62a9fe2c1e5bca5d2cda29ca58312054eb92643b2feffb97b27a03f0a5ec3fdbf6c820e14106e9b55e0bfb21ab0b4728c026d23739b1bd8ab452d82bf5390124d57c9c749aa74236af14518b427041164170e4e533f2b8f1b5839c29745072a66a21b406988dc073275704df0415059a47e1139a73b9dc9770c9e88b4e3e8a10f970e5761bc20c1134d4529a7633e4b5e3d9d4eff548c826f75716fd873fffe08f86d98344fb907b190c35c1202823644a4dbd16e934feffe7e643b48205a5e9834f970f8ee5546fb92e883a0023c6a26240f5f243480ff2d5d8efe7bed2aaee5cb8c9ea2682d1a84e85af44451d3317ddd6a71d261e1cbffd2a803cee5172b28c3ce3fe985b7251984ca247747e6073db679306f52b4f249914863e3627c9e2b06e97d1156bc6589e21417cbe8c07ad5d1d483597899a28a57035db3b57fd1675387ecafe452be4859ca68244efc7ba89dd96f682078cc9756100ebe46246a22e177d6b968b4ae31d40bae925097b623e2ffa1e3788a71371eb74133ab95f5193eebfd4d5ee73445797752d6292c88e0fd3f143dbd83fc9639512fbe5261328c94e55b756bed6d5a677b249c3b2ecffcaaea85f8b505365c19be0d4d0fef7bc8b5e1699937b0dd5b13fbe37dc4de06cabc8ba01b17ae47dcbfa1520c8b153429f7167bd527d31057391d3fcc06686e1f13c051008e34fbc7239da016f4fba563a9965ed7fe459b9d091b27cf195be3b8eafea289d58b1208f88b591e38e4cc0c6c46782169c7f57d0630ae4534b7a233a37756db928cb9ba05273cda0d5231e98b9e732294af83bb4d2c9683c6c1c4b322d2c929445d45886e714413d42ad7b619bae9fead388d0914c01ba2f4d47caebc0558791ade910bd29e049d3de4a8f140256b3c4030c6e8e23ce749da41b87ba4079d2ca53999c4054adaae25672516416ac32e5c9098363185f5abe5b5f905b4aa416f04353c371f69ae94165f5142358d5187078002f2095de7cb6f9ca6025f0ec3204e700d5ddf46e9c1f552857e547f06c8bfa877cb3b7dfa6b7419179788e82b6dc0cdf97b81e4b0a5cda2536677c8d63768e087329cdf5c29a414da2d9a63414144cdf30c4550d2cc9046f3643cfe3f11ee75a3d698153f032ac77eb1bdac3409ee8832507cfe187fc83ed150ce617bcf4598c5b51f4b34fe91e40e0a2df93d95e6514b8e56841bc06278fdf8a9f533df8fa00e80caa3bd6925f75b54f78e314b42bf216aaef72fe7abe3b0e8a4f7b2bf1a00f839ef25147849228dd28b0ce70b86d0149a60fbe186d20958e4cbcf576aec17796e77339c8242e8e2711d106d0f1d9c8ad5fecba9f295aa6410578fa4f461ad09421923f05d90e94bff8f09dc54a91a9a6976f16647efddefcec366694a7f3d863832560d4ef38d297495b73cee528a86ac5cefef0e87e7d993a0217fd3c3086fa8b081e2940c6ba5b504236f6d05f5552987bcd27995d11cbad0ffd0b2f38b09830468f849c87c5eab713decd4a1b10941f4ff1428e2b8a1d7fdcc8b9a3f2148c14525455637d01bbaa13e19ab431989aff82751cf8a85445e8a09fba18d14ed51dc177f3d6d65b89a8fce7f74855475380376216ba4eea5a8ff0f9d437422fcc39f2b08c7001bdf148a1650f04cb72621674aba5daf3b502f5dc59aa1d78bc40da8717f19df017c0e9cb596ced9d0fe2c82abf2620f47d7a502ed51a13e65ead0852c5687990b074685bede5c98fa6d6b68b32339ef12d61471914f25de1d22d2f9a0e29d4b92441c91a86aee7262d85adb886d34af689c24cb718f9ce7132afe34f339beffed6ca5d534119c494a790433b86bc104a2668f329dc50dd0aeb243595a590aff4b1ced1b059dbf9e341c05caf12c3eccd05754ac336ff75e7fd4704848168121b62ee8982ecb25f64fea98507858f11e67bd1b3cc54ab8e6157c5ca92c3cad1ff02cdb9052d3bf86c46fb2ba79c774ba6aaa5ee791fcf511ffd80c168f53af5b3c761c7b0c5da99dba28c10bd239e366081bf074214b6532540b823c9a244757298c3747fa536f7adefe91ab8bb4f884235188d55671b3a9209122a2435b1814f1feff37fdabf4f1b04334b2c1086382bc155ec9067c1a1116d51b2d6a83e1b43f799468903766a04638e9f35653db7d4386b189dae604479102040def6a3fea17c2c5e0910da23ee2d09de22a44b09d726c65db6552ae3704b5e8e529c156faf6aaa9c5c85988553e0df410100a39e4149353e507f19bc4fafae18c6809bffc0221472587518716ed130693590f442dbfaf7bf30e539d2489686c0ddd44a9d3c1b4ff2dfafc3f99822550f91466739caa9f04ef6648bdefc335d372b929f62120e12ded952a77ac8b4d1a8788652025801d827d21965b7440c79bd88b8b96b5007530c2d48884d40645445650afa5ae891650f0ceceff25c7328af29eb2cab6d23874d188497ab831e3d8824a67c1536698a26d31b1c5b4bc9dc5672066784749bd7f36876e794beb746fa51f9c92473b31249522073a0e5793e3c3cee9d4106b49163b62490ec253c7072054bf552cc91889b14ac82e212fb00f31cf2b075134a27e2af5e5677e472c6a01f88401402faecf3212c91d0e01b1c0f3bfd6d5835533b56b21960fe9ed1c4cc7eab448471dfcddba099600a75eb3664b8f9fc76a0542fb5d18856311c0b7211d9a0c05b477379a6da5742e0780cf251356578e0497de252a96203e3460a1b4d4a597807ae997b8abbde78a106a946064b9a97dad8a1143d40cf73f9f66f3de18d7a3a11f12dc9759bbd0e7c19dd6df311da2b7710bf6ee9e33bc09191b26a006562019d8e5690ee994d52718944c26a0b2217936b34f904b14f6244bbb1dee2e556cf9fcd8071d4d3ad8fd0384a36ca3c550f0b3dcda036cc9e98b8eaaacbc535e6688ce47defb55aa7c8d814c0293a48700c08949d4231e50eb52abb3d7735d22b3287c550ff696145ed7e3ecd9d0fb206e50f475ea26f53ad35400de48429df51393e988cece6fe2ea27dc8bd3679703ac475d812e3fa216d12937c7696d450fa37f5db6eabc51dff9053b4f61321128c9e7d483f1a3b6d443edcf2e6a30846d978b912ad9e16d30e82290b57e14612a83470568efbf5b577c80b511217ac2a64faa1c788df1d97aa997eee5d7798a371d6416c9f08038d2a4ada3d10edd97db403ca6a17e034f91ad402cf400d72460c62586b122baaf6c7f4b3f0303f6f3c02a3013622dde4e36cc9fe93d9652e17301a6d8b27c2841c5e97978462257f049368b0b187b59d09107dc7cd0c8bae0959f7ff4bc78186daf9b3cab124aa06855f6791bff1cd8ebeea413a9e770fe93a6bbb6220738a7c42d19d168c7377070e70d844836dc3cc5f549ed06ae17621a57f991d6600a310cbab22230fb6ac8e71821f66fc753087ded36ae8c37b67345e88187d0d70513cc5935ec5bed5d1b6a1dfcba72669beda559ded35f5a1a5d367c9de4ef3af532a05d41d08e449d1d23c55cd2893c3942f60fa50fc2be39068926be4025a2d59638ad9381d869c09b42748ac4c7ac25438567fd51f64aef487b4ce469143cdb7192a7ee13624fc4215ebc11a637c6d42489f7e1033e461231e085d018e0174937e6b3ad8605eb236a773b82ba7eeb315975d43b654e63e22d5f684d663a5e5e84d245c269b38c2c12eddbbdf141bb01ec670f467344c2714e345d7f445ffbf16754a1e0bbeb866ae8aa12f3c50704b0f0c62eb5032d3c724c2bd5004a24e60df9043c0b139b8f49bb309558edea65fd8577a634e85a10cfa1f2f82e71406ab604c60457e2895a389d7d3fb204791953fdb7fc56fc94c97c99af0b2551759c68a507648142c93034e15b7a3cf629d7e71cff728f4cf7b6ead514af672cc5fbdf32fa120bcd2610694f6ea5deb08e4d802a90b523e9e85a03a2d0e797fe59229855a714724e85cb04288e8e5e97ee12fa345be8c855383036a2f2cecd396f676ecc7321ff7ec82ba87f8d5e0829d14c88902e51670240fdfa440767820f98f853334375f3b6bad8d1242279f877e223aa088b9838f7e75711712d12fbc7c97fef921ddaca391fd6556d0f4d85bc7b73617376f2bca84a2f926baf767daf444929859b4424f20e3a715b8da7486fb7a8aa693cccf57cf674d0cd7e70dc22adf4f85f07a738baaa419d490841b14e9f49957fc5157cf9a08a07227f8f3c15d034e5c24595c596f1c5a5db1bfe57882cd33438a2249709e9fa0304988b9e1fe595e7eb8e80258778a90b4ce0f5354585d78aeced2db000000000000000000000000000000000000000000000000000000000006090c12181d242e","attestation_type":"pqc_registry","signed_at":"2026-04-20 19:44:33"}],"hndl":null}