{"id":176,"slug":"allenai--c4","name":"c4","author":"allenai","description":"\n\t\n\t\t\n\t\tC4\n\t\n\n\n\t\n\t\t\n\t\tDataset Summary\n\t\n\nA colossal, cleaned version of Common Crawl's web crawl corpus. Based on Common Crawl dataset: \"https://commoncrawl.org\".\nThis is the processed version of Google's C4 dataset\nWe prepared five variants of the data: en, en.noclean, en.noblocklist, realnewslike, and multilingual (mC4).\nFor reference, these are the sizes of the variants:\n\nen: 305GB\nen.noclean: 2.3TB\nen.noblocklist: 380GB\nrealnewslike: 15GB\nmultilingual (mC4): 9.7TB (108 subsets, one per… See the full description on the dataset page: https://huggingface.co/datasets/allenai/c4.","tags":"[\"Task_categories:text-Generation\",\"Task_categories:fill-Mask\",\"Task_ids:language-Modeling\",\"Task_ids:masked-Language-Modeling\",\"Annotations_creators:no-Annotation\",\"Language_creators:found\"]","license":null,"framework":null,"parameters":null,"downloads":773251,"likes":566,"verified":1,"created_at":"2026-04-20 14:59:20","updated_at":"2026-05-08 14:17:33","source_url":"https://huggingface.co/datasets/allenai/c4","source_platform":"huggingface","hf_repo_id":"allenai/c4","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":2,"risk_level":null,"risk_score":null,"versions":[{"id":175,"model_id":176,"version":"v1.0.0","manifest_hash":"6c5036dc39f7290e2c487584fc3f9ad7254c9d55a0d280d5871a38db07398d2c","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/allenai--c4/v1.0.0.json","created_at":"2026-04-20 14:59:20"}],"files":[],"signatures":[{"id":537,"version_id":175,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"e06e0cf15aed502ecd0ab505c09fedc07b7df2c1881c0c700416eea1a6521d2b","attestation_type":"registry","signed_at":"2026-04-20 14:59:20"},{"id":702,"version_id":175,"signer_did":"did:web:quantamrkt.com:chain:authority","algorithm":"ML-DSA-87","signature_hex":"21cb012ef13ffba287c3049a337b549590fe025bf425660ef0a60b38194e17bf058ac0b42728400fd834be939bfcc292a25dd28a4a3566c2f645a264cbb6536737d75f51a51d83c3ef097d9d69a3eeae39fed28c8a0c0a77bf16ad89e951d012e2c3105f309804f5ef4178dafc48e836f2e807e0c24afead65e548d31c2cd0467921d3bca7e63f20f42115c557618466b9875ef571b512cf2fb89c66d820a328f224719388bfad70f59a61e7c239025bf9b9f2622a9917d5280240d2fd3db163fbd1ae5b1a7f6c18e48064fa24db2c70e25f459c5769b935ddd44bab49ec184fd7a6a71ff8c5b68dd8fb9253c7bb435ccc0aeec9afbed1e82b6749fb3591fcfa6ac7a6c9308493f27afc4530638dbd6896434964876067ef6f385a2d1ad27fd2e876b94ec6e291cbae5b90ff23d574076dd11f421f7d2c540ee7cfb24b8472a4ee92419e5f8bea8c487499d626dc2b77b52ff11de640b430302ad5f0de1919f7eeec856497a6e6229227823c3fdf031eeafaff4d6623913ece52b503a448adc5971663aacf5d5fea2e643f0f2415630c256a5593b60d8922c6fd29c3d7f0bddd6be929e2b4e1d921ab5e22cc32313f88d004a70ca7fd65d47a85421a3d64af06349f55c775a74d613534246a00618b1dc160555fb0cda7519c3ec8de92cc5c72c6ffc8d8cb7de6597a065a5066f0baabaf4dc1d9f76a089b5da733fc89faeb497654c6ec78a1d767e95a6ad7f803080fd81a020edc3ad97e6b0b1daf59d0435f9bcddfae598c1fd9ccd6a22cf0bc5fcbf05224b342fd3d3e6ed4c4776eca5d20ee783305e7de6889f2e72dc007a75e511e644088fb6ee0c521aa91bd3fdc6eee46c294a2be6aaaec99a6397b7418157adda91ac46ea7b7a04eeec441286f2e57620dfdb02d3b97828891dcd058a5755ad23db6ac3c8da9a7b5380c22ba5c1c5d6f12717223380b42b158950da1877d348695dc312ba96bcf221893eeaa522859967bdc5ce5965dd7d9cd22068fbf5d19a0dae69b2a5eebace558f82b344c8bb1a34cdfda125a01c19b438897be7a7c40e1660ced6337978bca221da99e79dd80beef09e615e6d6203f01e118b91165cfde126aa5eade60c233f073d9769caa8a64df3004adb1688b3e51a405185fd9a0af6962b852c3b35910d4be89183bbc18e8ba618f26e38eefa8bb353b9546ac603b9b7d2fb6b426f425e67a95345a070b7bef7601c855cbdef6fdb8bea35154faa620aef319bd67594029ab31f1450f6051eef6cba19316b807ee5517e4915372ef345d099bf957336e37a943439acdfcb5400d4aaa779c73c6be0d97652b1b98854f17171a24a235e83a5c6e8db20845d2306ff0f71a241f1695ad8970398c70bac4e6d71a18730ee3a481a40bde207e67c4bda26140960517adaa92c64ba2dc63c1d75d94eb6d11a8624fd9df725eed331e5e2c088792acc2aae35cb7d0881a72e949a6a9bedff24b23cc71d8e62620cdcc1273800c8b82e9ab2497e2ead9a9d361d86e05394e1440398ac1594b5d3a5154cc20e986b74b8086a348c0667ce920fd99bf125fd1ac900cc8ffe20d0cfabcb19f6e0b21dd378a37b617a7d9629f3fda19b312928e90b15d81562eec405b4751495953ae0a3f623ed6ab37f2d2354e09535e0d88cc29059195bc490d6894a057d87f0924350c26d7cc646a545d9144246ab4289189257bb8a059e7243c7971760f71c31bba42fe392e597c8adf050953ade69bfec571f8d3f6e5929b77f002bbc0d657252e7a83e4d4e1b88182ce6c6962693297be477d39d4323e00e2c74cdcb2b589e936fa6059b46c6d3f2d6d68a0fa1f9377fe4c460e524f482a7d3085763552ace1329fc4fe8e8a0c03a135ad1d57184c76817fc480fd5b80e5abf0ed1fa921895d0834a125c05b927436fc3f68df0d1f50243e8170dcdb880c554980917af4496552656c61e6862caf2999f13dd3f604800988c9dbcbc200011453e3710d38868f00a00f839e947288a792642196df12236f8f8c252a1ba9e6e6a9e0bdf4fb08e48f44d56b39e232bf34c045a3671cd313415c3bac3c376866e0dcb3c693403cce49febc8d0ac720802c5390ea0da51d502774aea38ca9ecd804fff4644ae2a43048be990ffe8a5abb204d9c79bb626532bdacfe868eb3b30ae6d8b83090cd5270d04a4273293a0c201e6388fd3f7599c26803d4f3e86159a7b1ad94345150a94a77160ffba5d43d0d8f2dce7683dd4124f6df2cc8a812250297cb241699ab40af90666530f7da08b0f40ac1446b7e7ab97bb9088d962f2484e8fd1aa47b4abf52c4bf356c4235231012f2b6322280e05817f96928ab7c05f5800e5f02c4cce1c1221ed01d23a8d9ce7625da287090a03dbe3b1563ee071361cc17799bc32736a07dba2ff33f3a0cde5fde80104f8df8dc818f6483f47c067379dc864e1d59c60ab8a02a7738e81e42970645d46cf388a5c054b041a9ecc5bbf8be5693a4f58c5071d43a2bd3c4b05bc5b0bb51ad92c3849b47fca4c1512ca09b379edb9a689fbb569325e04e92187f15d0c66c85c7f54bd9f1f28cde63dcff6b9698702e0b669700f4c9f54c026136a393c6e7eb239352202169314ef27d0b689804f918ab8681cea921f3ee8b187c094b54908aab6ae3ad68e3d2b17fb13c9dc71f11cffef24340cf0d5646b0cb5c6aa1da978b028071eec304fe9216b01310d36db6833b75ac0df53ad3a7e232dad4320db094b34be5ac63159d6bcb160e7836fef0c3a7e18ae00669b778060cca397344d84920e53321d99ae1f8f405e1a76e874e8e38c9106a4a503d3d77f62bbaa6c9b08aded3cc314752d63d7d43a0181e34290249e979ca71c79560b8423a523b04929fe4c8118add9111a9cb495430741bf03f3df42e9e9e2dfb7fce06674a7e1bc3ad8da56bcecba7cc65abe14d645f7d83d604891166ddd4f9128c8364049429634b1b1b8559bb4a9864bbef593d84b8b3041b98870d941dfa6534766cfa389743bdcd950c17ef68ab053a0437459a988acf0fd7deaa049883c5ff995fc6d5c68e6fcb0253ea7c28aa1ddb9596372b7b79de0e4f82cc41404d798604c0b7259a0fdd9ebf50549c21e0c57563397a8d007e99323da08feb5665efc65044094157f92b6cb304baea84884bb3ed25160997bdeb6c5423c61572139325ecef2708a8a73aa4293d4bfeccdada8491ee1532bff096b55a89f3506e4439421fbe44e9f1a64e8281a95d933e9074ff1dc596a90948f84d0cd21111e33993fb700d728b36c89ef8ec3a9132de2171b2a4c898138c7c1c1ee1cb649d6ca6ea0bbf5bc400bc775ac71d3b0d5d9a963c2d3df4d1ed86b23e1afd7370ea0a6fac49f28ff99d27e6a9164f9ace56899fb1916ba4279f74e6d954a99e4d9e35bea2f5a98b6eb8ed94d9930f8393e6d8c4bb4b8cce597aa5ff4724fa6e78363d26c083bd5458529c76c32d22d99f5e205bb9d2dda1960c06860915938a5e4449b09f463818f62c2f484ac022bd3eb1a4cf600b60b39f17ff3babdd8ea32025fd3454ee32c93b28eaa08bcc86315f7423e464ea60d9960b9dd0bd28936716416680173c4e85e65d71f04577b6a398ffd9daa8822d89cfa6ec76d01d55587644f7796f843ad33917b53cc85811ff479a356f6912a3447da437924277222b8b6a2d67798183a5df05765fd9a4a3dbd05e6bd5a6ec637fda37c048e20235dd052e0a1e39de95c3ed8b8d3fe39113eb6b13c2d35216cdb9a9d7cef835cf9c5030d18ef8816c2b03fe9d4b0d89900c06e0f14213377ab51be872fd601ed2269908cda55f07cb7d24f1c7c427fe7b611b54ab38beafd227ae8529e2c66abdd6eebc18d55eac5d221409d3b91083cf67254d4f2af9bd72699be6381905c33859a68e3cb27c2ad2834da6abe0d2abe030817462ce5433925e7de8aa43155f614442355fc7b96e4b8b9d9211fb4d4a373368a550227c97bfdff9568e615229c5d8f7da1d8f834f292eb53c0f4569784c5634f9bfcb430b7dff1c44d81f5314e7b7357624dc1d8274df8dd732ca478000c0c2281c2ef355e0c58805d00815af1e1d9b4ba0d15857d117ed972e4bb3026f9a85534106c6db20e1cac1e1fc4cc2dc0f6b1d0ec94a1226e5c4bf88a972c0435f32d855174c17774ca71148f961c12373471435b00720b3fc9e4cb9dcbb82e8b38a070adbf9f8c4b239c5ad5751d8660d504403737fec1322a841aab3f8700f57383aedab9421aed36182977f95350d996ef3912cb556144cf4f8d4ce7c47e699b74d9d7a7832bc3926428fe2d2111093ddb3f3fd7babd0cb608b9cbe7b6626665f8499e2d8aab4f0dfb24fc22d831f065dc8018e0fefcfd2910aa7a4b47672ca6f248fc72adf13da5e36fadd0bd9ab17541f91317e8071088c922891e9a8bba1baf4c5e542b6a9ae57c5b09c7f3366eb73bb98e6a651bf10717d940e75994da55525d21f1998e9335cf19316b9fc1b705b1a4f738b3c090186ac4e37b13935d0ffb538eee0bf65dbc2ca2c80f3b76c0ac974613f5ab4b03667f0056bde72d7bb4912f01e15318047f54236a776c9ee10fdadedb9ec896a001885a01c6c4b25ebbc6c9c6e26dbf30cee8dcc2fd60db6a02c6ef4c3a0d40d149409a67b1f7d8b594f4a532c588ea8f46dac8002097c14e32d7ddc4723a33fd03916189c5e113e9cd924d57215130b7e9c6b95f6e1fdeaa71832dc19673557eea4210922de6ec44fc52beecc40d39f3341581551032533500d632f26cb78c03e37f04ed366b9888392b898d50691298c7870a5a00debe28d8c0d8bee85bac1e58478ddeb51e19b8d16b1638963672b72b60bc384c83619fa0b1e3dba5c7be0bb0c42c68666f95713f54e9f422f979e03807b87ae032e5b9e2ee1508ed43c77b4877db56457236e5437bf147208976f8d9303bc13514ed249c868826c5892e8bb0b2d5d6313f80bee5dbfc26d6b351bb2dddd002f1829661abc0a66c31faea2c87c801a2ff58cb4fb6dba674d5d0ce5416494cdd8c1d6d946facba5f206b5c390694c7fd3caafeba7fe7d74e0c1048b6d6f230b9f3ff10549e5834a3daf3e73a669f936d490a7a101250307f2d92cbff52ea1284cc62286472250e3ddc2ca0c2772771feef4ba336e55984feb4c0f90a290c9053d48f2d4ef39bdd8517c6ff817385074b201bebcc7665372d6a9ac60d3fdeddfbe0bd1d4dd60c44bda9d48d0ab55e423dc1356bb5a929189acc629b35bf70f39e7023804a53c7e4f13f4bab745a9c60112730fdbdffc6a10da05a4c0508a4fa7355654195855057513e3ccb94a1fcd2db4cea0bdf868829ce00d2119a734680e652ffd8f6cd569740d113003cba733945fad06b2add2317e9399aef5c9bc81365b6cc779ad2af2fc8197de4bff48a5654dea97b024b0b9182b5d8176d879e5f16576ef08fca682e4a3117ac9c5d0cbd76c755fe24b21986e9e832898b8f47748e461a90ff4f79a44aa3fbcd2c7ef0d73439867394f66da5dc9f41b3d9500cfd947b36d41ecd7cecbe0f0967edb05282cb675395d0213e42d06602a3f40f2aea34a65b5907a8944df0c44143e0bf269613e6ece55600d595f10df6e388987be39bab90625dba32b3a9e9ca19cd59992a43f11439a2cd4702aae6886a114c6461cf1aaf7b9409b4a5c713170484c385a29439a07246d6177435934e1755347d444a49d96f9cd09253f01e886cfbc823ba4e9093e56b33de4b68290e13263ebf1bd116d767a46e0f177e985dd29357e7837f35841de7dc04da96992a20b15e3a11fdc22a17ac2552df2cfbebd648ec5d1c5e85a240705d32f4baa503c3d4fa4a38aaa900cdd0f91059a4efd7808f6182e51d611da299a4d2338f86b63435718a02e96f805ac9c93dda201c2982f762bbaf4d55d402bf7d32563ed66ec0dc77d73a2de4c8c6bb39b0e337aa401ec43ba305eae9578e58c6f808817f3a0e4740b4ca63a8667825da6276a8fad594ef4621170037b3166aa2803506e949fa73d43175cb9a3c8ef8bb02e66b7e2a9747b6aaaf3b38815a729493c4b56bbd15af714e68db19cd1ce15d4c2bb55d41b3ccdcf24597ef0823b0d1f1ac6e9ac0dd2370ce7f485002619a52d90aa776ac31fb60f87bd4a368da23e7782a109d657bf869c9b9970463ed5a50bb56a80b792b124b13309b226a5c16e515cc742c35af2638ccea86e4f8e09dc826f9c709b61280efa6bba3d8335f2dd54efe9ee73306cc63aea1bd7fd3466cfb5058bef03d7108c989b0b728b89becc50b6799a1d7751d210db4c8831fd955c71f5ce625d8a806501118fc7597d2e9833594742fdcc4b1bd8575d7dffddd47b68268a888ea4b2ddcc2a1e023fe2b6497757f2f2a9d6e24a65d9a30dc25535cfc4afe21f3f47b85a5b5a9dd30305cd7b8a3fc8eb03f88cf175db110203292e3b5e66747882889197a5ce0e1a4a81bcbd102375bfcae00e1320335d7e9eb3f9090e284872789cb4d7fe16262e7281aab1d82f565d5e72cfd0e92d88a9000000000000000000000f151b242e363e41","attestation_type":"pqc_registry","signed_at":"2026-04-20 19:44:40"}],"hndl":null}