{"id":665,"slug":"legacy-datasets--wikipedia","name":"wikipedia","author":"legacy-datasets","description":"Wikipedia dataset containing cleaned articles of all languages.\nThe datasets are built from the Wikipedia dump\n(https://dumps.wikimedia.org/) with one split per language. Each example\ncontains the content of one full Wikipedia article with cleaning to strip\nmarkdown and unwanted sections (references, etc.).","tags":"[\"Task_categories:text-Generation\",\"Task_categories:fill-Mask\",\"Task_ids:language-Modeling\",\"Task_ids:masked-Language-Modeling\",\"Annotations_creators:no-Annotation\",\"Language_creators:crowdsourced\"]","license":null,"framework":null,"parameters":null,"downloads":120455,"likes":645,"verified":0,"created_at":"2026-06-27 11:23:45","updated_at":"2026-06-28 07:23:25","source_url":"https://huggingface.co/datasets/legacy-datasets/wikipedia","source_platform":"huggingface","hf_repo_id":"legacy-datasets/wikipedia","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":1,"risk_level":null,"risk_score":null,"versions":[{"id":664,"model_id":665,"version":"v1.0.0","manifest_hash":"d2a3fe56323043f7a2920921ac847172758f058fe526834f9c7d52b17d5345a6","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/legacy-datasets--wikipedia/v1.0.0.json","created_at":"2026-06-27 11:23:45"}],"files":[],"signatures":[{"id":1219,"version_id":664,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"d5235c597f3d4e6c225625cc57bb22dd2808579fa19883f6995d755b2213726e","attestation_type":"registry","signed_at":"2026-06-27 11:23:45"}],"hndl":null}