{"id":669,"slug":"allenai--dolma3_mix-6t-1025-7b","name":"dolma3_mix-6T-1025-7B","author":"allenai","description":"\n\t\n\t\t\n\t\t⚠️ WARNING: This dataset is intended ONLY for reproducing Olmo 3 7B ⚠️\n\t\n\nFor all other training use cases, including training from scratch, please utilize our primary dolma 3 data mix: https://huggingface.co/datasets/allenai/dolma3_mix-6T.\nNote: Some olmOCR science PDFs in the current dataset have been redacted following the training of Olmo 3 7B. These texts are indicated with [REMOVED] in the text field. This will affect reproducibility of Olmo 3 7B. \nFor this reason, please use our… See the full description on the dataset page: https://huggingface.co/datasets/allenai/dolma3_mix-6T-1025-7B.","tags":"[\"Task_categories:text-Generation\",\"Language:en\"]","license":null,"framework":null,"parameters":null,"downloads":144901,"likes":53,"verified":0,"created_at":"2026-06-28 11:23:37","updated_at":"2026-06-29 14:23:35","source_url":"https://huggingface.co/datasets/allenai/dolma3_mix-6T-1025-7B","source_platform":"huggingface","hf_repo_id":"allenai/dolma3_mix-6T-1025-7B","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":1,"risk_level":null,"risk_score":null,"versions":[{"id":668,"model_id":669,"version":"v1.0.0","manifest_hash":"2c437678175c4f240f9273c6ec0e89f4384c7bca05a597612639929bb34aae20","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/allenai--dolma3_mix-6t-1025-7b/v1.0.0.json","created_at":"2026-06-28 11:23:37"}],"files":[],"signatures":[{"id":1225,"version_id":668,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"05f1f0ae7a7cb292f405adf65f576a0d270100674bbbe64fd8808cf30c254000","attestation_type":"registry","signed_at":"2026-06-28 11:23:37"}],"hndl":null}