{"id":428,"slug":"nvidia--nemotron-cc-v2","name":"Nemotron-CC-v2","author":"nvidia","description":"\n\t\n\t\t\n\t\tNemotron-Pre-Training-Dataset-v1 Release\n\t\n\n\n\t\n\t\t\n\t\tData Overview\n\t\n\nThis pretraining dataset, for generative AI model training, preserves high-value math and code while enriching it with diverse multilingual Q&A, fueling the next generation of intelligent, globally-capable models.\nThis dataset supports NVIDIA Nemotron Nano 2,  a family of large language models (LLMs) that consists of the NVIDIA-Nemotron-Nano-9B-v2, NVIDIA-Nemotron-Nano-9B-v2-Base, and NVIDIA-Nemotron-Nano-12B-v2-Base… See the full description on the dataset page: https://huggingface.co/datasets/nvidia/Nemotron-CC-v2.","tags":"[\"Task_categories:text-Generation\",\"Size_categories:1B<n<10B\",\"Format:parquet\",\"Modality:text\",\"Library:datasets\",\"Library:dask\"]","license":null,"framework":null,"parameters":null,"downloads":147272,"likes":116,"verified":0,"created_at":"2026-04-21 11:33:27","updated_at":"2026-05-02 00:07:03","source_url":"https://huggingface.co/datasets/nvidia/Nemotron-CC-v2","source_platform":"huggingface","hf_repo_id":"nvidia/Nemotron-CC-v2","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":1,"risk_level":null,"risk_score":null,"versions":[{"id":427,"model_id":428,"version":"v1.0.0","manifest_hash":"7f6f3621573227f27ea7d938c8810dbedf764b647c979eacc95e25980b3e3abc","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/nvidia--nemotron-cc-v2/v1.0.0.json","created_at":"2026-04-21 11:33:27"}],"files":[],"signatures":[{"id":922,"version_id":427,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"a9fb11e06ff0ef0bfb8cb1b5a994951968143651979ec18cec9338d322166849","attestation_type":"registry","signed_at":"2026-04-21 11:33:27"}],"hndl":null}