{"id":289,"slug":"fineinstructions--fineinstructions_nemotron","name":"fineinstructions_nemotron","author":"fineinstructions","description":"\n✨ Note: For all FineInstructions resources please visit: https://huggingface.co/fineinstructions\n\nThis dataset is ~1B+ synthetic instruction-answer pairs or ~300B tokens created using the FineInstructions pipeline.\nThe FineInstructions pipeline was run over the raw pre-training documents in the Nemotron-CC pre-training corpus (a subset of high-quality documents from CommonCrawl). See our paper for more details.\nEach .parquet file in the data folder has a corresponding judge-*.json file that… See the full description on the dataset page: https://huggingface.co/datasets/fineinstructions/fineinstructions_nemotron.","tags":"[\"Language:en\",\"Size_categories:1B<n<10B\",\"Format:parquet\",\"Modality:tabular\",\"Modality:text\",\"Library:datasets\"]","license":null,"framework":null,"parameters":null,"downloads":1195546,"likes":9,"verified":0,"created_at":"2026-04-20 21:02:05","updated_at":"2026-05-08 14:17:33","source_url":"https://huggingface.co/datasets/fineinstructions/fineinstructions_nemotron","source_platform":"huggingface","hf_repo_id":"fineinstructions/fineinstructions_nemotron","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":1,"risk_level":null,"risk_score":null,"versions":[{"id":288,"model_id":289,"version":"v1.0.0","manifest_hash":"d5570799ceb43ee5899aea99639b3833a46a3fffaa17d1893debbd478d7f338a","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/fineinstructions--fineinstructions_nemotron/v1.0.0.json","created_at":"2026-04-20 21:02:05"}],"files":[],"signatures":[{"id":745,"version_id":288,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"f0b9dcc7b4af5b8086483ae0f7284de324e214e4a94ec6ccb9935826491c9ff8","attestation_type":"registry","signed_at":"2026-04-20 21:02:05"}],"hndl":null}