{"id":318,"slug":"tiger-lab--mmlu-pro","name":"MMLU-Pro","author":"TIGER-Lab","description":"\n\t\n\t\t\n\t\tMMLU-Pro Dataset\n\t\n\nMMLU-Pro dataset is a more robust and challenging massive multi-task understanding dataset tailored to more rigorously benchmark large language models' capabilities. This dataset contains 12K complex questions across various disciplines. \n|Github | 🏆Leaderboard | 📖Paper |\n\n\t\n\t\t\n\t\n\t\n\t\t🚀 What's New\n\t\n\n\n[2026.03.11] Added more cutting-edge frontier models to the leaderboard, including the Claude-4.6 series, Seed2.0 series, Qwen3.5 series, and Gemini-3.1-Pro, among… See the full description on the dataset page: https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro.","tags":"[\"Benchmark:official\",\"Task_categories:question-Answering\",\"Language:en\",\"Size_categories:10K<n<100K\",\"Format:parquet\",\"Modality:tabular\"]","license":null,"framework":null,"parameters":null,"downloads":148101,"likes":470,"verified":0,"created_at":"2026-04-20 22:04:34","updated_at":"2026-05-08 11:46:21","source_url":"https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro","source_platform":"huggingface","hf_repo_id":"TIGER-Lab/MMLU-Pro","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":1,"risk_level":null,"risk_score":null,"versions":[{"id":317,"model_id":318,"version":"v1.0.0","manifest_hash":"07aea427708096fbc09427e6e768758d720f91eb7d0415a967f2a294f6ec648e","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/tiger-lab--mmlu-pro/v1.0.0.json","created_at":"2026-04-20 22:04:34"}],"files":[],"signatures":[{"id":774,"version_id":317,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"f78001235c65cbdc67aab91d0a590d46b82861c6632f8d99846db76b372221a3","attestation_type":"registry","signed_at":"2026-04-20 22:04:34"}],"hndl":null}