{"id":593,"slug":"opensqz--automathtext-v2","name":"AutoMathText-V2","author":"OpenSQZ","description":"\n\t\n\t\t\n\t\n\t\n\t\t🚀 AutoMathText-V2: A 2.46 Trillion Token AI-Curated STEM Pretraining Dataset\n\t\n\n\n&nbsp;\n\n\n \n🎉 AutoMathText-v2 has surpassed 1.5 million downloads!   We'd love to know how you're using it. Please take 1 minute to fill out our use case survey.   Your feedback will directly shape the future roadmap of this dataset.👉 Share your use case here\n📊 AutoMathText-V2 consists of 2.46 trillion tokens of high-quality, deduplicated text spanning web content, mathematics, code, reasoning, and… See the full description on the dataset page: https://huggingface.co/datasets/OpenSQZ/AutoMathText-V2.","tags":"[\"Task_categories:text-Generation\",\"Task_categories:question-Answering\",\"Language:en\",\"Language:zh\",\"Size_categories:100M<n<1B\",\"Modality:tabular\"]","license":null,"framework":null,"parameters":null,"downloads":124879,"likes":78,"verified":0,"created_at":"2026-06-23 18:23:36","updated_at":"2026-06-29 15:23:28","source_url":"https://huggingface.co/datasets/OpenSQZ/AutoMathText-V2","source_platform":"huggingface","hf_repo_id":"OpenSQZ/AutoMathText-V2","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":1,"risk_level":null,"risk_score":null,"versions":[{"id":592,"model_id":593,"version":"v1.0.0","manifest_hash":"23ef9672cfb3406a26fa23fa8185e3707a000720e982c8a44201d3ab8c543489","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/opensqz--automathtext-v2/v1.0.0.json","created_at":"2026-06-23 18:23:36"}],"files":[],"signatures":[{"id":1126,"version_id":592,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"7146d93ae0b8181f8a5833403da8d4a568a344f694ccd347ac008ca37109ef3b","attestation_type":"registry","signed_at":"2026-06-23 18:23:36"}],"hndl":null}