{"id":678,"slug":"algorithmicresearchgroup--s2orc_full","name":"s2orc_full","author":"AlgorithmicResearchGroup","description":"\n\t\n\t\t\n\t\tS2ORC Full — Semantic Scholar Open Research Corpus\n\t\n\nA complete redistribution of the S2ORC dataset in Parquet format on Hugging Face, containing 14.5 million academic papers with full text, structured metadata, and citation information.\n\n\t\n\t\t\n\t\tDataset Description\n\t\n\nS2ORC (Semantic Scholar Open Research Corpus) is a general-purpose corpus for NLP and text mining research over scientific papers, originally developed by the Allen Institute for AI. This version provides the full… See the full description on the dataset page: https://huggingface.co/datasets/AlgorithmicResearchGroup/s2orc_full.","tags":"[\"Task_categories:text-Generation\",\"Task_categories:feature-Extraction\",\"Task_categories:text-Classification\",\"Language:en\",\"Size_categories:10M<n<100M\",\"Modality:text\"]","license":null,"framework":null,"parameters":null,"downloads":52665,"likes":0,"verified":0,"created_at":"2026-06-30 12:23:36","updated_at":"2026-06-30 15:23:24","source_url":"https://huggingface.co/datasets/AlgorithmicResearchGroup/s2orc_full","source_platform":"huggingface","hf_repo_id":"AlgorithmicResearchGroup/s2orc_full","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":1,"risk_level":null,"risk_score":null,"versions":[{"id":677,"model_id":678,"version":"v1.0.0","manifest_hash":"7b92567aa22cedb8a821df1ebbac50cc19a9ba25871bbbe243cb16cf460947b5","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/algorithmicresearchgroup--s2orc_full/v1.0.0.json","created_at":"2026-06-30 12:23:36"}],"files":[],"signatures":[{"id":1235,"version_id":677,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"124e4d3b3a76ccc89e499acf153f0fc35adf4c3499984ff6537c7a66e64b05a5","attestation_type":"registry","signed_at":"2026-06-30 12:23:36"}],"hndl":null}