{"id":664,"slug":"codeparrot--github-code","name":"github-code","author":"codeparrot","description":"The GitHub Code dataset consists of 115M code files from GitHub in 32 programming languages with 60 extensions totalling 1TB of text data. The dataset was created from the GitHub dataset on BigQuery.","tags":"[\"Task_categories:text-Generation\",\"Task_ids:language-Modeling\",\"Language_creators:crowdsourced\",\"Language_creators:expert-Generated\",\"Multilinguality:multilingual\",\"Language:code\"]","license":null,"framework":null,"parameters":null,"downloads":1210198,"likes":367,"verified":0,"created_at":"2026-06-27 11:23:44","updated_at":"2026-06-29 14:23:35","source_url":"https://huggingface.co/datasets/codeparrot/github-code","source_platform":"huggingface","hf_repo_id":"codeparrot/github-code","ollama_name":"","category":"dataset","latest_version":"v1.0.0","version_count":1,"signature_count":1,"risk_level":null,"risk_score":null,"versions":[{"id":663,"model_id":664,"version":"v1.0.0","manifest_hash":"8385ec91bf3241ee723598fd4d46894ad7a07090273a2dac051a485cd47be070","file_count":0,"total_size":0,"r2_manifest_key":"manifests/datasets/codeparrot--github-code/v1.0.0.json","created_at":"2026-06-27 11:23:44"}],"files":[],"signatures":[{"id":1218,"version_id":663,"signer_did":"did:quantamrkt:registry:shield-v1","algorithm":"ML-DSA-65","signature_hex":"a32b5a5ce4701d75cec0de8d569f473dc2b5d9a1eaa45a6a8bf6efe1afec9710","attestation_type":"registry","signed_at":"2026-06-27 11:23:44"}],"hndl":null}