every_eval_ever/math_500.json

2.2 KB · 81 lines · json Raw

1	`{`
2	`"schema_version": "0.2.2",`
3	`"evaluation_id": "math_500|0/RedHatAI/gemma-4-31B-it-FP8-block/1781646637.177847",`
4	`"evaluation_timestamp": "342932",`
5	`"retrieved_timestamp": "1781646637.177847",`
6	`"source_metadata": {`
7	`"source_name": "lighteval",`
8	`"source_type": "evaluation_run",`
9	`"source_organization_name": "RedHatAI",`
10	`"evaluator_relationship": "third_party"`
11	`},`
12	`"eval_library": {`
13	`"name": "lighteval",`
14	`"version": "v0.13.0"`
15	`},`
16	`"model_info": {`
17	`"name": "RedHatAI/gemma-4-31B-it-FP8-block",`
18	`"id": "RedHatAI/gemma-4-31B-it-FP8-block",`
19	`"developer": "RedHatAI",`
20	`"inference_engine": {`
21	`"name": "vllm"`
22	`},`
23	`"additional_details": {`
24	`"provider": "hosted_vllm",`
25	`"base_url": "http://127.0.0.1:8000/v1",`
26	`"concurrent_requests": "32",`
27	`"verbose": "False",`
28	`"api_max_retry": "8",`
29	`"api_retry_sleep": "1.0",`
30	`"api_retry_multiplier": "2.0",`
31	`"timeout": "3600.0",`
32	`"num_seeds_merged": "3"`
33	`}`
34	`},`
35	`"evaluation_results": [`
36	`{`
37	`"evaluation_name": "math_500",`
38	`"source_data": {`
39	`"dataset_name": "math_500",`
40	`"source_type": "hf_dataset",`
41	`"hf_repo": "HuggingFaceH4/MATH-500",`
42	`"hf_split": "test"`
43	`},`
44	`"metric_config": {`
45	`"evaluation_description": "pass@k:k=1&n=1",`
46	`"lower_is_better": false,`
47	`"score_type": "continuous",`
48	`"min_score": 0.0,`
49	`"max_score": 1.0`
50	`},`
51	`"score_details": {`
52	`"score": 0.8866666666666667,`
53	`"details": {`
54	`"seed_scores": "[0.892, 0.888, 0.88]",`
55	`"evaluation_timestamps": "[342932, 343121, 343309]",`
56	`"seed_values": "[1234, 2345, 3456]"`
57	`},`
58	`"uncertainty": {`
59	`"standard_error": {`
60	`"value": 0.003527668414752791,`
61	`"method": "across_seeds"`
62	`},`
63	`"num_samples": 3`
64	`}`
65	`},`
66	`"generation_config": {`
67	`"generation_args": {`
68	`"temperature": 1.0,`
69	`"top_p": 0.95,`
70	`"top_k": 64.0,`
71	`"max_tokens": 65536,`
72	`"max_attempts": 1`
73	`},`
74	`"additional_details": {`
75	`"seed": "1234",`
76	`"num_fewshot": "0"`
77	`}`
78	`}`
79	`}`
80	`]`
81	`}`