every_eval_ever/math_500.json
2.2 KB · 81 lines · json Raw
1 {
2 "schema_version": "0.2.2",
3 "evaluation_id": "math_500|0/RedHatAI/gemma-4-31B-it-FP8-block/1781646637.177847",
4 "evaluation_timestamp": "342932",
5 "retrieved_timestamp": "1781646637.177847",
6 "source_metadata": {
7 "source_name": "lighteval",
8 "source_type": "evaluation_run",
9 "source_organization_name": "RedHatAI",
10 "evaluator_relationship": "third_party"
11 },
12 "eval_library": {
13 "name": "lighteval",
14 "version": "v0.13.0"
15 },
16 "model_info": {
17 "name": "RedHatAI/gemma-4-31B-it-FP8-block",
18 "id": "RedHatAI/gemma-4-31B-it-FP8-block",
19 "developer": "RedHatAI",
20 "inference_engine": {
21 "name": "vllm"
22 },
23 "additional_details": {
24 "provider": "hosted_vllm",
25 "base_url": "http://127.0.0.1:8000/v1",
26 "concurrent_requests": "32",
27 "verbose": "False",
28 "api_max_retry": "8",
29 "api_retry_sleep": "1.0",
30 "api_retry_multiplier": "2.0",
31 "timeout": "3600.0",
32 "num_seeds_merged": "3"
33 }
34 },
35 "evaluation_results": [
36 {
37 "evaluation_name": "math_500",
38 "source_data": {
39 "dataset_name": "math_500",
40 "source_type": "hf_dataset",
41 "hf_repo": "HuggingFaceH4/MATH-500",
42 "hf_split": "test"
43 },
44 "metric_config": {
45 "evaluation_description": "pass@k:k=1&n=1",
46 "lower_is_better": false,
47 "score_type": "continuous",
48 "min_score": 0.0,
49 "max_score": 1.0
50 },
51 "score_details": {
52 "score": 0.8866666666666667,
53 "details": {
54 "seed_scores": "[0.892, 0.888, 0.88]",
55 "evaluation_timestamps": "[342932, 343121, 343309]",
56 "seed_values": "[1234, 2345, 3456]"
57 },
58 "uncertainty": {
59 "standard_error": {
60 "value": 0.003527668414752791,
61 "method": "across_seeds"
62 },
63 "num_samples": 3
64 }
65 },
66 "generation_config": {
67 "generation_args": {
68 "temperature": 1.0,
69 "top_p": 0.95,
70 "top_k": 64.0,
71 "max_tokens": 65536,
72 "max_attempts": 1
73 },
74 "additional_details": {
75 "seed": "1234",
76 "num_fewshot": "0"
77 }
78 }
79 }
80 ]
81 }