every_eval_ever/lcb_codegeneration_v6.json
2.3 KB · 84 lines · json Raw
1 {
2 "schema_version": "0.2.2",
3 "evaluation_id": "lcb:codegeneration_v6|0/RedHatAI/gemma-4-31B-it-FP8-block/1781646640.567",
4 "evaluation_timestamp": "345371",
5 "retrieved_timestamp": "1781646640.567",
6 "source_metadata": {
7 "source_name": "lighteval",
8 "source_type": "evaluation_run",
9 "source_organization_name": "RedHatAI",
10 "evaluator_relationship": "third_party"
11 },
12 "eval_library": {
13 "name": "lighteval",
14 "version": "v0.13.0"
15 },
16 "model_info": {
17 "name": "RedHatAI/gemma-4-31B-it-FP8-block",
18 "id": "RedHatAI/gemma-4-31B-it-FP8-block",
19 "developer": "RedHatAI",
20 "inference_engine": {
21 "name": "vllm"
22 },
23 "additional_details": {
24 "provider": "hosted_vllm",
25 "base_url": "http://127.0.0.1:8000/v1",
26 "concurrent_requests": "32",
27 "verbose": "False",
28 "api_max_retry": "8",
29 "api_retry_sleep": "1.0",
30 "api_retry_multiplier": "2.0",
31 "timeout": "1200.0",
32 "num_seeds_merged": "3"
33 }
34 },
35 "evaluation_results": [
36 {
37 "evaluation_name": "lcb:codegeneration_v6",
38 "source_data": {
39 "dataset_name": "lcb:codegeneration_v6",
40 "source_type": "hf_dataset",
41 "hf_repo": "lighteval/code_generation_lite",
42 "hf_split": "test",
43 "additional_details": {
44 "hf_subset": "v6"
45 }
46 },
47 "metric_config": {
48 "evaluation_description": "codegen_pass@1:16",
49 "lower_is_better": false,
50 "score_type": "continuous",
51 "min_score": 0.0,
52 "max_score": 1.0
53 },
54 "score_details": {
55 "score": 0.7352380952380952,
56 "details": {
57 "seed_scores": "[0.72, 0.7314285714285714, 0.7542857142857143]",
58 "evaluation_timestamps": "[345371, 345856, 346361]",
59 "seed_values": "[1234, 2345, 3456]"
60 },
61 "uncertainty": {
62 "standard_error": {
63 "value": 0.010079052613579416,
64 "method": "across_seeds"
65 },
66 "num_samples": 3
67 }
68 },
69 "generation_config": {
70 "generation_args": {
71 "temperature": 1.0,
72 "top_p": 0.95,
73 "top_k": 64.0,
74 "max_tokens": 32768,
75 "max_attempts": 1
76 },
77 "additional_details": {
78 "seed": "1234",
79 "num_fewshot": "0"
80 }
81 }
82 }
83 ]
84 }