# File: nemo-evaluator-launcher-configs/local_nvidia_nemotron_3_nano_30b_a3b.yaml
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Defaults list: selects the `local` execution group and the `none` deployment
# group, with `_self_` listed last.
# NOTE(review): this looks like a Hydra defaults list, in which case the keys
# defined in this file override the selected groups - confirm.
defaults:
  - execution: local
  - deployment: none
  - _self_

# Execution settings: where results are written, plus the mount mapping and
# environment variables declared for the evaluation.
execution:
  output_dir: ./results_nvidia_nemotron_3_nano_30b_a3b
  mounts:
    evaluation:
      # NOTE(review): presumably <host path>: <container path> - confirm.
      ./hf_cache: /root/.cache/huggingface
  env_vars:
    # Explicit empty mapping (not null): no execution-level variables are set.
    evaluation: {}

# Endpoint of the model under evaluation (a chat-completions URL).
target:
  api_endpoint:
    model_id: nvidia/nemotron-nano-3-30b-a3b
    url: https://integrate.api.nvidia.com/v1/chat/completions
    api_key_name: NGC_API_KEY  # API Key with access to build.nvidia.com

# Evaluation settings: environment variables, evaluator parameters declared
# once at this level, and the list of benchmark tasks to run.
evaluation:
  # NOTE(review): each entry maps a name to an identically named value;
  # presumably the value is the name of the host variable to forward - confirm.
  env_vars:
    HF_TOKEN: HF_TOKEN
    JUDGE_API_KEY: JUDGE_API_KEY  # API Key with access to gpt-4o for HLE
    HF_HOME: HF_HOME
  # Parameters declared ahead of the task list. Several tasks below set the
  # same keys (temperature, top_p, parallelism, max_new_tokens, use_caching).
  # NOTE(review): presumably these are defaults that per-task values
  # override - confirm against the launcher documentation.
  nemo_evaluator_config:
    config:
      params:
        max_new_tokens: 131072
        temperature: 0.99999
        top_p: 0.99999
        parallelism: 512
        request_timeout: 3600
        max_retries: 10
        extra:
          # NOTE(review): unlike target.api_endpoint.model_id, this identifier
          # has no `nvidia/` organization prefix - confirm that it resolves
          # with the huggingface tokenizer backend.
          tokenizer: NVIDIA-Nemotron-Nano-3-30B-A3B-BF16
          tokenizer_backend: huggingface
    target:
      api_endpoint:
        adapter_config:
          use_caching: true
          tracking_requests_stats: true
          log_failed_requests: true
          use_request_logging: true
          max_logged_requests: 10
          use_response_logging: true
          max_logged_responses: 10
  tasks:
    # BFCL v3: sets its own sampling parameters and parallelism, and turns
    # adapter caching off for this task.
    - name: ns_bfcl_v3
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            temperature: 0.6
            top_p: 0.95
            parallelism: 32
            extra:
              num_repeats: 1
              args: ++use_client_parsing=False
        target:
          api_endpoint:
            adapter_config:
              use_caching: false
    # BFCL v4: sets its own max_new_tokens, parallelism and sampling parameters.
    - name: ns_bfcl_v4
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            max_new_tokens: 8192
            parallelism: 128
            temperature: 0.6
            top_p: 0.95
            extra:
              num_repeats: 1
              args: ++use_client_parsing=False
    # LiveCodeBench: 8 repeats on the test_v5_2407_2412 split.
    - name: ns_livecodebench
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 8
              dataset_split: test_v5_2407_2412
    # MMLU-Pro: single repeat with the 10-choice boxed prompt config.
    - name: ns_mmlu_pro
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 1
              args: "++prompt_config=eval/aai/mcq-10choices-boxed"
    # GPQA: 8 repeats with the 4-choice prompt config.
    - name: ns_gpqa
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 8
              args: "++prompt_config=eval/aai/mcq-4choices"
    # AIME 2025: 64 repeats with the math-oai prompt template.
    - name: ns_aime2025
      env_vars:
        HF_TOKEN: HF_TOKEN
        JUDGE_API_KEY: JUDGE_API_KEY
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 64
              args: ++prompt_config=/prompt_templates/math-oai.yaml
    # SciCode: 8 repeats.
    - name: ns_scicode
      env_vars:
        HF_TOKEN: HF_TOKEN
        JUDGE_API_KEY: JUDGE_API_KEY
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 8
    # IFBench: 8 repeats.
    - name: ns_ifbench
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 8
    # HLE: single repeat, with a judge model configured under `judge`.
    - name: ns_hle
      env_vars:
        HF_TOKEN: HF_TOKEN
        JUDGE_API_KEY: JUDGE_API_KEY
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 1
              judge_support: true
              judge:
                parallelism: 16
                model_id: openai/gpt-4o
                # Placeholder: replace with the judge endpoint URL before use.
                url: <OPENAI_API_URL_FOR_JUDGE>
                api_key: JUDGE_API_KEY