# nemo-evaluator-launcher-configs/local_nvidia_nemotron_3_nano_30b_a3b.yaml
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Hydra-style defaults list: composes the `local` execution config and the
# `none` deployment config. `_self_` is listed last, so values set in this
# file take precedence over the composed defaults.
defaults:
  - execution: local
  - deployment: none
  - _self_

# Settings for the execution backend selected in `defaults`.
execution:
  # Output location for this run.
  output_dir: ./results_nvidia_nemotron_3_nano_30b_a3b
  mounts:
    evaluation:
      # Maps ./hf_cache to the Hugging Face cache path.
      # NOTE(review): presumably <host path>: <container path> — confirm
      # against the launcher's mount schema.
      ./hf_cache: /root/.cache/huggingface
  env_vars:
    # Explicitly empty mapping: no extra variables are set at this level.
    evaluation: {}

# Endpoint of the model under evaluation.
target:
  api_endpoint:
    # NOTE(review): this ID orders the name as "nemotron-nano-3", while
    # `execution.output_dir` uses "nemotron_3_nano" — confirm the exact
    # model ID exposed by the endpoint.
    model_id: nvidia/nemotron-nano-3-30b-a3b
    url: https://integrate.api.nvidia.com/v1/chat/completions
    api_key_name: NGC_API_KEY  # API Key with access to build.nvidia.com

# Evaluation settings: shared environment variables, a shared
# `nemo_evaluator_config`, and the list of benchmark tasks.
# NOTE(review): the shared `nemo_evaluator_config` presumably acts as the
# default that each task's own `nemo_evaluator_config` overrides — confirm
# against the launcher documentation.
evaluation:
  env_vars:
    HF_TOKEN: HF_TOKEN
    JUDGE_API_KEY: JUDGE_API_KEY  # API Key with access to gpt-4o for HLE
    HF_HOME: HF_HOME
  nemo_evaluator_config:
    config:
      params:
        max_new_tokens: 131072
        temperature: 0.99999
        top_p: 0.99999
        parallelism: 512
        request_timeout: 3600
        max_retries: 10
        extra:
          # NOTE(review): no `nvidia/` organisation prefix — confirm this
          # resolves with the `huggingface` tokenizer backend.
          tokenizer: NVIDIA-Nemotron-Nano-3-30B-A3B-BF16
          tokenizer_backend: huggingface
    target:
      api_endpoint:
        adapter_config:
          use_caching: true
          tracking_requests_stats: true
          log_failed_requests: true
          use_request_logging: true
          max_logged_requests: 10
          use_response_logging: true
          max_logged_responses: 10
  tasks:
    # BFCL v3: own sampling settings, lower parallelism, `use_caching` off.
    - name: ns_bfcl_v3
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            temperature: 0.6
            top_p: 0.95
            parallelism: 32
            extra:
              num_repeats: 1
              args: "++use_client_parsing=False"
        target:
          api_endpoint:
            adapter_config:
              use_caching: false
    # BFCL v4: own sampling settings and a smaller `max_new_tokens`.
    # NOTE(review): unlike ns_bfcl_v3, `use_caching` is not disabled here —
    # confirm that this is intentional.
    - name: ns_bfcl_v4
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            max_new_tokens: 8192
            parallelism: 128
            temperature: 0.6
            top_p: 0.95
            extra:
              num_repeats: 1
              args: "++use_client_parsing=False"
    # LiveCodeBench: 8 repeats on the `test_v5_2407_2412` split.
    - name: ns_livecodebench
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 8
              dataset_split: test_v5_2407_2412
    # MMLU-Pro: single repeat with the 10-choice boxed prompt config.
    - name: ns_mmlu_pro
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 1
              args: "++prompt_config=eval/aai/mcq-10choices-boxed"
    # GPQA: 8 repeats with the 4-choice prompt config.
    - name: ns_gpqa
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 8
              args: "++prompt_config=eval/aai/mcq-4choices"
    # AIME 2025: 64 repeats with the math-oai prompt template.
    # NOTE(review): JUDGE_API_KEY is passed although no judge is configured
    # for this task — confirm it is required.
    - name: ns_aime2025
      env_vars:
        HF_TOKEN: HF_TOKEN
        JUDGE_API_KEY: JUDGE_API_KEY
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 64
              args: "++prompt_config=/prompt_templates/math-oai.yaml"
    # SciCode: 8 repeats.
    # NOTE(review): JUDGE_API_KEY is passed although no judge is configured
    # for this task — confirm it is required.
    - name: ns_scicode
      env_vars:
        HF_TOKEN: HF_TOKEN
        JUDGE_API_KEY: JUDGE_API_KEY
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 8
    # IFBench: 8 repeats.
    - name: ns_ifbench
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 8
    # HLE: single repeat, graded by a judge model (openai/gpt-4o).
    - name: ns_hle
      env_vars:
        HF_TOKEN: HF_TOKEN
        JUDGE_API_KEY: JUDGE_API_KEY
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 1
              judge_support: true
              judge:
                parallelism: 16
                model_id: openai/gpt-4o
                # Placeholder: replace with the judge endpoint URL before
                # running this task.
                url: <OPENAI_API_URL_FOR_JUDGE>
                # NOTE(review): presumably the name of the environment
                # variable holding the key, not the key itself — confirm.
                api_key: JUDGE_API_KEY
| 161 | |