notebook.ipynb · NVIDIA-Nemotron-3-Nano-30B-A3B-BF16

notebook.ipynb

3.1 KB · 132 lines · plaintext Raw

1	`{`
2	`"nbformat": 4,`
3	`"nbformat_minor": 0,`
4	`"metadata": {`
5	`"colab": {`
6	`"provenance": [],`
7	`"gpuType": "A100"`
8	`},`
9	`"kernelspec": {`
10	`"name": "python3",`
11	`"display_name": "Python 3"`
12	`},`
13	`"language_info": {`
14	`"name": "python"`
15	`},`
16	`"accelerator": "GPU"`
17	`},`
18	`"cells": [`
19	`{`
20	`"cell_type": "code",`
21	`"execution_count": null,`
22	`"metadata": {`
23	`"id": "aCl-IzLoDr2H"`
24	`},`
25	`"outputs": [],`
26	`"source": [`
27	`"%pip install -U transformers mamba-ssm"`
28	`]`
29	`},`
30	`{`
31	`"cell_type": "markdown",`
32	`"source": [`
33	`"# Load Models"`
34	`],`
35	`"metadata": {`
36	`"id": "SpRo_KJIRsxv"`
37	`}`
38	`},`
39	`{`
40	`"cell_type": "code",`
41	`"source": [`
42	`"import torch\n",`
43	`"from transformers import AutoTokenizer, AutoModelForCausalLM\n",`
44	`"\n",`
45	`"MODEL_ID = \"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16\"  # single checkpoint id shared by tokenizer and model\n",`
46	`"tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n",`
47	`"model = AutoModelForCausalLM.from_pretrained(\n",`
48	`" MODEL_ID,\n",`
49	`" torch_dtype=torch.bfloat16,  # load the weights in bfloat16\n",`
50	`" trust_remote_code=True,  # NOTE(review): runs Python shipped in the model repo; consider pinning `revision`\n",`
51	`" device_map=\"auto\"  # let the library decide device placement\n",`
52	`")\n"`
53	`],`
54	`"metadata": {`
55	`"id": "waveliieEI1n"`
56	`},`
57	`"execution_count": null,`
58	`"outputs": []`
59	`},`
60	`{`
61	`"cell_type": "markdown",`
62	`"source": [`
63	`"# Define Input with Tools"`
64	`],`
65	`"metadata": {`
66	`"id": "xjVkqaSdRx0_"`
67	`}`
68	`},`
69	`{`
70	`"cell_type": "code",`
71	`"source": [`
72	`"from transformers.utils import get_json_schema\n",`
73	`"\n",`
74	`"def multiply(a: float, b: float):\n",`
75	`" \"\"\"\n",`
76	`" A function that multiplies two numbers\n",`
77	`"\n",`
78	`" Args:\n",`
79	`" a: The first number to multiply\n",`
80	`" b: The second number to multiply\n",`
81	`" \"\"\"\n",`
82	`" return a * b\n",`
83	`"\n",`
84	`"messages = [\n",`
85	`" {\"role\": \"user\", \"content\": \"what is 2.0909090923 x 0.897987987\"},\n",`
86	`"]\n",`
87	`"\n",`
88	`"# Render the conversation and the tool list through the chat template into prompt token ids.\n",`
89	`"tokenized_chat = tokenizer.apply_chat_template(\n",`
90	`" messages,\n",`
91	`" tools=[multiply],  # tool schema comes from multiply's type hints and docstring, so editing them changes the prompt\n",`
92	`" tokenize=True,\n",`
93	`" add_generation_prompt=True,\n",`
94	`" return_tensors=\"pt\",\n",`
95	`" return_dict=False,  # force a bare token-id tensor: it is passed positionally to model.generate() later\n",`
96	`").to(model.device)\n"`
97	`],`
98	`"metadata": {`
99	`"id": "zxZZ7iMZETsw"`
100	`},`
101	`"execution_count": null,`
102	`"outputs": []`
103	`},`
104	`{`
105	`"cell_type": "markdown",`
106	`"source": [`
107	`"# Inference"`
108	`],`
109	`"metadata": {`
110	`"id": "SVBAG3dLRw4v"`
111	`}`
112	`},`
113	`{`
114	`"cell_type": "code",`
115	`"source": [`
116	`"outputs = model.generate(\n",`
117	`" tokenized_chat,\n",`
118	`" eos_token_id=tokenizer.eos_token_id,  # use the tokenizer's EOS id as the end-of-sequence token\n",`
119	`" max_new_tokens=1024,  # cap on the number of newly generated tokens\n",`
120	`" temperature=1.0,\n",`
121	`" top_p=1.0,\n",`
122	`")\n",`
123	`"print(tokenizer.decode(outputs[0]))"`
124	`],`
125	`"metadata": {`
126	`"id": "BKYqPT5ORDx3"`
127	`},`
128	`"execution_count": null,`
129	`"outputs": []`
130	`}`
131	`]`
132	`}`