notebook.ipynb
3.1 KB · 132 lines · plaintext Raw
1 {
2 "nbformat": 4,
3 "nbformat_minor": 0,
4 "metadata": {
5 "colab": {
6 "provenance": [],
7 "gpuType": "A100"
8 },
9 "kernelspec": {
10 "name": "python3",
11 "display_name": "Python 3"
12 },
13 "language_info": {
14 "name": "python"
15 },
16 "accelerator": "GPU"
17 },
18 "cells": [
19 {
20 "cell_type": "code",
21 "execution_count": null,
22 "metadata": {
23 "id": "aCl-IzLoDr2H"
24 },
25 "outputs": [],
26 "source": [
27 "%pip install -U transformers mamba-ssm"
28 ]
29 },
30 {
31 "cell_type": "markdown",
32 "source": [
33 "# Load Tokenizer and Model"
34 ],
35 "metadata": {
36 "id": "SpRo_KJIRsxv"
37 }
38 },
39 {
40 "cell_type": "code",
41 "source": [
42 "import torch\n",
43 "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
44 "\n",
45 "# A single checkpoint id feeds both loaders so the tokenizer and weights match.\n",
46 "MODEL_ID = \"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16\"\n",
47 "tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n",
48 "\n",
49 "# bfloat16 weights, repo-provided model code allowed, automatic device map.\n",
50 "model = AutoModelForCausalLM.from_pretrained(\n",
51 "    MODEL_ID, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map=\"auto\"\n",
52 ")\n"
53 ],
54 "metadata": {
55 "id": "waveliieEI1n"
56 },
57 "execution_count": null,
58 "outputs": []
59 },
60 {
61 "cell_type": "markdown",
62 "source": [
63 "# Define Input with Tools"
64 ],
65 "metadata": {
66 "id": "xjVkqaSdRx0_"
67 }
68 },
69 {
70 "cell_type": "code",
71 "source": [
72 "from transformers.utils import get_json_schema\n",
73 "\n",
74 "# Tool offered to the model through tools=[...] below; transformers builds its JSON\n",
75 "# schema from the type hints and the Args section, so keep both accurate.\n",
76 "def multiply(a: float, b: float):\n",
77 "    \"\"\"\n",
78 "    A function that multiplies two numbers\n",
79 "\n",
80 "    Args:\n",
81 "        a: The first number to multiply\n",
82 "        b: The second number to multiply\n",
83 "    \"\"\"\n",
84 "    return a * b\n",
85 "\n",
86 "messages = [{\"role\": \"user\", \"content\": \"what is 2.0909090923 x 0.897987987\"}]\n",
87 "\n",
88 "# Ask for the dict form explicitly and take input_ids: the default return type of\n",
89 "# apply_chat_template differs between transformers releases, generate() needs a tensor.\n",
90 "tokenized_chat = tokenizer.apply_chat_template(\n",
91 "    messages,\n",
92 "    tools=[multiply],\n",
93 "    tokenize=True,\n",
94 "    add_generation_prompt=True,\n",
95 "    return_tensors=\"pt\", return_dict=True,\n",
96 ")[\"input_ids\"].to(model.device)\n"
97 ],
98 "metadata": {
99 "id": "zxZZ7iMZETsw"
100 },
101 "execution_count": null,
102 "outputs": []
103 },
104 {
105 "cell_type": "markdown",
106 "source": [
107 "# Inference"
108 ],
109 "metadata": {
110 "id": "SVBAG3dLRw4v"
111 }
112 },
113 {
114 "cell_type": "code",
115 "source": [
116 "# Generate at most 1024 new tokens from the templated prompt ids.\n",
117 "outputs = model.generate(\n",
118 "    tokenized_chat,\n",
119 "    eos_token_id=tokenizer.eos_token_id,\n",
120 "    max_new_tokens=1024,\n",
121 "    top_p=1.0, temperature=1.0,\n",
122 ")\n",
123 "print(tokenizer.decode(outputs[0]))  # decodes the first returned sequence"
124 ],
125 "metadata": {
126 "id": "BKYqPT5ORDx3"
127 },
128 "execution_count": null,
129 "outputs": []
130 }
131 ]
132 }