1+ {
2+ "cells" : [
3+ {
4+ "cell_type" : " markdown" ,
5+ "metadata" : {
6+ "id" : " view-in-github" ,
7+ "colab_type" : " text"
8+ },
9+ "source" : [
10+ " <a href=\" https://colab.research.google.com/github/ratulb/mojo_programming/blob/main/gpu_puzzles/broadcast_add_ptr.ipynb\" target=\" _parent\" ><img src=\" https://colab.research.google.com/assets/colab-badge.svg\" alt=\" Open In Colab\" /></a>"
11+ ]
12+ },
13+ {
14+ "cell_type" : " code" ,
15+ "source" : [
16+ " !curl -ssL https://magic.modular.com/ | bash"
17+ ],
18+ "metadata" : {
19+ "id" : " oghVhc-plDnV"
20+ },
21+ "execution_count" : null ,
22+ "outputs" : []
23+ },
24+ {
25+ "cell_type" : " code" ,
26+ "source" : [
"import os\n",
"\n",
"# Make the Modular tool directory visible to later `!magic ...` cells.\n",
"# NOTE(review): presumably this is where the installer cell above places `magic` - confirm.\n",
"# The membership check keeps the cell idempotent: re-running it no longer\n",
"# appends a duplicate PATH entry each time.\n",
"modular_bin = '/root/.modular/bin'\n",
"if modular_bin not in os.environ['PATH'].split(':'):\n",
"    os.environ['PATH'] += ':' + modular_bin"
29+ ],
30+ "metadata" : {
31+ "id" : " bo0LqVellMRb"
32+ },
33+ "execution_count" : null ,
34+ "outputs" : []
35+ },
36+ {
37+ "cell_type" : " code" ,
38+ "source" : [
39+ " !magic init gpu_puzzles --format mojoproject"
40+ ],
41+ "metadata" : {
42+ "id" : " orDFbNYOlmVj"
43+ },
44+ "execution_count" : null ,
45+ "outputs" : []
46+ },
47+ {
48+ "cell_type" : " code" ,
49+ "source" : [
50+ " %cd gpu_puzzles/"
51+ ],
52+ "metadata" : {
53+ "id" : " I_II8JVmluuj"
54+ },
55+ "execution_count" : null ,
56+ "outputs" : []
57+ },
58+ {
59+ "cell_type" : " code" ,
60+ "source" : [
61+ " %%writefile broadcast_add_ptr.mojo\n " ,
62+ " \n " ,
63+ " ### Broadcast Addition\n " ,
64+ " ### Add two vectors with broadcasting: out[i * SIZE + j] = a[i] + b[j]\n " ,
65+ " \n " ,
66+ " from gpu import thread_idx\n " ,
67+ " from gpu.host import DeviceContext\n " ,
68+ " from memory import UnsafePointer\n " ,
69+ " from testing import assert_equal\n " ,
70+ " \n " ,
71+ " \n " ,
"# Length of each input vector; the output buffer holds SIZE * SIZE elements.\n",
"alias SIZE = 3\n",
"# Element type shared by every buffer and by the kernel's pointer arguments.\n",
"alias dtype = DType.float32\n",
"# Launch configuration handed to enqueue_function as grid_dim / block_dim.\n",
"alias BLOCKS_PER_GRID = 1\n",
"# One thread per output element. Derived from SIZE (still (3, 3) today) so the\n",
"# launch shape cannot drift out of step with the data if SIZE is changed.\n",
"alias THREADS_PER_BLOCK = (SIZE, SIZE)\n",
76+ " \n " ,
77+ " \n " ,
"fn broadcast_add_ptr(\n",
"    out: UnsafePointer[Scalar[dtype]],\n",
"    a: UnsafePointer[Scalar[dtype]],\n",
"    b: UnsafePointer[Scalar[dtype]],\n",
"):\n",
"    \"\"\"Broadcast-add two vectors into a row-major SIZE x SIZE result.\n",
"\n",
"    Each thread handles the single output element selected by its\n",
"    (thread_idx.y, thread_idx.x) pair and stores a[y] + b[x] at\n",
"    out[y * SIZE + x].\n",
"\n",
"    Args:\n",
"        out: Destination; indexed up to SIZE * SIZE - 1.\n",
"        a: Operand indexed by the thread's y coordinate.\n",
"        b: Operand indexed by the thread's x coordinate.\n",
"    \"\"\"\n",
"    i = thread_idx.y\n",
"    j = thread_idx.x\n",
"    # Threads that fall outside the SIZE x SIZE result have nothing to write.\n",
"    if i >= SIZE or j >= SIZE:\n",
"        return\n",
"    out[i * SIZE + j] = a[i] + b[j]\n",
"\n",
"\n",
87+ " \n " ,
88+ " \n " ,
"fn main() raises:\n",
"    \"\"\"Run broadcast_add_ptr on the device and check it against a host reference.\n",
"\n",
"    Fills `a` and `b` with 0..SIZE-1, computes the expected SIZE x SIZE result\n",
"    on the host, launches the kernel, and compares every element.\n",
"\n",
"    Raises:\n",
"        Error: If any device result differs from the host reference\n",
"            (via assert_equal), or if a DeviceContext operation fails.\n",
"    \"\"\"\n",
"    with DeviceContext() as ctx:\n",
"        out = ctx.enqueue_create_buffer[dtype](SIZE * SIZE).enqueue_fill(0)\n",
"        expected = ctx.enqueue_create_host_buffer[dtype](\n",
"            SIZE * SIZE\n",
"        ).enqueue_fill(0)\n",
"        a = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(0)\n",
"        b = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(0)\n",
"\n",
"        with a.map_to_host() as a_host, b.map_to_host() as b_host:\n",
"            for i in range(SIZE):\n",
"                a_host[i] = i\n",
"                b_host[i] = i\n",
"            print(a_host)\n",
"            # Show the second operand too (this previously printed a_host twice).\n",
"            print(b_host)\n",
"            # Host-side reference result, computed while the inputs are mapped.\n",
"            for i in range(SIZE):\n",
"                for j in range(SIZE):\n",
"                    expected[i * SIZE + j] = a_host[i] + b_host[j]\n",
"        print(expected)\n",
"\n",
"        # The kernel declares exactly three parameters (out, a, b) and reads\n",
"        # SIZE from the module-level alias, so no size argument is passed.\n",
"        ctx.enqueue_function[broadcast_add_ptr](\n",
"            out.unsafe_ptr(),\n",
"            a.unsafe_ptr(),\n",
"            b.unsafe_ptr(),\n",
"            grid_dim=BLOCKS_PER_GRID,\n",
"            block_dim=THREADS_PER_BLOCK,\n",
"        )\n",
"        ctx.synchronize()\n",
"\n",
"        with out.map_to_host() as out_host:\n",
"            print(out_host)\n",
"            for i in range(SIZE):\n",
"                for j in range(SIZE):\n",
"                    assert_equal(out_host[i * SIZE + j], expected[i * SIZE + j])\n"
124+ ],
125+ "metadata" : {
126+ "id" : " r8TtOuGcmo7L" ,
127+ "outputId" : " 958dce5e-d6f8-44cc-ee84-7f92f2999b33" ,
128+ "colab" : {
129+ "base_uri" : " https://localhost:8080/"
130+ }
131+ },
132+ "execution_count" : 36 ,
133+ "outputs" : [
134+ {
135+ "output_type" : " stream" ,
136+ "name" : " stdout" ,
137+ "text" : [
138+ " Overwriting broadcast_add_ptr.mojo\n "
139+ ]
140+ }
141+ ]
142+ },
143+ {
144+ "cell_type" : " code" ,
145+ "source" : [
146+ " !magic run mojo broadcast_add_ptr.mojo"
147+ ],
148+ "metadata" : {
149+ "id" : " 2heIJSH7lxPj" ,
150+ "outputId" : " 4662a871-b739-431f-a879-21b846fcf5c6" ,
151+ "colab" : {
152+ "base_uri" : " https://localhost:8080/"
153+ }
154+ },
155+ "execution_count" : 37 ,
156+ "outputs" : [
157+ {
158+ "output_type" : " stream" ,
159+ "name" : " stdout" ,
160+ "text" : [
161+ " \u001b [32m⠁\u001b [0m \r \u001b [2K\u001b [32m⠁\u001b [0m activating environment \r \u001b [2K\u001b [32m⠁\u001b [0m activating environment \r \u001b [2KHostBuffer([0.0, 1.0, 2.0])\n " ,
162+ " HostBuffer([0.0, 1.0, 2.0])\n " ,
163+ " HostBuffer([0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0])\n " ,
164+ " HostBuffer([0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0])\n "
165+ ]
166+ }
167+ ]
168+ },
169+ {
170+ "cell_type" : " code" ,
171+ "source" : [
172+ " !magic run mojo format broadcast_add_ptr.mojo"
173+ ],
174+ "metadata" : {
175+ "id" : " 2KeEPNK2GYKV" ,
176+ "outputId" : " 29517f8d-dfff-4f99-f3be-11a2c757fa57" ,
177+ "colab" : {
178+ "base_uri" : " https://localhost:8080/"
179+ }
180+ },
181+ "execution_count" : 35 ,
182+ "outputs" : [
183+ {
184+ "output_type" : " stream" ,
185+ "name" : " stdout" ,
186+ "text" : [
187+ " \u001b [32m⠁\u001b [0m \r \u001b [2K\u001b [32m⠁\u001b [0m activating environment \r \u001b [2K\u001b [32m⠁\u001b [0m activating environment \r \u001b [2K\u001b [1mreformatted broadcast_add_ptr.mojo\u001b [0m\n " ,
188+ " \n " ,
189+ " \u001b [1mAll done! ✨ 🍰 ✨\u001b [0m\n " ,
190+ " \u001b [34m\u001b [1m1 file \u001b [0m\u001b [1mreformatted\u001b [0m.\n "
191+ ]
192+ }
193+ ]
194+ }
195+ ],
196+ "metadata" : {
197+ "colab" : {
198+ "name" : " broadcast_add_ptr.ipynb" ,
199+ "provenance" : [],
200+ "gpuType" : " T4" ,
201+ "include_colab_link" : true
202+ },
203+ "kernelspec" : {
204+ "display_name" : " Python 3" ,
205+ "name" : " python3"
206+ },
207+ "accelerator" : " GPU"
208+ },
209+ "nbformat" : 4 ,
210+ "nbformat_minor" : 0
211+ }
0 commit comments