Created using Colab

ratulb · ratulb · commit 8cb2a009dd67 · 2025-05-24T17:14:30.000+05:30
diff --git a/gpu_puzzles/broadcast_add_ptr.ipynb b/gpu_puzzles/broadcast_add_ptr.ipynb
@@ -0,0 +1,211 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/ratulb/mojo_programming/blob/main/gpu_puzzles/broadcast_add_ptr.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!curl -ssL https://magic.modular.com/ | bash"
+      ],
+      "metadata": {
+        "id": "oghVhc-plDnV"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "os.environ['PATH'] += ':/root/.modular/bin'"
+      ],
+      "metadata": {
+        "id": "bo0LqVellMRb"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!magic init gpu_puzzles --format mojoproject"
+      ],
+      "metadata": {
+        "id": "orDFbNYOlmVj"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "%cd gpu_puzzles/"
+      ],
+      "metadata": {
+        "id": "I_II8JVmluuj"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "%%writefile broadcast_add_ptr.mojo\n",
+        "\n",
+        "### Broadcast Addiotion\n",
+        "### Add 2 vectors\n",
+        "\n",
+        "from gpu import thread_idx\n",
+        "from gpu.host import DeviceContext\n",
+        "from memory import UnsafePointer\n",
+        "from testing import assert_equal\n",
+        "\n",
+        "\n",
+        "alias SIZE = 3\n",
+        "alias dtype = DType.float32\n",
+        "alias BLOCKS_PER_GRID = 1\n",
+        "alias THREADS_PER_BLOCK = (3, 3)\n",
+        "\n",
+        "\n",
+        "fn broadcast_add_ptr(\n",
+        "    out: UnsafePointer[Scalar[dtype]],\n",
+        "    a: UnsafePointer[Scalar[dtype]],\n",
+        "    b: UnsafePointer[Scalar[dtype]],\n",
+        "):\n",
+        "    row = thread_idx.y\n",
+        "    col = thread_idx.x\n",
+        "    if row < SIZE and col < SIZE:\n",
+        "        out[row * SIZE + col] = a[row] + b[col]\n",
+        "\n",
+        "\n",
+        "fn main() raises:\n",
+        "    with DeviceContext() as ctx:\n",
+        "        out = ctx.enqueue_create_buffer[dtype](SIZE * SIZE).enqueue_fill(0)\n",
+        "        expected = ctx.enqueue_create_host_buffer[dtype](\n",
+        "            SIZE * SIZE\n",
+        "        ).enqueue_fill(0)\n",
+        "        a = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(0)\n",
+        "        b = ctx.enqueue_create_buffer[dtype](SIZE).enqueue_fill(0)\n",
+        "\n",
+        "        with a.map_to_host() as a_host, b.map_to_host() as b_host:\n",
+        "            for i in range(SIZE):\n",
+        "                a_host[i] = i\n",
+        "                b_host[i] = i\n",
+        "            print(a_host)\n",
+        "            print(a_host)\n",
+        "            for i in range(SIZE):\n",
+        "                for j in range(SIZE):\n",
+        "                    expected[i * SIZE + j] = a_host[i] + b_host[j]\n",
+        "            print(expected)\n",
+        "\n",
+        "        ctx.enqueue_function[broadcast_add_ptr](\n",
+        "            out.unsafe_ptr(),\n",
+        "            a.unsafe_ptr(),\n",
+        "            b.unsafe_ptr(),\n",
+        "            SIZE,\n",
+        "            grid_dim=BLOCKS_PER_GRID,\n",
+        "            block_dim=THREADS_PER_BLOCK,\n",
+        "        )\n",
+        "        ctx.synchronize()\n",
+        "\n",
+        "        with out.map_to_host() as out_host:\n",
+        "            print(out_host)\n",
+        "            for i in range(SIZE):\n",
+        "                for j in range(SIZE):\n",
+        "                    assert_equal(out_host[i * SIZE + j], expected[i * SIZE + j])\n"
+      ],
+      "metadata": {
+        "id": "r8TtOuGcmo7L",
+        "outputId": "958dce5e-d6f8-44cc-ee84-7f92f2999b33",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        }
+      },
+      "execution_count": 36,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Overwriting broadcast_add_ptr.mojo\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!magic run mojo broadcast_add_ptr.mojo"
+      ],
+      "metadata": {
+        "id": "2heIJSH7lxPj",
+        "outputId": "4662a871-b739-431f-a879-21b846fcf5c6",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        }
+      },
+      "execution_count": 37,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\u001b[32m⠁\u001b[0m                                                                               \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment                                                        \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment                                                        \r\u001b[2KHostBuffer([0.0, 1.0, 2.0])\n",
+            "HostBuffer([0.0, 1.0, 2.0])\n",
+            "HostBuffer([0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0])\n",
+            "HostBuffer([0.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0])\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!magic run mojo format broadcast_add_ptr.mojo"
+      ],
+      "metadata": {
+        "id": "2KeEPNK2GYKV",
+        "outputId": "29517f8d-dfff-4f99-f3be-11a2c757fa57",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        }
+      },
+      "execution_count": 35,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\u001b[32m⠁\u001b[0m                                                                               \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment                                                        \r\u001b[2K\u001b[32m⠁\u001b[0m activating environment                                                        \r\u001b[2K\u001b[1mreformatted broadcast_add_ptr.mojo\u001b[0m\n",
+            "\n",
+            "\u001b[1mAll done! ✨ 🍰 ✨\u001b[0m\n",
+            "\u001b[34m\u001b[1m1 file \u001b[0m\u001b[1mreformatted\u001b[0m.\n"
+          ]
+        }
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "name": "Welcome To Colab",
+      "provenance": [],
+      "gpuType": "T4",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "accelerator": "GPU"
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}