
Commit 95ea87b

Refactored
Now focuses on Microsoft's official BitNet model
1 parent fb944f4 commit 95ea87b

File tree

5 files changed: +280 additions, −112 deletions

.github/workflows/docker-image.yml

Lines changed: 29 additions & 0 deletions

@@ -0,0 +1,29 @@
+name: Build and Push Docker Image
+
+on:
+  push:
+    tags:
+      - "v*.*.*"
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: true
+          tags: grctest/fastapi_bitnet:latest
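The `v*.*.*` tag filter above means only semver-style tags start a build. A minimal shell sketch of which refs would trigger it (tag names are illustrative, and a shell `case` glob only approximates GitHub's filter-pattern syntax):

```shell
# Approximate the workflow's v*.*.* tag filter with a shell case glob
matches_trigger() {
  case "$1" in
    v*.*.*) echo "builds" ;;
    *)      echo "skipped" ;;
  esac
}

matches_trigger "v1.2.3"   # builds
matches_trigger "main"     # skipped
matches_trigger "v2"       # skipped
```

So pushing a tag such as `v1.2.3` (e.g. `git tag v1.2.3 && git push origin v1.2.3`) is what kicks off the image build and push.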

Dockerfile

Lines changed: 16 additions & 28 deletions

@@ -1,44 +1,32 @@
-FROM python:3.9
+FROM python:3.10
 
 WORKDIR /code
 
 COPY ./app /code
 
-RUN if [ -z "$(ls -A /code/models)" ]; then \
-    echo "Error: No models found in /code/models" && exit 1; \
-    fi
+# Clone BitNet with submodules directly into /code (ensures all files and submodules are present)
+RUN git clone --recursive https://github.com/microsoft/BitNet.git /tmp/BitNet && \
+    cp -r /tmp/BitNet/* /code && \
+    rm -rf /tmp/BitNet
 
+# Install dependencies
 RUN apt-get update && apt-get install -y \
     wget \
     lsb-release \
     software-properties-common \
     gnupg \
-    cmake && \
-    bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN git clone --recursive https://github.com/microsoft/BitNet.git /tmp/BitNet && \
-    cp -r /tmp/BitNet/* /code && \
-    rm -rf /tmp/BitNet
+    cmake \
+    clang \
+    && bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
 
+# Install Python dependencies
 RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt && \
-    pip install "fastapi[standard]" "uvicorn[standard]"
-
-RUN if [ -d "/code/models/Llama3-8B-1.58-100B-tokens" ]; then \
-    python /code/setup_env.py -md /code/models/Llama3-8B-1.58-100B-tokens -q i2_s --use-pretuned && \
-    find /code/models/Llama3-8B-1.58-100B-tokens -type f -name "*f32*.gguf" -delete; \
-    fi
-
-RUN if [ -d "/code/models/bitnet_b1_58-large" ]; then \
-    python /code/setup_env.py -md /code/models/bitnet_b1_58-large -q i2_s --use-pretuned && \
-    find /code/models/bitnet_b1_58-large -type f -name "*f32*.gguf" -delete; \
-    fi
-
-RUN if [ -d "/code/models/bitnet_b1_58-3B" ]; then \
-    python /code/setup_env.py -md /code/models/bitnet_b1_58-3B -q i2_s --use-pretuned && \
-    find /code/models/bitnet_b1_58-3B -type f -name "*f32*.gguf" -delete; \
-    fi
+    pip install "fastapi[standard]" "uvicorn[standard]" httpx fastapi-mcp
+
+# (Optional) Run your setup_env.py if needed
+RUN python /code/setup_env.py -md /code/models/BitNet-b1.58-2B-4T -q i2_s
 
 EXPOSE 8080
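Note that the refactor drops the old build-time guard that failed the build when the models directory was empty. If that safety net is still wanted, the same check can be kept as a small shell function; a sketch (the directory path and file name here are illustrative):

```shell
# Fail fast when a models directory is missing or empty, as the old Dockerfile did
check_models() {
  if [ -z "$(ls -A "$1" 2>/dev/null)" ]; then
    echo "Error: No models found in $1"
    return 1
  fi
  echo "ok"
}

dir=$(mktemp -d)
check_models "$dir" || true   # prints the error: the directory is empty
touch "$dir/model.gguf"
check_models "$dir"           # ok
```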

README.md

Lines changed: 8 additions & 16 deletions

@@ -8,12 +8,16 @@ It's offers the same functionality as the [Electron-BitNet](https://github.com/g
 
 ## Setup instructions
 
+If running in dev mode, run Docker Desktop on windows to initialize docker in WSL2.
+
+Launch WSL: `wsl`
+
 Install Conda: https://anaconda.org/anaconda/conda
 
 Initialize the python environment:
 ```
 conda init
-conda create -n bitnet python=3.9
+conda create -n bitnet python=3.11
 conda activate bitnet
 ```
 
@@ -22,11 +26,9 @@ Install the Huggingface-CLI tool to download the models:
 pip install -U "huggingface_hub[cli]"
 ```
 
-Download one/many of the 1-bit models from Huggingface below:
+Download Microsoft's official BitNet model:
 ```
-huggingface-cli download 1bitLLM/bitnet_b1_58-large --local-dir app/models/bitnet_b1_58-large
-huggingface-cli download 1bitLLM/bitnet_b1_58-3B --local-dir app/models/bitnet_b1_58-3B
-huggingface-cli download HF1BitLLM/Llama3-8B-1.58-100B-tokens --local-dir app/models/Llama3-8B-1.58-100B-tokens
+huggingface-cli download microsoft/BitNet-b1.58-2B-4T-gguf --local-dir app/models/BitNet-b1.58-2B-4T
 ```
 
 Build the docker image:
@@ -39,14 +41,4 @@ Run the docker image:
 docker run -d --name ai_container -p 8080:8080 fastapi_bitnet
 ```
 
-Once it's running navigate to http://127.0.0.1:8080/docs
-
----
-
-Note:
-
-If seeking to use this in production, make sure to extend the docker image with additional [authentication security](https://github.com/mjhea0/awesome-fastapi?tab=readme-ov-file#auth) steps. In its current state it's intended for use locally.
-
-Building the docker file image requires upwards of 40GB RAM for `Llama3-8B-1.58-100B-tokens`, if you have less than 64GB RAM you will probably run into issues.
-
-The Dockerfile deletes the larger f32 files, so as to reduce the time to build the docker image file, you'll need to comment out the `find /code/models/....` lines if you want the larger f32 files included.
+Once it's running navigate to http://127.0.0.1:8080/docs
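Beyond opening the docs page in a browser, a quick smoke test from the host can confirm the app is serving; FastAPI exposes its OpenAPI schema at `/openapi.json` by default, and the port comes from the README's `-p 8080:8080` mapping:

```shell
# Print the HTTP status of the OpenAPI schema; 200 means the app is up
curl -s -o /dev/null -w '%{http_code}\n' http://127.0.0.1:8080/openapi.json
```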
