# credit: DevFest Pwani 2024 in Kenya. Presentation: "Inference Your LLMs on the fly: Serverless Cloud Run with GPU Acceleration"
# https://jochen.kirstaetter.name/
55
# Base image: official Ollama runtime, pinned to an explicit minor version
# (never :latest). For full reproducibility, consider pinning by digest.
FROM ollama/ollama:0.6.8
77
88# Metadata
99LABEL maintainer="Shuyib" \
@@ -21,24 +21,65 @@ ENV OLLAMA_HOST=0.0.0.0:11434 \
2121 OLLAMA_MODELS=/models \
2222 OLLAMA_DEBUG=false \
2323 OLLAMA_KEEP_ALIVE=-1 \
24- MODEL=qwen2.5 :0.5b
24+ MODEL=qwen3:0.6b
2525
# Create the model storage directory (matches OLLAMA_MODELS) and hand it
# to the ollama user so the runtime process can write downloaded weights.
RUN mkdir -p /models && chown -R ollama:ollama /models
2929
# Temporarily become root: the following steps install a package and write
# a script under /usr/local/bin. We drop back to the ollama user below.
USER root
32+
# Install curl for the HEALTHCHECK and the entrypoint's readiness probe.
# Assumes a Debian-based image — adjust if the base changes. Update+install
# share one layer (no stale apt cache) and the lists are removed in the
# same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*
38+
# Write an entrypoint wrapper: start ollama in the background, wait for the
# HTTP API to come up, pull the configured model if it is missing, then wait
# on the server process. Pulling at container start places the model in the
# container's /models volume.
# NOTE: RUN heredocs require BuildKit (Dockerfile syntax >= 1.4); the heredoc
# is quoted ('EOF') so nothing is expanded at build time — $MODEL is resolved
# at container runtime from the ENV declared above, instead of hardcoding the
# model tag here and letting the two drift apart.
RUN mkdir -p /usr/local/bin && cat > /usr/local/bin/ollama-entrypoint.sh <<'EOF'
#!/bin/sh
set -eu

# Start the ollama server in the background and remember its PID.
ollama serve &
PID=$!

# Wait for server readiness (max ~60s); log if we give up so the operator
# can see why a subsequent pull may fail.
COUNT=0
until curl -sSf http://127.0.0.1:11434/api/version >/dev/null 2>&1; do
  COUNT=$((COUNT + 1))
  if [ "$COUNT" -ge 60 ]; then
    echo "ollama server not ready after 60s; continuing anyway" >&2
    break
  fi
  sleep 1
done

# Pull the model named by the MODEL env var (set in the Dockerfile's ENV)
# unless it is already present. -F matches the tag as a fixed string, not a
# regex. Pull failures are logged but do not kill the container, so the
# operator can inspect the logs.
MODEL="${MODEL:-qwen3:0.6b}"
if ! ollama list 2>/dev/null | grep -qF "$MODEL"; then
  echo "Pulling model $MODEL"
  ollama pull "$MODEL" || echo "Model pull failed; continue and let operator inspect logs"
fi

# Tie the container's lifetime to the server process.
wait $PID
EOF
71+
# Make the wrapper executable and owned by the runtime user.
RUN chmod +x /usr/local/bin/ollama-entrypoint.sh \
    && chown ollama:ollama /usr/local/bin/ollama-entrypoint.sh
73+
# Drop privileges: the server runs as the unprivileged ollama user.
USER ollama
3576
# Document the API port (EXPOSE does not publish it; map it at run time).
EXPOSE 11434
3879
# Healthcheck: probe the version endpoint with curl (installed above).
# -f fails on HTTP errors, -s silences the progress meter that would
# otherwise spam stderr every 30s, -S still reports real curl errors.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -fsS http://localhost:11434/api/version >/dev/null || exit 1
4283
# Entrypoint: the wrapper script starts the server and pulls the model.
# Exec (JSON-array) form, so the script is PID 1 and receives SIGTERM.
# The path must contain no stray spaces — exec form does no shell parsing,
# so "/usr/local/bin/ ollama-entrypoint.sh " would fail with ENOENT.
ENTRYPOINT ["/usr/local/bin/ollama-entrypoint.sh"]
0 commit comments