diff --git a/docker/compose-controller-spark-sql-external-storage.yaml b/docker/compose-controller-spark-sql-external-storage.yaml
new file mode 100644
index 000000000..bdc14448f
--- /dev/null
+++ b/docker/compose-controller-spark-sql-external-storage.yaml
@@ -0,0 +1,109 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This docker-compose configuration is for bringing up a pipeline controller
+# along with a single-process Spark environment with a JDBC endpoint.
+
+# Environment variables:
+#
+# PIPELINE_CONFIG: The directory that contains pipeline configurations, namely
+# application.yaml and flink-conf.yaml files.
+#
+# DWH_ROOT: The directory where Parquet files are written. This is shared
+# between all containers; the pipeline writes to it and the Spark container
+# reads from it.
+#
+# Note that if local paths are used, they should start with `./` or `../`.
+# Also, the mounted files should be readable by the containers, e.g.,
+# world-readable.
+#
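+# Example invocation from this directory (a sketch; the two paths are
+# assumptions and must point at your actual config and output directories):
+#   PIPELINE_CONFIG=./config DWH_ROOT=./dwh \
+#     docker-compose -f compose-controller-spark-sql-external-storage.yaml up
+#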
+
+# NOTES ON SPARK:
+# This is a minimal single-process Spark configuration for running SQL
+# queries against the Parquet files generated by the pipeline. It exposes
+# an endpoint on port 10001 which can be used for JDBC connections from
+# any SQL client.
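+#
+# For example, once the stack is up, a SQL client on the host can connect
+# using a Hive JDBC URL like the following (a sketch; no authentication is
+# configured in this setup):
+#   jdbc:hive2://localhost:10001/default
+# or, with the beeline CLI that ships with Spark:
+#   beeline -u jdbc:hive2://localhost:10001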
+#
+# For a more complete configuration that shows the different pieces needed
+# for a cluster environment, please see `compose-controller-spark-sql.yaml`.
+
+# NOTES ON METASTORE:
+# This configuration uses the default embedded Derby database as the
+# Metastore for the thriftserver. Example config lines are provided (but
+# commented out) that show how to use an external DB instead.
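+#
+# As a sketch, pointing the Metastore at the `postgres` service below means
+# setting standard Hive properties like these in hive-site.xml (the values
+# here are assumptions matching this compose file):
+#   javax.jdo.option.ConnectionURL = jdbc:postgresql://postgres:5432/custom_metastore_db
+#   javax.jdo.option.ConnectionDriverName = org.postgresql.Driver
+#   javax.jdo.option.ConnectionUserName = admin
+#   javax.jdo.option.ConnectionPassword = admin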
+
+# OTHER CONFIGS:
+# If you want to change Spark's default configs, you can mount your config
+# files to /opt/bitnami/spark/conf/; see:
+# https://spark.apache.org/docs/latest/configuration.html
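+#
+# For example, an extra volume entry on the `spark` service below could mount
+# a custom properties file (a sketch; the local `./spark-defaults.conf` file
+# is an assumption):
+#   volumes:
+#     - ./spark-defaults.conf:/opt/bitnami/spark/conf/spark-defaults.conf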
+
+version: '2'
+
+services:
+  drivers-build:
+    container_name: drivers-build
+    build:
+      context: ./drivers-build
+    command:
+      # Copies the drivers fetched into this image's /jdbcDrivers directory
+      # into the mounted jdbcDrivers volume, to be used by the Spark container.
+      - /bin/sh
+      - -ec
+      - |-
+        cp -R /jdbcDrivers/* /drivers-build/jdbcDrivers
+    volumes:
+      - jdbcDrivers:/drivers-build/jdbcDrivers
+  pipeline-controller:
+    # To force a rebuild, use the `--build` option of `docker-compose up`.
+    build:
+      context: ..
+    container_name: pipeline-controller
+    volumes:
+      - ${PIPELINE_CONFIG}:/app/config:ro
+      - ${DWH_ROOT}:/dwh
+    ports:
+      - '8090:8080'
+
+  spark:
+    image: docker.io/bitnami/spark:3.3
+    container_name: spark-thriftserver
+    command:
+      # Copies the drivers into Spark's jars directory before the thriftserver
+      # starts.
+      - /bin/bash
+      - -ec
+      - |-
+        cp -R /drivers-build/jdbcDrivers/* /opt/bitnami/spark/jars/
+        sbin/start-thriftserver.sh
+    environment:
+      - HIVE_SERVER2_THRIFT_PORT=10000
+    ports:
+      - '10001:10000'
+      - '4041:4040'
+    volumes:
+      - ${DWH_ROOT}:/dwh
+      - ./hive-site_example.xml:/opt/bitnami/spark/conf/hive-site.xml
+    volumes_from:
+      - drivers-build
+
+  postgres:
+    image: postgres:14
+    ports:
+      - "5470:5432"
+    environment:
+      - "POSTGRES_PASSWORD=admin"
+      - "POSTGRES_USER=admin"
+      - "POSTGRES_DB=custom_metastore_db"
+    volumes:
+      - pgdata:/var/lib/postgresql/data
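+    # To verify the metastore DB is reachable from the host (a sketch; assumes
+    # the psql client is installed locally; the password is `admin`):
+    #   psql -h localhost -p 5470 -U admin -d custom_metastore_db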
+
+volumes:
+  jdbcDrivers:
+  pgdata:
diff --git a/docker/drivers-build/Dockerfile b/docker/drivers-build/Dockerfile
new file mode 100644
index 000000000..5d02ba0d7
--- /dev/null
+++ b/docker/drivers-build/Dockerfile
@@ -0,0 +1,12 @@
+FROM alpine:3.17.3
+
+WORKDIR /jdbcDrivers
+
+ARG POSTGRESQL_DRIVER_VERSION=42.6.0
+
+# Install required packages
+RUN apk add --no-cache curl
+
+# Fetch drivers
+RUN curl -fsSL https://jdbc.postgresql.org/download/postgresql-$POSTGRESQL_DRIVER_VERSION.jar \
+    -o postgresql-$POSTGRESQL_DRIVER_VERSION.jar
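+
+# The driver version can be overridden at build time, e.g. (a sketch, run from
+# the docker/ directory):
+#   docker build --build-arg POSTGRESQL_DRIVER_VERSION=42.6.0 drivers-build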
diff --git a/docker/drivers-build/README.md b/docker/drivers-build/README.md
new file mode 100644
index 000000000..a87e70180
--- /dev/null
+++ b/docker/drivers-build/README.md
@@ -0,0 +1,6 @@
+# Overview
+
+The docker/drivers-build directory contains a sample Dockerfile for packaging the PostgreSQL JDBC driver, which the spark-thriftserver needs in order to use PostgreSQL as external storage. For a demonstration, see this [docker compose file](../compose-controller-spark-sql-external-storage.yaml).
+
+> This folder should be ignored during continuous integration tests.
diff --git a/docker/hive-site_example.xml b/docker/hive-site_example.xml
index 7fb57f6c7..ad93e0d25 100644
--- a/docker/hive-site_example.xml
+++ b/docker/hive-site_example.xml
@@ -17,7 +17,8 @@ https://cwiki.apache.org/confluence/display/Hive/Configuration+Properties#Config
-    <value>jdbc:postgresql://172.18.0.1:5432/custom_metastore_db</value>
+
+    <value>jdbc:postgresql://postgres:5432/custom_metastore_db</value>
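+    <!-- `postgres` is the name of the metastore DB service defined in
+         compose-controller-spark-sql-external-storage.yaml; it is resolved
+         via Docker's network DNS rather than a hardcoded host IP. -->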